From 53b15703678f7cf222d1619c8690145454e576a2 Mon Sep 17 00:00:00 2001 From: pakrym-oai Date: Sat, 18 Apr 2026 11:01:12 -0700 Subject: [PATCH] Update image outputs to default to high detail (#18386) Do not assume the default `detail`. --- .../schema/json/ClientRequest.json | 10 +++ .../codex_app_server_protocol.schemas.json | 10 +++ .../codex_app_server_protocol.v2.schemas.json | 10 +++ .../RawResponseItemCompletedNotification.json | 10 +++ .../schema/json/v2/ThreadResumeParams.json | 10 +++ .../schema/typescript/ContentItem.ts | 3 +- .../tests/suite/v2/dynamic_tools.rs | 6 +- codex-rs/code-mode/src/description.rs | 2 +- codex-rs/code-mode/src/lib.rs | 1 + codex-rs/code-mode/src/response.rs | 2 + codex-rs/code-mode/src/runtime/value.rs | 5 +- codex-rs/code-mode/src/service.rs | 2 +- codex-rs/core/src/compact_tests.rs | 2 + codex-rs/core/src/context_manager/history.rs | 4 +- .../core/src/context_manager/history_tests.rs | 32 +++++++--- codex-rs/core/src/event_mapping.rs | 2 +- codex-rs/core/src/event_mapping_tests.rs | 5 ++ .../src/tools/code_mode/response_adapter.rs | 5 +- codex-rs/core/src/tools/context.rs | 5 +- codex-rs/core/src/tools/context_tests.rs | 7 +- .../core/src/tools/handlers/view_image.rs | 11 +++- codex-rs/core/src/tools/js_repl/kernel.js | 15 +++-- codex-rs/core/src/tools/js_repl/mod.rs | 4 +- codex-rs/core/src/tools/js_repl/mod_tests.rs | 36 +++++------ codex-rs/core/tests/suite/client.rs | 2 + codex-rs/core/tests/suite/code_mode.rs | 6 +- codex-rs/core/tests/suite/image_rollout.rs | 13 +++- codex-rs/core/tests/suite/rmcp_client.rs | 3 +- ..._compaction_including_incoming_shapes.snap | 2 +- codex-rs/core/tests/suite/truncation.rs | 2 +- codex-rs/core/tests/suite/view_image.rs | 15 ++++- codex-rs/protocol/src/models.rs | 64 +++++++++++++------ codex-rs/tools/src/image_detail.rs | 6 +- codex-rs/tools/src/image_detail_tests.rs | 13 ++-- .../output-truncation/src/truncate_tests.rs | 13 ++-- 35 files changed, 245 insertions(+), 93 deletions(-) diff --git a/codex-rs/app-server-protocol/schema/json/ClientRequest.json b/codex-rs/app-server-protocol/schema/json/ClientRequest.json index 3c6a8c467..5aae5fd1d 100644 --- a/codex-rs/app-server-protocol/schema/json/ClientRequest.json +++ b/codex-rs/app-server-protocol/schema/json/ClientRequest.json @@ -471,6 +471,16 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ] + }, "image_url": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json index 6ce0d2190..bf8dbb166 100644 --- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json +++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json @@ -7173,6 +7173,16 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/v2/ImageDetail" + }, + { + "type": "null" + } + ] + }, "image_url": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json index 74cde880e..59b830d4f 100644 --- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json +++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json @@ -3750,6 +3750,16 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ] + }, "image_url": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json b/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json index 2b0c66da4..956e3b250 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json +++ b/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json @@ -25,6 +25,16 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ] + }, "image_url": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json b/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json index 3c8eb552a..35a9e5b2a 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json @@ -83,6 +83,16 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ] + }, "image_url": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/typescript/ContentItem.ts b/codex-rs/app-server-protocol/schema/typescript/ContentItem.ts index c89b9d78a..21cd8d02f 100644 --- a/codex-rs/app-server-protocol/schema/typescript/ContentItem.ts +++ b/codex-rs/app-server-protocol/schema/typescript/ContentItem.ts @@ -1,5 +1,6 @@ // GENERATED CODE! DO NOT MODIFY BY HAND! // This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { ImageDetail } from "./ImageDetail"; -export type ContentItem = { "type": "input_text", text: string, } | { "type": "input_image", image_url: string, } | { "type": "output_text", text: string, }; +export type ContentItem = { "type": "input_text", text: string, } | { "type": "input_image", image_url: string, detail?: ImageDetail, } | { "type": "output_text", text: string, }; diff --git a/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs b/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs index 0a3315a07..50dd071d8 100644 --- a/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs +++ b/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs @@ -21,6 +21,7 @@ use codex_app_server_protocol::ThreadStartResponse; use codex_app_server_protocol::TurnStartParams; use codex_app_server_protocol::TurnStartResponse; use codex_app_server_protocol::UserInput as V2UserInput; +use codex_protocol::models::DEFAULT_IMAGE_DETAIL; use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::FunctionCallOutputPayload; @@ -477,7 +478,7 @@ async fn dynamic_tool_call_round_trip_sends_content_items_to_model() -> Result<( DynamicToolCallOutputContentItem::InputImage { image_url } => { FunctionCallOutputContentItem::InputImage { image_url, - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), } } }) @@ -535,7 +536,8 @@ async fn dynamic_tool_call_round_trip_sends_content_items_to_model() -> Result<( }, { "type": "input_image", - "image_url": "data:image/png;base64,AAA" + "image_url": "data:image/png;base64,AAA", + "detail": "high" } ]) ); diff --git a/codex-rs/code-mode/src/description.rs b/codex-rs/code-mode/src/description.rs index 7f5e47fa2..4c5eb6fbd 100644 --- a/codex-rs/code-mode/src/description.rs +++ b/codex-rs/code-mode/src/description.rs @@ -26,7 +26,7 @@ const EXEC_DESCRIPTION_TEMPLATE: &str = r#"Run JavaScript code to orchestrate/co - Global helpers: - `exit()`: Immediately ends the current script successfully (like an early return from the top level). - `text(value: string | number | boolean | undefined | null)`: Appends a text item. Non-string values are stringified with `JSON.stringify(...)` when possible. -- `image(imageUrlOrItem: string | { image_url: string; detail?: "auto" | "low" | "high" | "original" | null } | ImageContent, detail?: "auto" | "low" | "high" | "original" | null)`: Appends an image item. `image_url` can be an HTTPS URL or a base64-encoded `data:` URL. To forward an MCP tool image, pass an individual `ImageContent` block from `result.content`, for example `image(result.content[0])`. MCP image blocks may request original detail with `_meta: { "codex/imageDetail": "original" }`. When provided, the second `detail` argument overrides any detail embedded in the first argument. +- `image(imageUrlOrItem: string | { image_url: string; detail?: "auto" | "low" | "high" | "original" | null } | ImageContent, detail?: "auto" | "low" | "high" | "original" | null)`: Appends an image item. `image_url` can be an HTTPS URL or a base64-encoded `data:` URL. To forward an MCP tool image, pass an individual `ImageContent` block from `result.content`, for example `image(result.content[0])`. MCP image blocks may request detail with `_meta: { "codex/imageDetail": "original" }`. When provided, the second `detail` argument overrides any detail embedded in the first argument. - `store(key: string, value: any)`: stores a serializable value under a string key for later `exec` calls in the same session. - `load(key: string)`: returns the stored value for a string key, or `undefined` if it is missing. - `notify(value: string | number | boolean | undefined | null)`: immediately injects an extra `custom_tool_call_output` for the current `exec` call. Values are stringified like `text(...)`. diff --git a/codex-rs/code-mode/src/lib.rs b/codex-rs/code-mode/src/lib.rs index 880e84ef4..bb27d9996 100644 --- a/codex-rs/code-mode/src/lib.rs +++ b/codex-rs/code-mode/src/lib.rs @@ -15,6 +15,7 @@ pub use description::normalize_code_mode_identifier; pub use description::parse_exec_source; pub use description::render_code_mode_sample; pub use description::render_json_schema_to_typescript; +pub use response::DEFAULT_IMAGE_DETAIL; pub use response::FunctionCallOutputContentItem; pub use response::ImageDetail; pub use runtime::DEFAULT_EXEC_YIELD_TIME_MS; diff --git a/codex-rs/code-mode/src/response.rs b/codex-rs/code-mode/src/response.rs index 43579fac8..0ac3a0377 100644 --- a/codex-rs/code-mode/src/response.rs +++ b/codex-rs/code-mode/src/response.rs @@ -10,6 +10,8 @@ pub enum ImageDetail { Original, } +pub const DEFAULT_IMAGE_DETAIL: ImageDetail = ImageDetail::High; + #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[serde(tag = "type", rename_all = "snake_case")] pub enum FunctionCallOutputContentItem { diff --git a/codex-rs/code-mode/src/runtime/value.rs b/codex-rs/code-mode/src/runtime/value.rs index 5c63434f4..8d76a832d 100644 --- a/codex-rs/code-mode/src/runtime/value.rs +++ b/codex-rs/code-mode/src/runtime/value.rs @@ -1,5 +1,6 @@ use serde_json::Value as JsonValue; +use crate::response::DEFAULT_IMAGE_DETAIL; use crate::response::FunctionCallOutputContentItem; use crate::response::ImageDetail; @@ -81,7 +82,7 @@ pub(super) fn normalize_output_image( } }) } - None => None, + None => Some(DEFAULT_IMAGE_DETAIL), }; Ok(FunctionCallOutputContentItem::InputImage { image_url, detail }) @@ -159,7 +160,7 @@ fn parse_mcp_output_image( .and_then(JsonValue::as_object) .and_then(|meta| meta.get(CODEX_IMAGE_DETAIL_META_KEY)) .and_then(JsonValue::as_str) - .filter(|detail| *detail == "original") + .filter(|detail| matches!(*detail, "auto" | "low" | "high" | "original")) .map(str::to_string); Ok((image_url, detail)) } diff --git a/codex-rs/code-mode/src/service.rs b/codex-rs/code-mode/src/service.rs index 79ca010c1..4a46d36b4 100644 --- a/codex-rs/code-mode/src/service.rs +++ b/codex-rs/code-mode/src/service.rs @@ -669,7 +669,7 @@ text(JSON.stringify(returnsUndefined)); }, FunctionCallOutputContentItem::InputImage { image_url: "https://example.com/image.jpg".to_string(), - detail: None, + detail: Some(crate::DEFAULT_IMAGE_DETAIL), }, FunctionCallOutputContentItem::InputText { text: "[true,true,true]".to_string(), diff --git a/codex-rs/core/src/compact_tests.rs b/codex-rs/core/src/compact_tests.rs index 6c482defa..ed2fd8487 100644 --- a/codex-rs/core/src/compact_tests.rs +++ b/codex-rs/core/src/compact_tests.rs @@ -1,6 +1,7 @@ use super::*; use codex_model_provider_info::ModelProviderInfo; use codex_model_provider_info::WireApi; +use codex_protocol::models::DEFAULT_IMAGE_DETAIL; use pretty_assertions::assert_eq; async fn process_compacted_history_with_test_session( @@ -45,6 +46,7 @@ fn content_items_to_text_joins_non_empty_segments() { fn content_items_to_text_ignores_image_only_content() { let items = vec![ContentItem::InputImage { image_url: "file://image.png".to_string(), + detail: Some(DEFAULT_IMAGE_DETAIL), }]; let joined = content_items_to_text(&items); diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index db2c6b58b..c4bdc916f 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -649,8 +649,8 @@ fn image_data_url_estimate_adjustment(item: &ResponseItem) -> (i64, i64) { match item { ResponseItem::Message { content, .. } => { for content_item in content { - if let ContentItem::InputImage { image_url } = content_item { - accumulate(image_url, None); + if let ContentItem::InputImage { image_url, detail } = content_item { + accumulate(image_url, *detail); } } } diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index ff71b797b..1df14ca8b 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -6,6 +6,7 @@ use codex_protocol::AgentPath; use codex_protocol::config_types::ReasoningSummary; use codex_protocol::models::BaseInstructions; use codex_protocol::models::ContentItem; +use codex_protocol::models::DEFAULT_IMAGE_DETAIL; use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::FunctionCallOutputPayload; @@ -382,6 +383,7 @@ fn for_prompt_strips_images_when_model_does_not_support_images() { }, ContentItem::InputImage { image_url: "https://example.com/img.png".to_string(), + detail: Some(DEFAULT_IMAGE_DETAIL), }, ContentItem::InputText { text: "caption".to_string(), @@ -405,7 +407,7 @@ fn for_prompt_strips_images_when_model_does_not_support_images() { }, FunctionCallOutputContentItem::InputImage { image_url: "https://example.com/result.png".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, ]), }, @@ -425,7 +427,7 @@ fn for_prompt_strips_images_when_model_does_not_support_images() { }, FunctionCallOutputContentItem::InputImage { image_url: "https://example.com/js-repl-result.png".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, ]), }, @@ -506,6 +508,7 @@ fn for_prompt_strips_images_when_model_does_not_support_images() { }, ContentItem::InputImage { image_url: "https://example.com/img.png".to_string(), + detail: Some(DEFAULT_IMAGE_DETAIL), }, ], end_turn: None, @@ -715,7 +718,7 @@ fn replace_last_turn_images_replaces_tool_output_images() { body: FunctionCallOutputBody::ContentItems(vec![ FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,AAA".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, ]), success: Some(true), @@ -752,6 +755,7 @@ fn replace_last_turn_images_does_not_touch_user_images() { role: "user".to_string(), content: vec![ContentItem::InputImage { image_url: "data:image/png;base64,AAA".to_string(), + detail: Some(DEFAULT_IMAGE_DETAIL), }], end_turn: None, phase: None, @@ -1680,7 +1684,10 @@ fn image_data_url_payload_does_not_dominate_message_estimate() { ContentItem::InputText { text: "Here is the screenshot".to_string(), }, - ContentItem::InputImage { image_url }, + ContentItem::InputImage { + image_url, + detail: Some(DEFAULT_IMAGE_DETAIL), + }, ], end_turn: None, phase: None, @@ -1717,7 +1724,7 @@ fn image_data_url_payload_does_not_dominate_function_call_output_estimate() { }, FunctionCallOutputContentItem::InputImage { image_url, - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, ]), }; @@ -1743,7 +1750,7 @@ fn image_data_url_payload_does_not_dominate_custom_tool_call_output_estimate() { }, FunctionCallOutputContentItem::InputImage { image_url, - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, ]), }; @@ -1763,6 +1770,7 @@ fn non_base64_image_urls_are_unchanged() { role: "user".to_string(), content: vec![ContentItem::InputImage { image_url: "https://example.com/foo.png".to_string(), + detail: Some(DEFAULT_IMAGE_DETAIL), }], end_turn: None, phase: None, @@ -1772,7 +1780,7 @@ fn non_base64_image_urls_are_unchanged() { output: FunctionCallOutputPayload::from_content_items(vec![ FunctionCallOutputContentItem::InputImage { image_url: "file:///tmp/foo.png".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, ]), }; @@ -1794,6 +1802,7 @@ fn data_url_without_base64_marker_is_unchanged() { role: "user".to_string(), content: vec![ContentItem::InputImage { image_url: "data:image/svg+xml,".to_string(), + detail: Some(DEFAULT_IMAGE_DETAIL), }], end_turn: None, phase: None, @@ -1814,7 +1823,7 @@ fn non_image_base64_data_url_is_unchanged() { output: FunctionCallOutputPayload::from_content_items(vec![ FunctionCallOutputContentItem::InputImage { image_url, - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, ]), }; @@ -1832,7 +1841,10 @@ fn mixed_case_data_url_markers_are_adjusted() { let item = ResponseItem::Message { id: None, role: "user".to_string(), - content: vec![ContentItem::InputImage { image_url }], + content: vec![ContentItem::InputImage { + image_url, + detail: Some(DEFAULT_IMAGE_DETAIL), + }], end_turn: None, phase: None, }; @@ -1859,9 +1871,11 @@ fn multiple_inline_images_apply_multiple_fixed_costs() { }, ContentItem::InputImage { image_url: image_url_one, + detail: Some(DEFAULT_IMAGE_DETAIL), }, ContentItem::InputImage { image_url: image_url_two, + detail: Some(DEFAULT_IMAGE_DETAIL), }, ], end_turn: None, diff --git a/codex-rs/core/src/event_mapping.rs b/codex-rs/core/src/event_mapping.rs index 5e174944f..21e13f6c1 100644 --- a/codex-rs/core/src/event_mapping.rs +++ b/codex-rs/core/src/event_mapping.rs @@ -90,7 +90,7 @@ fn parse_user_message(message: &[ContentItem]) -> Option { text_elements: Vec::new(), }); } - ContentItem::InputImage { image_url } => { + ContentItem::InputImage { image_url, .. } => { content.push(UserInput::Image { image_url: image_url.clone(), }); diff --git a/codex-rs/core/src/event_mapping_tests.rs b/codex-rs/core/src/event_mapping_tests.rs index a06111fd9..0cadc5fbd 100644 --- a/codex-rs/core/src/event_mapping_tests.rs +++ b/codex-rs/core/src/event_mapping_tests.rs @@ -5,6 +5,7 @@ use codex_protocol::items::TurnItem; use codex_protocol::items::WebSearchItem; use codex_protocol::items::build_hook_prompt_message; use codex_protocol::models::ContentItem; +use codex_protocol::models::DEFAULT_IMAGE_DETAIL; use codex_protocol::models::ReasoningItemContent; use codex_protocol::models::ReasoningItemReasoningSummary; use codex_protocol::models::ResponseItem; @@ -26,9 +27,11 @@ fn parses_user_message_with_text_and_two_images() { }, ContentItem::InputImage { image_url: img1.clone(), + detail: Some(DEFAULT_IMAGE_DETAIL), }, ContentItem::InputImage { image_url: img2.clone(), + detail: Some(DEFAULT_IMAGE_DETAIL), }, ], end_turn: None, @@ -66,6 +69,7 @@ fn skips_local_image_label_text() { ContentItem::InputText { text: label }, ContentItem::InputImage { image_url: image_url.clone(), + detail: Some(DEFAULT_IMAGE_DETAIL), }, ContentItem::InputText { text: "".to_string(), @@ -145,6 +149,7 @@ fn skips_unnamed_image_label_text() { ContentItem::InputText { text: label }, ContentItem::InputImage { image_url: image_url.clone(), + detail: Some(DEFAULT_IMAGE_DETAIL), }, ContentItem::InputText { text: codex_protocol::models::image_close_tag_text(), diff --git a/codex-rs/core/src/tools/code_mode/response_adapter.rs b/codex-rs/core/src/tools/code_mode/response_adapter.rs index b90448acf..e20cf6a07 100644 --- a/codex-rs/core/src/tools/code_mode/response_adapter.rs +++ b/codex-rs/core/src/tools/code_mode/response_adapter.rs @@ -1,4 +1,5 @@ use codex_code_mode::ImageDetail as CodeModeImageDetail; +use codex_protocol::models::DEFAULT_IMAGE_DETAIL; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::ImageDetail; @@ -36,7 +37,9 @@ impl IntoProtocol codex_code_mode::FunctionCallOutputContentItem::InputImage { image_url, detail } => { FunctionCallOutputContentItem::InputImage { image_url, - detail: detail.map(IntoProtocol::into_protocol), + detail: detail + .map(IntoProtocol::into_protocol) + .or(Some(DEFAULT_IMAGE_DETAIL)), } } } diff --git a/codex-rs/core/src/tools/context.rs b/codex-rs/core/src/tools/context.rs index a2f3a7f7c..4e144b550 100644 --- a/codex-rs/core/src/tools/context.rs +++ b/codex-rs/core/src/tools/context.rs @@ -7,6 +7,7 @@ use crate::tools::TELEMETRY_PREVIEW_TRUNCATION_NOTICE; use crate::turn_diff_tracker::TurnDiffTracker; use crate::unified_exec::resolve_max_tokens; use codex_protocol::mcp::CallToolResult; +use codex_protocol::models::DEFAULT_IMAGE_DETAIL; use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::FunctionCallOutputPayload; @@ -463,10 +464,10 @@ pub(crate) fn response_input_to_code_mode_result(response: ResponseInputItem) -> | codex_protocol::models::ContentItem::OutputText { text } => { FunctionCallOutputContentItem::InputText { text } } - codex_protocol::models::ContentItem::InputImage { image_url } => { + codex_protocol::models::ContentItem::InputImage { image_url, detail } => { FunctionCallOutputContentItem::InputImage { image_url, - detail: None, + detail: detail.or(Some(DEFAULT_IMAGE_DETAIL)), } } }) diff --git a/codex-rs/core/src/tools/context_tests.rs b/codex-rs/core/src/tools/context_tests.rs index 8df9159ed..c62328dff 100644 --- a/codex-rs/core/src/tools/context_tests.rs +++ b/codex-rs/core/src/tools/context_tests.rs @@ -1,4 +1,5 @@ use super::*; +use codex_protocol::models::DEFAULT_IMAGE_DETAIL; use core_test_support::assert_regex_match; use pretty_assertions::assert_eq; use serde_json::json; @@ -173,7 +174,7 @@ fn mcp_tool_output_response_item_preserves_content_items() { }, FunctionCallOutputContentItem::InputImage { image_url: image_url.to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, ] .as_slice() @@ -239,7 +240,7 @@ fn custom_tool_calls_can_derive_text_from_content_items() { }, FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,AAA".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, FunctionCallOutputContentItem::InputText { text: "line 2".to_string(), @@ -259,7 +260,7 @@ fn custom_tool_calls_can_derive_text_from_content_items() { }, FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,AAA".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, FunctionCallOutputContentItem::InputText { text: "line 2".to_string(), diff --git a/codex-rs/core/src/tools/handlers/view_image.rs b/codex-rs/core/src/tools/handlers/view_image.rs index 33ce2054a..8f3f69701 100644 --- a/codex-rs/core/src/tools/handlers/view_image.rs +++ b/codex-rs/core/src/tools/handlers/view_image.rs @@ -1,3 +1,4 @@ +use codex_protocol::models::DEFAULT_IMAGE_DETAIL; use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::FunctionCallOutputPayload; @@ -133,7 +134,11 @@ impl ToolHandler for ViewImageHandler { } else { PromptImageMode::ResizeToFit }; - let image_detail = use_original_detail.then_some(ImageDetail::Original); + let image_detail = Some(if use_original_detail { + ImageDetail::Original + } else { + DEFAULT_IMAGE_DETAIL + }); let image = load_for_prompt_bytes(abs_path.as_path(), file_bytes, image_mode).map_err(|error| { @@ -210,7 +215,7 @@ mod tests { fn code_mode_result_returns_image_url_object() { let output = ViewImageOutput { image_url: "data:image/png;base64,AAA".to_string(), - image_detail: None, + image_detail: Some(DEFAULT_IMAGE_DETAIL), }; let result = output.code_mode_result(&ToolPayload::Function { @@ -221,7 +226,7 @@ mod tests { result, json!({ "image_url": "data:image/png;base64,AAA", - "detail": null, + "detail": "high", }) ); } diff --git a/codex-rs/core/src/tools/js_repl/kernel.js b/codex-rs/core/src/tools/js_repl/kernel.js index 3e5cf855f..3eb3e916c 100644 --- a/codex-rs/core/src/tools/js_repl/kernel.js +++ b/codex-rs/core/src/tools/js_repl/kernel.js @@ -1225,9 +1225,9 @@ function parseImageDetail(detail) { if (typeof detail !== "string" || !detail) { throw new Error("codex.emitImage expected detail to be a non-empty string"); } - if (detail !== "original") { + if (!["auto", "low", "high", "original"].includes(detail)) { throw new Error( - 'codex.emitImage only supports detail "original"; omit detail for default behavior', + 'codex.emitImage expected detail to be one of "auto", "low", "high", or "original"', ); } return detail; @@ -1331,10 +1331,17 @@ function normalizeMcpImageData(data, mimeType) { } function parseMcpImageDetail(meta) { - if (!isPlainObject(meta) || meta["codex/imageDetail"] !== "original") { + if (!isPlainObject(meta)) { return undefined; } - return "original"; + const detail = meta["codex/imageDetail"]; + if ( + typeof detail !== "string" || + !["auto", "low", "high", "original"].includes(detail) + ) { + return undefined; + } + return detail; } function parseMcpToolResult(result) { diff --git a/codex-rs/core/src/tools/js_repl/mod.rs b/codex-rs/core/src/tools/js_repl/mod.rs index deaab9c2f..23f4906e5 100644 --- a/codex-rs/core/src/tools/js_repl/mod.rs +++ b/codex-rs/core/src/tools/js_repl/mod.rs @@ -10,6 +10,7 @@ use std::time::Duration; use codex_protocol::ThreadId; use codex_protocol::models::ContentItem; +use codex_protocol::models::DEFAULT_IMAGE_DETAIL; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ImageDetail; @@ -1750,7 +1751,8 @@ fn emitted_image_content_item( ) -> FunctionCallOutputContentItem { FunctionCallOutputContentItem::InputImage { image_url, - detail: normalize_output_image_detail(&turn.model_info, detail), + detail: normalize_output_image_detail(&turn.model_info, detail) + .or(Some(DEFAULT_IMAGE_DETAIL)), } } diff --git a/codex-rs/core/src/tools/js_repl/mod_tests.rs b/codex-rs/core/src/tools/js_repl/mod_tests.rs index 128fda60d..af53a5975 100644 --- a/codex-rs/core/src/tools/js_repl/mod_tests.rs +++ b/codex-rs/core/src/tools/js_repl/mod_tests.rs @@ -5,6 +5,7 @@ use crate::turn_diff_tracker::TurnDiffTracker; use codex_protocol::dynamic_tools::DynamicToolCallOutputContentItem; use codex_protocol::dynamic_tools::DynamicToolResponse; use codex_protocol::dynamic_tools::DynamicToolSpec; +use codex_protocol::models::DEFAULT_IMAGE_DETAIL; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ImageDetail; @@ -253,7 +254,7 @@ fn summarize_tool_call_response_for_multimodal_function_output() { output: FunctionCallOutputPayload::from_content_items(vec![ FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,abcd".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, ]), }; @@ -277,7 +278,7 @@ fn summarize_tool_call_response_for_multimodal_function_output() { } #[tokio::test] -async fn emitted_image_content_item_drops_unsupported_explicit_detail() { +async fn emitted_image_content_item_preserves_explicit_non_original_detail() { let (_session, turn) = make_session_and_context().await; let content_item = emitted_image_content_item( &turn, @@ -288,7 +289,7 @@ async fn emitted_image_content_item_drops_unsupported_explicit_detail() { content_item, FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,AAA".to_string(), - detail: None, + detail: Some(ImageDetail::Low), } ); } @@ -314,7 +315,7 @@ async fn emitted_image_content_item_allows_explicit_original_detail_when_support } #[tokio::test] -async fn emitted_image_content_item_drops_explicit_original_detail_when_unsupported() { +async fn emitted_image_content_item_defaults_to_high_for_unsupported_original_detail() { let (_session, turn) = make_session_and_context().await; let content_item = emitted_image_content_item( @@ -327,7 +328,7 @@ async fn emitted_image_content_item_drops_explicit_original_detail_when_unsuppor content_item, FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,AAA".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), } ); } @@ -356,7 +357,7 @@ fn summarize_tool_call_response_for_multimodal_custom_output() { output: FunctionCallOutputPayload::from_content_items(vec![ FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,abcd".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, ]), }; @@ -1213,7 +1214,7 @@ console.log(out.type); image_url: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==" .to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }] .as_slice() ); @@ -1268,7 +1269,7 @@ await codex.emitImage({ bytes: png, mimeType: "image/png" }); image_url: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==" .to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }] .as_slice() ); @@ -1325,13 +1326,13 @@ await codex.emitImage( image_url: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==" .to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, FunctionCallOutputContentItem::InputImage { image_url: "data:image/gif;base64,R0lGODdhAQABAIAAAP///////ywAAAAAAQABAAACAkQBADs=" .to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, ] .as_slice() @@ -1387,7 +1388,7 @@ console.log("cell-complete"); image_url: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==" .to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }] .as_slice() ); @@ -1465,11 +1466,11 @@ console.log("helpers-ran"); vec![ FunctionCallOutputContentItem::InputImage { image_url: data_url.to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, FunctionCallOutputContentItem::InputImage { image_url: data_url.to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, ] ); @@ -1701,7 +1702,7 @@ await codex.emitImage("DATA:image/png;base64,AAA"); result.content_items.as_slice(), [FunctionCallOutputContentItem::InputImage { image_url: "DATA:image/png;base64,AAA".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }] .as_slice() ); @@ -1751,10 +1752,7 @@ await codex.emitImage({ bytes: png, mimeType: "image/png", detail: "ultra" }); ) .await .expect_err("invalid detail should fail"); - assert!( - err.to_string() - .contains("only supports detail \"original\"") - ); + assert!(err.to_string().contains("expected detail to be one of")); assert!(session.get_pending_input().await.is_empty()); Ok(()) @@ -1804,7 +1802,7 @@ await codex.emitImage({ bytes: png, mimeType: "image/png", detail: null }); result.content_items.as_slice(), [FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }] .as_slice() ); diff --git a/codex-rs/core/tests/suite/client.rs b/codex-rs/core/tests/suite/client.rs index b5087de5a..33bc8d378 100644 --- a/codex-rs/core/tests/suite/client.rs +++ b/codex-rs/core/tests/suite/client.rs @@ -24,6 +24,7 @@ use codex_protocol::config_types::Settings; use codex_protocol::config_types::Verbosity; use codex_protocol::error::CodexErr; use codex_protocol::models::ContentItem; +use codex_protocol::models::DEFAULT_IMAGE_DETAIL; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ImageDetail; @@ -511,6 +512,7 @@ async fn resume_replays_legacy_js_repl_image_rollout_shapes() { role: "user".to_string(), content: vec![ContentItem::InputImage { image_url: legacy_image_url.to_string(), + detail: Some(DEFAULT_IMAGE_DETAIL), }], end_turn: None, phase: None, diff --git a/codex-rs/core/tests/suite/code_mode.rs b/codex-rs/core/tests/suite/code_mode.rs index 474d221d2..383e6050e 100644 --- a/codex-rs/core/tests/suite/code_mode.rs +++ b/codex-rs/core/tests/suite/code_mode.rs @@ -1970,14 +1970,16 @@ image("data:image/png;base64,AAA"); items[1], serde_json::json!({ "type": "input_image", - "image_url": "https://example.com/image.jpg" + "image_url": "https://example.com/image.jpg", + "detail": "high" }), ); assert_eq!( items[2], serde_json::json!({ "type": "input_image", - "image_url": "data:image/png;base64,AAA" + "image_url": "data:image/png;base64,AAA", + "detail": "high" }), ); diff --git a/codex-rs/core/tests/suite/image_rollout.rs b/codex-rs/core/tests/suite/image_rollout.rs index 8195bd0a8..a7ec8318f 100644 --- a/codex-rs/core/tests/suite/image_rollout.rs +++ b/codex-rs/core/tests/suite/image_rollout.rs @@ -1,5 +1,6 @@ use anyhow::Context; use codex_protocol::models::ContentItem; +use codex_protocol::models::DEFAULT_IMAGE_DETAIL; use codex_protocol::models::ResponseItem; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::EventMsg; @@ -51,7 +52,7 @@ fn find_user_message_with_image(text: &str) -> Option { fn extract_image_url(item: &ResponseItem) -> Option { match item { ResponseItem::Message { content, .. } => content.iter().find_map(|span| match span { - ContentItem::InputImage { image_url } => Some(image_url.clone()), + ContentItem::InputImage { image_url, .. } => Some(image_url.clone()), _ => None, }), _ => None, @@ -150,7 +151,10 @@ async fn copy_paste_local_image_persists_rollout_request_shape() -> anyhow::Resu ContentItem::InputText { text: codex_protocol::models::local_image_open_tag_text(/*label_number*/ 1), }, - ContentItem::InputImage { image_url }, + ContentItem::InputImage { + image_url, + detail: Some(DEFAULT_IMAGE_DETAIL), + }, ContentItem::InputText { text: codex_protocol::models::image_close_tag_text(), }, @@ -234,7 +238,10 @@ async fn drag_drop_image_persists_rollout_request_shape() -> anyhow::Result<()> ContentItem::InputText { text: codex_protocol::models::image_open_tag_text(), }, - ContentItem::InputImage { image_url }, + ContentItem::InputImage { + image_url, + detail: Some(DEFAULT_IMAGE_DETAIL), + }, ContentItem::InputText { text: codex_protocol::models::image_close_tag_text(), }, diff --git a/codex-rs/core/tests/suite/rmcp_client.rs b/codex-rs/core/tests/suite/rmcp_client.rs index 733edf063..a32f21f7c 100644 --- a/codex-rs/core/tests/suite/rmcp_client.rs +++ b/codex-rs/core/tests/suite/rmcp_client.rs @@ -787,7 +787,8 @@ async fn stdio_image_responses_round_trip() -> anyhow::Result<()> { output[1], json!({ "type": "input_image", - "image_url": OPENAI_PNG + "image_url": OPENAI_PNG, + "detail": "high" }) ); server.verify().await; diff --git a/codex-rs/core/tests/suite/snapshots/all__suite__compact__pre_turn_compaction_including_incoming_shapes.snap b/codex-rs/core/tests/suite/snapshots/all__suite__compact__pre_turn_compaction_including_incoming_shapes.snap index 404d876dc..2edd63d3d 100644 --- a/codex-rs/core/tests/suite/snapshots/all__suite__compact__pre_turn_compaction_including_incoming_shapes.snap +++ b/codex-rs/core/tests/suite/snapshots/all__suite__compact__pre_turn_compaction_including_incoming_shapes.snap @@ -21,6 +21,6 @@ Scenario: Pre-turn auto-compaction with a context override emits the context dif 04:message/user: 05:message/user[4]: [01] - [02] + [02] [03] [04] USER_THREE diff --git a/codex-rs/core/tests/suite/truncation.rs b/codex-rs/core/tests/suite/truncation.rs index 81c22e1b6..d0c19f9be 100644 --- a/codex-rs/core/tests/suite/truncation.rs +++ b/codex-rs/core/tests/suite/truncation.rs @@ -533,7 +533,7 @@ async fn mcp_image_output_preserves_image_and_no_text_summary() -> Result<()> { ); assert_eq!( arr[1], - json!({"type": "input_image", "image_url": openai_png}) + json!({"type": "input_image", "image_url": openai_png, "detail": "high"}) ); Ok(()) diff --git a/codex-rs/core/tests/suite/view_image.rs b/codex-rs/core/tests/suite/view_image.rs index 891c4d238..9f53e60d7 100644 --- a/codex-rs/core/tests/suite/view_image.rs +++ b/codex-rs/core/tests/suite/view_image.rs @@ -631,7 +631,10 @@ async fn view_image_tool_treats_null_detail_as_omitted() -> anyhow::Result<()> { .and_then(Value::as_array) .expect("function_call_output should be a content item array"); assert_eq!(output_items.len(), 1); - assert_eq!(output_items[0].get("detail"), None); + assert_eq!( + output_items[0].get("detail").and_then(Value::as_str), + Some("high") + ); let image_url = output_items[0] .get("image_url") .and_then(Value::as_str) @@ -728,7 +731,10 @@ async fn view_image_tool_resizes_when_model_lacks_original_detail_support() -> a .and_then(Value::as_array) .expect("function_call_output should be a content item array"); assert_eq!(output_items.len(), 1); - assert_eq!(output_items[0].get("detail"), None); + assert_eq!( + output_items[0].get("detail").and_then(Value::as_str), + Some("high") + ); let image_url = output_items[0] .get("image_url") @@ -829,7 +835,10 @@ async fn view_image_tool_does_not_force_original_resolution_with_capability_only .and_then(Value::as_array) .expect("function_call_output should be a content item array"); assert_eq!(output_items.len(), 1); - assert_eq!(output_items[0].get("detail"), None); + assert_eq!( + output_items[0].get("detail").and_then(Value::as_str), + Some("high") + ); let image_url = output_items[0] .get("image_url") .and_then(Value::as_str) diff --git a/codex-rs/protocol/src/models.rs b/codex-rs/protocol/src/models.rs index 15dc3d36e..02cb8c432 100644 --- a/codex-rs/protocol/src/models.rs +++ b/codex-rs/protocol/src/models.rs @@ -208,9 +208,18 @@ pub enum ResponseInputItem { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)] #[serde(tag = "type", rename_all = "snake_case")] pub enum ContentItem { - InputText { text: String }, - InputImage { image_url: String }, - OutputText { text: String }, + InputText { + text: String, + }, + InputImage { + image_url: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + #[ts(optional)] + detail: Option, + }, + OutputText { + text: String, + }, } #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, JsonSchema, TS)] @@ -222,6 +231,8 @@ pub enum ImageDetail { Original, } +pub const DEFAULT_IMAGE_DETAIL: ImageDetail = ImageDetail::High; + #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema, TS)] #[serde(rename_all = "snake_case")] /// Classifies an assistant message as interim commentary or final answer text. @@ -935,6 +946,7 @@ pub fn local_image_content_items_with_label_number( } items.push(ContentItem::InputImage { image_url: image.into_data_url(), + detail: Some(DEFAULT_IMAGE_DETAIL), }); if label_number.is_some() { items.push(ContentItem::InputText { @@ -1082,7 +1094,10 @@ impl From> for ResponseInputItem { ContentItem::InputText { text: image_open_tag_text(), }, - ContentItem::InputImage { image_url }, + ContentItem::InputImage { + image_url, + detail: Some(DEFAULT_IMAGE_DETAIL), + }, ContentItem::InputText { text: image_close_tag_text(), }, @@ -1225,7 +1240,7 @@ impl From crate::dynamic_tools::DynamicToolCallOutputContentItem::InputImage { image_url } => { Self::InputImage { image_url, - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), } } } @@ -1462,9 +1477,13 @@ fn convert_mcp_content_to_items( .and_then(|meta| meta.get(CODEX_IMAGE_DETAIL_META_KEY)) .and_then(serde_json::Value::as_str) .and_then(|detail| match detail { + "auto" => Some(ImageDetail::Auto), + "low" => Some(ImageDetail::Low), + "high" => Some(ImageDetail::High), "original" => Some(ImageDetail::Original), _ => None, - }), + }) + .or(Some(DEFAULT_IMAGE_DETAIL)), } } Ok(McpContent::Unknown) | Err(_) => FunctionCallOutputContentItem::InputText { @@ -1555,7 +1574,7 @@ mod tests { items, vec![FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,Zm9v".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }] ); } @@ -1630,7 +1649,7 @@ mod tests { items, vec![FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,Zm9v".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }] ); } @@ -1653,7 +1672,7 @@ mod tests { }, FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,AAA".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, FunctionCallOutputContentItem::InputText { text: "line 2".to_string(), @@ -1672,7 +1691,7 @@ mod tests { }, FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,AAA".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, ]; @@ -1695,7 +1714,7 @@ mod tests { }, FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,AAA".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, ]); @@ -2267,7 +2286,7 @@ mod tests { }, FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,BASE64".into(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, ] ); @@ -2294,7 +2313,7 @@ mod tests { output: FunctionCallOutputPayload::from_content_items(vec![ FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,BASE64".into(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, ]), }; @@ -2330,7 +2349,7 @@ mod tests { items, vec![FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,BASE64".into(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }] ); @@ -2370,7 +2389,7 @@ mod tests { } #[test] - fn ignores_unknown_mcp_image_detail_metadata() -> Result<()> { + fn preserves_standard_detail_metadata_on_mcp_images() -> Result<()> { let call_tool_result = CallToolResult { content: vec![serde_json::json!({ "type": "image", @@ -2394,7 +2413,7 @@ mod tests { items, vec![FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,BASE64".into(), - detail: None, + detail: Some(ImageDetail::High), }] ); @@ -2574,7 +2593,10 @@ mod tests { ContentItem::InputText { text: image_open_tag_text(), }, - ContentItem::InputImage { image_url }, + ContentItem::InputImage { + image_url, + detail: Some(DEFAULT_IMAGE_DETAIL), + }, ContentItem::InputText { text: image_close_tag_text(), }, @@ -2779,7 +2801,13 @@ mod tests { text: image_open_tag_text(), }) ); - assert_eq!(content.get(1), Some(&ContentItem::InputImage { image_url })); + assert_eq!( + content.get(1), + Some(&ContentItem::InputImage { + image_url, + detail: Some(DEFAULT_IMAGE_DETAIL), + }) + ); assert_eq!( content.get(2), Some(&ContentItem::InputText { diff --git a/codex-rs/tools/src/image_detail.rs b/codex-rs/tools/src/image_detail.rs index 56987e483..37086f691 100644 --- a/codex-rs/tools/src/image_detail.rs +++ b/codex-rs/tools/src/image_detail.rs @@ -1,3 +1,4 @@ +use codex_protocol::models::DEFAULT_IMAGE_DETAIL; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::ImageDetail; use codex_protocol::openai_models::ModelInfo; @@ -14,7 +15,8 @@ pub fn normalize_output_image_detail( Some(ImageDetail::Original) if can_request_original_image_detail(model_info) => { Some(ImageDetail::Original) } - Some(ImageDetail::Original) | Some(_) | None => None, + Some(ImageDetail::Original) | None => None, + Some(ImageDetail::Auto | ImageDetail::Low | ImageDetail::High) => detail, } } @@ -30,7 +32,7 @@ pub fn sanitize_original_image_detail( if let FunctionCallOutputContentItem::InputImage { detail, .. } = item && matches!(detail, Some(ImageDetail::Original)) { - *detail = None; + *detail = Some(DEFAULT_IMAGE_DETAIL); } } } diff --git a/codex-rs/tools/src/image_detail_tests.rs b/codex-rs/tools/src/image_detail_tests.rs index c1a0f8ca1..393a962ac 100644 --- a/codex-rs/tools/src/image_detail_tests.rs +++ b/codex-rs/tools/src/image_detail_tests.rs @@ -1,4 +1,5 @@ use super::*; +use codex_protocol::models::DEFAULT_IMAGE_DETAIL; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::ImageDetail; use codex_protocol::openai_models::ModelInfo; @@ -66,17 +67,21 @@ fn explicit_original_is_dropped_without_model_support() { } #[test] -fn unsupported_non_original_detail_is_dropped() { +fn explicit_non_original_detail_is_preserved() { let model_info = model_info(); assert_eq!( normalize_output_image_detail(&model_info, Some(ImageDetail::Low)), - None + Some(ImageDetail::Low) + ); + assert_eq!( + normalize_output_image_detail(&model_info, Some(ImageDetail::High)), + Some(ImageDetail::High) ); } #[test] -fn sanitize_original_drops_original_without_support() { +fn sanitize_original_falls_back_to_high_without_support() { let mut items = vec![ FunctionCallOutputContentItem::InputText { text: "header".to_string(), @@ -101,7 +106,7 @@ fn sanitize_original_drops_original_without_support() { }, FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,AAA".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,BBB".to_string(), diff --git a/codex-rs/utils/output-truncation/src/truncate_tests.rs b/codex-rs/utils/output-truncation/src/truncate_tests.rs index f159a6b62..74acb15ca 100644 --- a/codex-rs/utils/output-truncation/src/truncate_tests.rs +++ b/codex-rs/utils/output-truncation/src/truncate_tests.rs @@ -5,6 +5,7 @@ use crate::formatted_truncate_text; use crate::formatted_truncate_text_content_items_with_policy; use crate::truncate_function_output_items_with_policy; use crate::truncate_text; +use codex_protocol::models::DEFAULT_IMAGE_DETAIL; use codex_protocol::models::FunctionCallOutputContentItem; use pretty_assertions::assert_eq; @@ -114,7 +115,7 @@ fn truncates_across_multiple_under_limit_texts_and_reports_omitted() { FunctionCallOutputContentItem::InputText { text: t2.clone() }, FunctionCallOutputContentItem::InputImage { image_url: "img:mid".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, FunctionCallOutputContentItem::InputText { text: t3 }, FunctionCallOutputContentItem::InputText { text: t4 }, @@ -142,7 +143,7 @@ fn truncates_across_multiple_under_limit_texts_and_reports_omitted() { output[2], FunctionCallOutputContentItem::InputImage { image_url: "img:mid".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), } ); @@ -214,7 +215,7 @@ fn formatted_truncate_text_content_items_with_policy_merges_text_and_appends_ima }, FunctionCallOutputContentItem::InputImage { image_url: "img:one".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, FunctionCallOutputContentItem::InputText { text: "efgh".to_string(), @@ -224,7 +225,7 @@ fn formatted_truncate_text_content_items_with_policy_merges_text_and_appends_ima }, FunctionCallOutputContentItem::InputImage { image_url: "img:two".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, ]; @@ -239,11 +240,11 @@ fn formatted_truncate_text_content_items_with_policy_merges_text_and_appends_ima }, FunctionCallOutputContentItem::InputImage { image_url: "img:one".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, FunctionCallOutputContentItem::InputImage { image_url: "img:two".to_string(), - detail: None, + detail: Some(DEFAULT_IMAGE_DETAIL), }, ] );