Update image outputs to default to high detail (#18386)

Do not assume the default `detail`.
This commit is contained in:
pakrym-oai
2026-04-18 11:01:12 -07:00
committed by GitHub
Unverified
parent e3c2acb9cd
commit 53b1570367
35 changed files with 245 additions and 93 deletions
@@ -471,6 +471,16 @@
},
{
"properties": {
"detail": {
"anyOf": [
{
"$ref": "#/definitions/ImageDetail"
},
{
"type": "null"
}
]
},
"image_url": {
"type": "string"
},
@@ -7173,6 +7173,16 @@
},
{
"properties": {
"detail": {
"anyOf": [
{
"$ref": "#/definitions/v2/ImageDetail"
},
{
"type": "null"
}
]
},
"image_url": {
"type": "string"
},
@@ -3750,6 +3750,16 @@
},
{
"properties": {
"detail": {
"anyOf": [
{
"$ref": "#/definitions/ImageDetail"
},
{
"type": "null"
}
]
},
"image_url": {
"type": "string"
},
@@ -25,6 +25,16 @@
},
{
"properties": {
"detail": {
"anyOf": [
{
"$ref": "#/definitions/ImageDetail"
},
{
"type": "null"
}
]
},
"image_url": {
"type": "string"
},
@@ -83,6 +83,16 @@
},
{
"properties": {
"detail": {
"anyOf": [
{
"$ref": "#/definitions/ImageDetail"
},
{
"type": "null"
}
]
},
"image_url": {
"type": "string"
},
@@ -1,5 +1,6 @@
// GENERATED CODE! DO NOT MODIFY BY HAND!
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
import type { ImageDetail } from "./ImageDetail";
export type ContentItem = { "type": "input_text", text: string, } | { "type": "input_image", image_url: string, } | { "type": "output_text", text: string, };
export type ContentItem = { "type": "input_text", text: string, } | { "type": "input_image", image_url: string, detail?: ImageDetail, } | { "type": "output_text", text: string, };
@@ -21,6 +21,7 @@ use codex_app_server_protocol::ThreadStartResponse;
use codex_app_server_protocol::TurnStartParams;
use codex_app_server_protocol::TurnStartResponse;
use codex_app_server_protocol::UserInput as V2UserInput;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
@@ -477,7 +478,7 @@ async fn dynamic_tool_call_round_trip_sends_content_items_to_model() -> Result<(
DynamicToolCallOutputContentItem::InputImage { image_url } => {
FunctionCallOutputContentItem::InputImage {
image_url,
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}
}
})
@@ -535,7 +536,8 @@ async fn dynamic_tool_call_round_trip_sends_content_items_to_model() -> Result<(
},
{
"type": "input_image",
"image_url": "data:image/png;base64,AAA"
"image_url": "data:image/png;base64,AAA",
"detail": "high"
}
])
);
+1 -1
View File
@@ -26,7 +26,7 @@ const EXEC_DESCRIPTION_TEMPLATE: &str = r#"Run JavaScript code to orchestrate/co
- Global helpers:
- `exit()`: Immediately ends the current script successfully (like an early return from the top level).
- `text(value: string | number | boolean | undefined | null)`: Appends a text item. Non-string values are stringified with `JSON.stringify(...)` when possible.
- `image(imageUrlOrItem: string | { image_url: string; detail?: "auto" | "low" | "high" | "original" | null } | ImageContent, detail?: "auto" | "low" | "high" | "original" | null)`: Appends an image item. `image_url` can be an HTTPS URL or a base64-encoded `data:` URL. To forward an MCP tool image, pass an individual `ImageContent` block from `result.content`, for example `image(result.content[0])`. MCP image blocks may request original detail with `_meta: { "codex/imageDetail": "original" }`. When provided, the second `detail` argument overrides any detail embedded in the first argument.
- `image(imageUrlOrItem: string | { image_url: string; detail?: "auto" | "low" | "high" | "original" | null } | ImageContent, detail?: "auto" | "low" | "high" | "original" | null)`: Appends an image item. `image_url` can be an HTTPS URL or a base64-encoded `data:` URL. To forward an MCP tool image, pass an individual `ImageContent` block from `result.content`, for example `image(result.content[0])`. MCP image blocks may request detail with `_meta: { "codex/imageDetail": "original" }`. When provided, the second `detail` argument overrides any detail embedded in the first argument.
- `store(key: string, value: any)`: stores a serializable value under a string key for later `exec` calls in the same session.
- `load(key: string)`: returns the stored value for a string key, or `undefined` if it is missing.
- `notify(value: string | number | boolean | undefined | null)`: immediately injects an extra `custom_tool_call_output` for the current `exec` call. Values are stringified like `text(...)`.
+1
View File
@@ -15,6 +15,7 @@ pub use description::normalize_code_mode_identifier;
pub use description::parse_exec_source;
pub use description::render_code_mode_sample;
pub use description::render_json_schema_to_typescript;
pub use response::DEFAULT_IMAGE_DETAIL;
pub use response::FunctionCallOutputContentItem;
pub use response::ImageDetail;
pub use runtime::DEFAULT_EXEC_YIELD_TIME_MS;
+2
View File
@@ -10,6 +10,8 @@ pub enum ImageDetail {
Original,
}
pub const DEFAULT_IMAGE_DETAIL: ImageDetail = ImageDetail::High;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum FunctionCallOutputContentItem {
+3 -2
View File
@@ -1,5 +1,6 @@
use serde_json::Value as JsonValue;
use crate::response::DEFAULT_IMAGE_DETAIL;
use crate::response::FunctionCallOutputContentItem;
use crate::response::ImageDetail;
@@ -81,7 +82,7 @@ pub(super) fn normalize_output_image(
}
})
}
None => None,
None => Some(DEFAULT_IMAGE_DETAIL),
};
Ok(FunctionCallOutputContentItem::InputImage { image_url, detail })
@@ -159,7 +160,7 @@ fn parse_mcp_output_image(
.and_then(JsonValue::as_object)
.and_then(|meta| meta.get(CODEX_IMAGE_DETAIL_META_KEY))
.and_then(JsonValue::as_str)
.filter(|detail| *detail == "original")
.filter(|detail| matches!(*detail, "auto" | "low" | "high" | "original"))
.map(str::to_string);
Ok((image_url, detail))
}
+1 -1
View File
@@ -669,7 +669,7 @@ text(JSON.stringify(returnsUndefined));
},
FunctionCallOutputContentItem::InputImage {
image_url: "https://example.com/image.jpg".to_string(),
detail: None,
detail: Some(crate::DEFAULT_IMAGE_DETAIL),
},
FunctionCallOutputContentItem::InputText {
text: "[true,true,true]".to_string(),
+2
View File
@@ -1,6 +1,7 @@
use super::*;
use codex_model_provider_info::ModelProviderInfo;
use codex_model_provider_info::WireApi;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use pretty_assertions::assert_eq;
async fn process_compacted_history_with_test_session(
@@ -45,6 +46,7 @@ fn content_items_to_text_joins_non_empty_segments() {
fn content_items_to_text_ignores_image_only_content() {
let items = vec![ContentItem::InputImage {
image_url: "file://image.png".to_string(),
detail: Some(DEFAULT_IMAGE_DETAIL),
}];
let joined = content_items_to_text(&items);
+2 -2
View File
@@ -649,8 +649,8 @@ fn image_data_url_estimate_adjustment(item: &ResponseItem) -> (i64, i64) {
match item {
ResponseItem::Message { content, .. } => {
for content_item in content {
if let ContentItem::InputImage { image_url } = content_item {
accumulate(image_url, None);
if let ContentItem::InputImage { image_url, detail } = content_item {
accumulate(image_url, *detail);
}
}
}
@@ -6,6 +6,7 @@ use codex_protocol::AgentPath;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::models::BaseInstructions;
use codex_protocol::models::ContentItem;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
@@ -382,6 +383,7 @@ fn for_prompt_strips_images_when_model_does_not_support_images() {
},
ContentItem::InputImage {
image_url: "https://example.com/img.png".to_string(),
detail: Some(DEFAULT_IMAGE_DETAIL),
},
ContentItem::InputText {
text: "caption".to_string(),
@@ -405,7 +407,7 @@ fn for_prompt_strips_images_when_model_does_not_support_images() {
},
FunctionCallOutputContentItem::InputImage {
image_url: "https://example.com/result.png".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
},
@@ -425,7 +427,7 @@ fn for_prompt_strips_images_when_model_does_not_support_images() {
},
FunctionCallOutputContentItem::InputImage {
image_url: "https://example.com/js-repl-result.png".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
},
@@ -506,6 +508,7 @@ fn for_prompt_strips_images_when_model_does_not_support_images() {
},
ContentItem::InputImage {
image_url: "https://example.com/img.png".to_string(),
detail: Some(DEFAULT_IMAGE_DETAIL),
},
],
end_turn: None,
@@ -715,7 +718,7 @@ fn replace_last_turn_images_replaces_tool_output_images() {
body: FunctionCallOutputBody::ContentItems(vec![
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
success: Some(true),
@@ -752,6 +755,7 @@ fn replace_last_turn_images_does_not_touch_user_images() {
role: "user".to_string(),
content: vec![ContentItem::InputImage {
image_url: "data:image/png;base64,AAA".to_string(),
detail: Some(DEFAULT_IMAGE_DETAIL),
}],
end_turn: None,
phase: None,
@@ -1680,7 +1684,10 @@ fn image_data_url_payload_does_not_dominate_message_estimate() {
ContentItem::InputText {
text: "Here is the screenshot".to_string(),
},
ContentItem::InputImage { image_url },
ContentItem::InputImage {
image_url,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
],
end_turn: None,
phase: None,
@@ -1717,7 +1724,7 @@ fn image_data_url_payload_does_not_dominate_function_call_output_estimate() {
},
FunctionCallOutputContentItem::InputImage {
image_url,
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
};
@@ -1743,7 +1750,7 @@ fn image_data_url_payload_does_not_dominate_custom_tool_call_output_estimate() {
},
FunctionCallOutputContentItem::InputImage {
image_url,
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
};
@@ -1763,6 +1770,7 @@ fn non_base64_image_urls_are_unchanged() {
role: "user".to_string(),
content: vec![ContentItem::InputImage {
image_url: "https://example.com/foo.png".to_string(),
detail: Some(DEFAULT_IMAGE_DETAIL),
}],
end_turn: None,
phase: None,
@@ -1772,7 +1780,7 @@ fn non_base64_image_urls_are_unchanged() {
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputImage {
image_url: "file:///tmp/foo.png".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
};
@@ -1794,6 +1802,7 @@ fn data_url_without_base64_marker_is_unchanged() {
role: "user".to_string(),
content: vec![ContentItem::InputImage {
image_url: "data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg'/>".to_string(),
detail: Some(DEFAULT_IMAGE_DETAIL),
}],
end_turn: None,
phase: None,
@@ -1814,7 +1823,7 @@ fn non_image_base64_data_url_is_unchanged() {
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputImage {
image_url,
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
};
@@ -1832,7 +1841,10 @@ fn mixed_case_data_url_markers_are_adjusted() {
let item = ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputImage { image_url }],
content: vec![ContentItem::InputImage {
image_url,
detail: Some(DEFAULT_IMAGE_DETAIL),
}],
end_turn: None,
phase: None,
};
@@ -1859,9 +1871,11 @@ fn multiple_inline_images_apply_multiple_fixed_costs() {
},
ContentItem::InputImage {
image_url: image_url_one,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
ContentItem::InputImage {
image_url: image_url_two,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
],
end_turn: None,
+1 -1
View File
@@ -90,7 +90,7 @@ fn parse_user_message(message: &[ContentItem]) -> Option<UserMessageItem> {
text_elements: Vec::new(),
});
}
ContentItem::InputImage { image_url } => {
ContentItem::InputImage { image_url, .. } => {
content.push(UserInput::Image {
image_url: image_url.clone(),
});
+5
View File
@@ -5,6 +5,7 @@ use codex_protocol::items::TurnItem;
use codex_protocol::items::WebSearchItem;
use codex_protocol::items::build_hook_prompt_message;
use codex_protocol::models::ContentItem;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::ReasoningItemContent;
use codex_protocol::models::ReasoningItemReasoningSummary;
use codex_protocol::models::ResponseItem;
@@ -26,9 +27,11 @@ fn parses_user_message_with_text_and_two_images() {
},
ContentItem::InputImage {
image_url: img1.clone(),
detail: Some(DEFAULT_IMAGE_DETAIL),
},
ContentItem::InputImage {
image_url: img2.clone(),
detail: Some(DEFAULT_IMAGE_DETAIL),
},
],
end_turn: None,
@@ -66,6 +69,7 @@ fn skips_local_image_label_text() {
ContentItem::InputText { text: label },
ContentItem::InputImage {
image_url: image_url.clone(),
detail: Some(DEFAULT_IMAGE_DETAIL),
},
ContentItem::InputText {
text: "</image>".to_string(),
@@ -145,6 +149,7 @@ fn skips_unnamed_image_label_text() {
ContentItem::InputText { text: label },
ContentItem::InputImage {
image_url: image_url.clone(),
detail: Some(DEFAULT_IMAGE_DETAIL),
},
ContentItem::InputText {
text: codex_protocol::models::image_close_tag_text(),
@@ -1,4 +1,5 @@
use codex_code_mode::ImageDetail as CodeModeImageDetail;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::ImageDetail;
@@ -36,7 +37,9 @@ impl IntoProtocol<FunctionCallOutputContentItem>
codex_code_mode::FunctionCallOutputContentItem::InputImage { image_url, detail } => {
FunctionCallOutputContentItem::InputImage {
image_url,
detail: detail.map(IntoProtocol::into_protocol),
detail: detail
.map(IntoProtocol::into_protocol)
.or(Some(DEFAULT_IMAGE_DETAIL)),
}
}
}
+3 -2
View File
@@ -7,6 +7,7 @@ use crate::tools::TELEMETRY_PREVIEW_TRUNCATION_NOTICE;
use crate::turn_diff_tracker::TurnDiffTracker;
use crate::unified_exec::resolve_max_tokens;
use codex_protocol::mcp::CallToolResult;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
@@ -463,10 +464,10 @@ pub(crate) fn response_input_to_code_mode_result(response: ResponseInputItem) ->
| codex_protocol::models::ContentItem::OutputText { text } => {
FunctionCallOutputContentItem::InputText { text }
}
codex_protocol::models::ContentItem::InputImage { image_url } => {
codex_protocol::models::ContentItem::InputImage { image_url, detail } => {
FunctionCallOutputContentItem::InputImage {
image_url,
detail: None,
detail: detail.or(Some(DEFAULT_IMAGE_DETAIL)),
}
}
})
+4 -3
View File
@@ -1,4 +1,5 @@
use super::*;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use core_test_support::assert_regex_match;
use pretty_assertions::assert_eq;
use serde_json::json;
@@ -173,7 +174,7 @@ fn mcp_tool_output_response_item_preserves_content_items() {
},
FunctionCallOutputContentItem::InputImage {
image_url: image_url.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]
.as_slice()
@@ -239,7 +240,7 @@ fn custom_tool_calls_can_derive_text_from_content_items() {
},
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
FunctionCallOutputContentItem::InputText {
text: "line 2".to_string(),
@@ -259,7 +260,7 @@ fn custom_tool_calls_can_derive_text_from_content_items() {
},
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
FunctionCallOutputContentItem::InputText {
text: "line 2".to_string(),
@@ -1,3 +1,4 @@
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
@@ -133,7 +134,11 @@ impl ToolHandler for ViewImageHandler {
} else {
PromptImageMode::ResizeToFit
};
let image_detail = use_original_detail.then_some(ImageDetail::Original);
let image_detail = Some(if use_original_detail {
ImageDetail::Original
} else {
DEFAULT_IMAGE_DETAIL
});
let image =
load_for_prompt_bytes(abs_path.as_path(), file_bytes, image_mode).map_err(|error| {
@@ -210,7 +215,7 @@ mod tests {
fn code_mode_result_returns_image_url_object() {
let output = ViewImageOutput {
image_url: "data:image/png;base64,AAA".to_string(),
image_detail: None,
image_detail: Some(DEFAULT_IMAGE_DETAIL),
};
let result = output.code_mode_result(&ToolPayload::Function {
@@ -221,7 +226,7 @@ mod tests {
result,
json!({
"image_url": "data:image/png;base64,AAA",
"detail": null,
"detail": "high",
})
);
}
+11 -4
View File
@@ -1225,9 +1225,9 @@ function parseImageDetail(detail) {
if (typeof detail !== "string" || !detail) {
throw new Error("codex.emitImage expected detail to be a non-empty string");
}
if (detail !== "original") {
if (!["auto", "low", "high", "original"].includes(detail)) {
throw new Error(
'codex.emitImage only supports detail "original"; omit detail for default behavior',
'codex.emitImage expected detail to be one of "auto", "low", "high", or "original"',
);
}
return detail;
@@ -1331,10 +1331,17 @@ function normalizeMcpImageData(data, mimeType) {
}
function parseMcpImageDetail(meta) {
if (!isPlainObject(meta) || meta["codex/imageDetail"] !== "original") {
if (!isPlainObject(meta)) {
return undefined;
}
return "original";
const detail = meta["codex/imageDetail"];
if (
typeof detail !== "string" ||
!["auto", "low", "high", "original"].includes(detail)
) {
return undefined;
}
return detail;
}
function parseMcpToolResult(result) {
+3 -1
View File
@@ -10,6 +10,7 @@ use std::time::Duration;
use codex_protocol::ThreadId;
use codex_protocol::models::ContentItem;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ImageDetail;
@@ -1750,7 +1751,8 @@ fn emitted_image_content_item(
) -> FunctionCallOutputContentItem {
FunctionCallOutputContentItem::InputImage {
image_url,
detail: normalize_output_image_detail(&turn.model_info, detail),
detail: normalize_output_image_detail(&turn.model_info, detail)
.or(Some(DEFAULT_IMAGE_DETAIL)),
}
}
+17 -19
View File
@@ -5,6 +5,7 @@ use crate::turn_diff_tracker::TurnDiffTracker;
use codex_protocol::dynamic_tools::DynamicToolCallOutputContentItem;
use codex_protocol::dynamic_tools::DynamicToolResponse;
use codex_protocol::dynamic_tools::DynamicToolSpec;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ImageDetail;
@@ -253,7 +254,7 @@ fn summarize_tool_call_response_for_multimodal_function_output() {
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,abcd".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
};
@@ -277,7 +278,7 @@ fn summarize_tool_call_response_for_multimodal_function_output() {
}
#[tokio::test]
async fn emitted_image_content_item_drops_unsupported_explicit_detail() {
async fn emitted_image_content_item_preserves_explicit_non_original_detail() {
let (_session, turn) = make_session_and_context().await;
let content_item = emitted_image_content_item(
&turn,
@@ -288,7 +289,7 @@ async fn emitted_image_content_item_drops_unsupported_explicit_detail() {
content_item,
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(ImageDetail::Low),
}
);
}
@@ -314,7 +315,7 @@ async fn emitted_image_content_item_allows_explicit_original_detail_when_support
}
#[tokio::test]
async fn emitted_image_content_item_drops_explicit_original_detail_when_unsupported() {
async fn emitted_image_content_item_defaults_to_high_for_unsupported_original_detail() {
let (_session, turn) = make_session_and_context().await;
let content_item = emitted_image_content_item(
@@ -327,7 +328,7 @@ async fn emitted_image_content_item_drops_explicit_original_detail_when_unsuppor
content_item,
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}
);
}
@@ -356,7 +357,7 @@ fn summarize_tool_call_response_for_multimodal_custom_output() {
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,abcd".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
};
@@ -1213,7 +1214,7 @@ console.log(out.type);
image_url:
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg=="
.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}]
.as_slice()
);
@@ -1268,7 +1269,7 @@ await codex.emitImage({ bytes: png, mimeType: "image/png" });
image_url:
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg=="
.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}]
.as_slice()
);
@@ -1325,13 +1326,13 @@ await codex.emitImage(
image_url:
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg=="
.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
FunctionCallOutputContentItem::InputImage {
image_url:
"data:image/gif;base64,R0lGODdhAQABAIAAAP///////ywAAAAAAQABAAACAkQBADs="
.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]
.as_slice()
@@ -1387,7 +1388,7 @@ console.log("cell-complete");
image_url:
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg=="
.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}]
.as_slice()
);
@@ -1465,11 +1466,11 @@ console.log("helpers-ran");
vec![
FunctionCallOutputContentItem::InputImage {
image_url: data_url.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
FunctionCallOutputContentItem::InputImage {
image_url: data_url.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]
);
@@ -1701,7 +1702,7 @@ await codex.emitImage("DATA:image/png;base64,AAA");
result.content_items.as_slice(),
[FunctionCallOutputContentItem::InputImage {
image_url: "DATA:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}]
.as_slice()
);
@@ -1751,10 +1752,7 @@ await codex.emitImage({ bytes: png, mimeType: "image/png", detail: "ultra" });
)
.await
.expect_err("invalid detail should fail");
assert!(
err.to_string()
.contains("only supports detail \"original\"")
);
assert!(err.to_string().contains("expected detail to be one of"));
assert!(session.get_pending_input().await.is_empty());
Ok(())
@@ -1804,7 +1802,7 @@ await codex.emitImage({ bytes: png, mimeType: "image/png", detail: null });
result.content_items.as_slice(),
[FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}]
.as_slice()
);
+2
View File
@@ -24,6 +24,7 @@ use codex_protocol::config_types::Settings;
use codex_protocol::config_types::Verbosity;
use codex_protocol::error::CodexErr;
use codex_protocol::models::ContentItem;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ImageDetail;
@@ -511,6 +512,7 @@ async fn resume_replays_legacy_js_repl_image_rollout_shapes() {
role: "user".to_string(),
content: vec![ContentItem::InputImage {
image_url: legacy_image_url.to_string(),
detail: Some(DEFAULT_IMAGE_DETAIL),
}],
end_turn: None,
phase: None,
+4 -2
View File
@@ -1970,14 +1970,16 @@ image("data:image/png;base64,AAA");
items[1],
serde_json::json!({
"type": "input_image",
"image_url": "https://example.com/image.jpg"
"image_url": "https://example.com/image.jpg",
"detail": "high"
}),
);
assert_eq!(
items[2],
serde_json::json!({
"type": "input_image",
"image_url": "data:image/png;base64,AAA"
"image_url": "data:image/png;base64,AAA",
"detail": "high"
}),
);
+10 -3
View File
@@ -1,5 +1,6 @@
use anyhow::Context;
use codex_protocol::models::ContentItem;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::EventMsg;
@@ -51,7 +52,7 @@ fn find_user_message_with_image(text: &str) -> Option<ResponseItem> {
fn extract_image_url(item: &ResponseItem) -> Option<String> {
match item {
ResponseItem::Message { content, .. } => content.iter().find_map(|span| match span {
ContentItem::InputImage { image_url } => Some(image_url.clone()),
ContentItem::InputImage { image_url, .. } => Some(image_url.clone()),
_ => None,
}),
_ => None,
@@ -150,7 +151,10 @@ async fn copy_paste_local_image_persists_rollout_request_shape() -> anyhow::Resu
ContentItem::InputText {
text: codex_protocol::models::local_image_open_tag_text(/*label_number*/ 1),
},
ContentItem::InputImage { image_url },
ContentItem::InputImage {
image_url,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
ContentItem::InputText {
text: codex_protocol::models::image_close_tag_text(),
},
@@ -234,7 +238,10 @@ async fn drag_drop_image_persists_rollout_request_shape() -> anyhow::Result<()>
ContentItem::InputText {
text: codex_protocol::models::image_open_tag_text(),
},
ContentItem::InputImage { image_url },
ContentItem::InputImage {
image_url,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
ContentItem::InputText {
text: codex_protocol::models::image_close_tag_text(),
},
+2 -1
View File
@@ -787,7 +787,8 @@ async fn stdio_image_responses_round_trip() -> anyhow::Result<()> {
output[1],
json!({
"type": "input_image",
"image_url": OPENAI_PNG
"image_url": OPENAI_PNG,
"detail": "high"
})
);
server.verify().await;
@@ -21,6 +21,6 @@ Scenario: Pre-turn auto-compaction with a context override emits the context dif
04:message/user:<ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>
05:message/user[4]:
[01] <image>
[02] <input_image:image_url>
[02] <input_image:detail,image_url>
[03] </image>
[04] USER_THREE
+1 -1
View File
@@ -533,7 +533,7 @@ async fn mcp_image_output_preserves_image_and_no_text_summary() -> Result<()> {
);
assert_eq!(
arr[1],
json!({"type": "input_image", "image_url": openai_png})
json!({"type": "input_image", "image_url": openai_png, "detail": "high"})
);
Ok(())
+12 -3
View File
@@ -631,7 +631,10 @@ async fn view_image_tool_treats_null_detail_as_omitted() -> anyhow::Result<()> {
.and_then(Value::as_array)
.expect("function_call_output should be a content item array");
assert_eq!(output_items.len(), 1);
assert_eq!(output_items[0].get("detail"), None);
assert_eq!(
output_items[0].get("detail").and_then(Value::as_str),
Some("high")
);
let image_url = output_items[0]
.get("image_url")
.and_then(Value::as_str)
@@ -728,7 +731,10 @@ async fn view_image_tool_resizes_when_model_lacks_original_detail_support() -> a
.and_then(Value::as_array)
.expect("function_call_output should be a content item array");
assert_eq!(output_items.len(), 1);
assert_eq!(output_items[0].get("detail"), None);
assert_eq!(
output_items[0].get("detail").and_then(Value::as_str),
Some("high")
);
let image_url = output_items[0]
.get("image_url")
@@ -829,7 +835,10 @@ async fn view_image_tool_does_not_force_original_resolution_with_capability_only
.and_then(Value::as_array)
.expect("function_call_output should be a content item array");
assert_eq!(output_items.len(), 1);
assert_eq!(output_items[0].get("detail"), None);
assert_eq!(
output_items[0].get("detail").and_then(Value::as_str),
Some("high")
);
let image_url = output_items[0]
.get("image_url")
.and_then(Value::as_str)
+46 -18
View File
@@ -208,9 +208,18 @@ pub enum ResponseInputItem {
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContentItem {
InputText { text: String },
InputImage { image_url: String },
OutputText { text: String },
InputText {
text: String,
},
InputImage {
image_url: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
#[ts(optional)]
detail: Option<ImageDetail>,
},
OutputText {
text: String,
},
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, JsonSchema, TS)]
@@ -222,6 +231,8 @@ pub enum ImageDetail {
Original,
}
pub const DEFAULT_IMAGE_DETAIL: ImageDetail = ImageDetail::High;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
/// Classifies an assistant message as interim commentary or final answer text.
@@ -935,6 +946,7 @@ pub fn local_image_content_items_with_label_number(
}
items.push(ContentItem::InputImage {
image_url: image.into_data_url(),
detail: Some(DEFAULT_IMAGE_DETAIL),
});
if label_number.is_some() {
items.push(ContentItem::InputText {
@@ -1082,7 +1094,10 @@ impl From<Vec<UserInput>> for ResponseInputItem {
ContentItem::InputText {
text: image_open_tag_text(),
},
ContentItem::InputImage { image_url },
ContentItem::InputImage {
image_url,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
ContentItem::InputText {
text: image_close_tag_text(),
},
@@ -1225,7 +1240,7 @@ impl From<crate::dynamic_tools::DynamicToolCallOutputContentItem>
crate::dynamic_tools::DynamicToolCallOutputContentItem::InputImage { image_url } => {
Self::InputImage {
image_url,
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}
}
}
@@ -1462,9 +1477,13 @@ fn convert_mcp_content_to_items(
.and_then(|meta| meta.get(CODEX_IMAGE_DETAIL_META_KEY))
.and_then(serde_json::Value::as_str)
.and_then(|detail| match detail {
"auto" => Some(ImageDetail::Auto),
"low" => Some(ImageDetail::Low),
"high" => Some(ImageDetail::High),
"original" => Some(ImageDetail::Original),
_ => None,
}),
})
.or(Some(DEFAULT_IMAGE_DETAIL)),
}
}
Ok(McpContent::Unknown) | Err(_) => FunctionCallOutputContentItem::InputText {
@@ -1555,7 +1574,7 @@ mod tests {
items,
vec![FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,Zm9v".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}]
);
}
@@ -1630,7 +1649,7 @@ mod tests {
items,
vec![FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,Zm9v".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}]
);
}
@@ -1653,7 +1672,7 @@ mod tests {
},
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
FunctionCallOutputContentItem::InputText {
text: "line 2".to_string(),
@@ -1672,7 +1691,7 @@ mod tests {
},
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
];
@@ -1695,7 +1714,7 @@ mod tests {
},
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]);
@@ -2267,7 +2286,7 @@ mod tests {
},
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,BASE64".into(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]
);
@@ -2294,7 +2313,7 @@ mod tests {
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,BASE64".into(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
};
@@ -2330,7 +2349,7 @@ mod tests {
items,
vec![FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,BASE64".into(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}]
);
@@ -2370,7 +2389,7 @@ mod tests {
}
#[test]
fn ignores_unknown_mcp_image_detail_metadata() -> Result<()> {
fn preserves_standard_detail_metadata_on_mcp_images() -> Result<()> {
let call_tool_result = CallToolResult {
content: vec![serde_json::json!({
"type": "image",
@@ -2394,7 +2413,7 @@ mod tests {
items,
vec![FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,BASE64".into(),
detail: None,
detail: Some(ImageDetail::High),
}]
);
@@ -2574,7 +2593,10 @@ mod tests {
ContentItem::InputText {
text: image_open_tag_text(),
},
ContentItem::InputImage { image_url },
ContentItem::InputImage {
image_url,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
ContentItem::InputText {
text: image_close_tag_text(),
},
@@ -2779,7 +2801,13 @@ mod tests {
text: image_open_tag_text(),
})
);
assert_eq!(content.get(1), Some(&ContentItem::InputImage { image_url }));
assert_eq!(
content.get(1),
Some(&ContentItem::InputImage {
image_url,
detail: Some(DEFAULT_IMAGE_DETAIL),
})
);
assert_eq!(
content.get(2),
Some(&ContentItem::InputText {
+4 -2
View File
@@ -1,3 +1,4 @@
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::ImageDetail;
use codex_protocol::openai_models::ModelInfo;
@@ -14,7 +15,8 @@ pub fn normalize_output_image_detail(
Some(ImageDetail::Original) if can_request_original_image_detail(model_info) => {
Some(ImageDetail::Original)
}
Some(ImageDetail::Original) | Some(_) | None => None,
Some(ImageDetail::Original) | None => None,
Some(ImageDetail::Auto | ImageDetail::Low | ImageDetail::High) => detail,
}
}
@@ -30,7 +32,7 @@ pub fn sanitize_original_image_detail(
if let FunctionCallOutputContentItem::InputImage { detail, .. } = item
&& matches!(detail, Some(ImageDetail::Original))
{
*detail = None;
*detail = Some(DEFAULT_IMAGE_DETAIL);
}
}
}
+9 -4
View File
@@ -1,4 +1,5 @@
use super::*;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::ImageDetail;
use codex_protocol::openai_models::ModelInfo;
@@ -66,17 +67,21 @@ fn explicit_original_is_dropped_without_model_support() {
}
#[test]
fn unsupported_non_original_detail_is_dropped() {
fn explicit_non_original_detail_is_preserved() {
let model_info = model_info();
assert_eq!(
normalize_output_image_detail(&model_info, Some(ImageDetail::Low)),
None
Some(ImageDetail::Low)
);
assert_eq!(
normalize_output_image_detail(&model_info, Some(ImageDetail::High)),
Some(ImageDetail::High)
);
}
#[test]
fn sanitize_original_drops_original_without_support() {
fn sanitize_original_falls_back_to_high_without_support() {
let mut items = vec![
FunctionCallOutputContentItem::InputText {
text: "header".to_string(),
@@ -101,7 +106,7 @@ fn sanitize_original_drops_original_without_support() {
},
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,BBB".to_string(),
@@ -5,6 +5,7 @@ use crate::formatted_truncate_text;
use crate::formatted_truncate_text_content_items_with_policy;
use crate::truncate_function_output_items_with_policy;
use crate::truncate_text;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputContentItem;
use pretty_assertions::assert_eq;
@@ -114,7 +115,7 @@ fn truncates_across_multiple_under_limit_texts_and_reports_omitted() {
FunctionCallOutputContentItem::InputText { text: t2.clone() },
FunctionCallOutputContentItem::InputImage {
image_url: "img:mid".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
FunctionCallOutputContentItem::InputText { text: t3 },
FunctionCallOutputContentItem::InputText { text: t4 },
@@ -142,7 +143,7 @@ fn truncates_across_multiple_under_limit_texts_and_reports_omitted() {
output[2],
FunctionCallOutputContentItem::InputImage {
image_url: "img:mid".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}
);
@@ -214,7 +215,7 @@ fn formatted_truncate_text_content_items_with_policy_merges_text_and_appends_ima
},
FunctionCallOutputContentItem::InputImage {
image_url: "img:one".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
FunctionCallOutputContentItem::InputText {
text: "efgh".to_string(),
@@ -224,7 +225,7 @@ fn formatted_truncate_text_content_items_with_policy_merges_text_and_appends_ima
},
FunctionCallOutputContentItem::InputImage {
image_url: "img:two".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
];
@@ -239,11 +240,11 @@ fn formatted_truncate_text_content_items_with_policy_merges_text_and_appends_ima
},
FunctionCallOutputContentItem::InputImage {
image_url: "img:one".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
FunctionCallOutputContentItem::InputImage {
image_url: "img:two".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]
);