mirror of
https://github.com/pchuan98/codex.git
synced 2026-07-01 00:31:56 +08:00
Update image outputs to default to high detail (#18386)
Do not assume the default `detail`.
This commit is contained in:
committed by
GitHub
Unverified
parent
e3c2acb9cd
commit
53b1570367
@@ -471,6 +471,16 @@
|
||||
},
|
||||
{
|
||||
"properties": {
|
||||
"detail": {
|
||||
"anyOf": [
|
||||
{
|
||||
"$ref": "#/definitions/ImageDetail"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
},
|
||||
"image_url": {
|
||||
"type": "string"
|
||||
},
|
||||
|
||||
@@ -7173,6 +7173,16 @@
|
||||
},
|
||||
{
|
||||
"properties": {
|
||||
"detail": {
|
||||
"anyOf": [
|
||||
{
|
||||
"$ref": "#/definitions/v2/ImageDetail"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
},
|
||||
"image_url": {
|
||||
"type": "string"
|
||||
},
|
||||
|
||||
@@ -3750,6 +3750,16 @@
|
||||
},
|
||||
{
|
||||
"properties": {
|
||||
"detail": {
|
||||
"anyOf": [
|
||||
{
|
||||
"$ref": "#/definitions/ImageDetail"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
},
|
||||
"image_url": {
|
||||
"type": "string"
|
||||
},
|
||||
|
||||
@@ -25,6 +25,16 @@
|
||||
},
|
||||
{
|
||||
"properties": {
|
||||
"detail": {
|
||||
"anyOf": [
|
||||
{
|
||||
"$ref": "#/definitions/ImageDetail"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
},
|
||||
"image_url": {
|
||||
"type": "string"
|
||||
},
|
||||
|
||||
@@ -83,6 +83,16 @@
|
||||
},
|
||||
{
|
||||
"properties": {
|
||||
"detail": {
|
||||
"anyOf": [
|
||||
{
|
||||
"$ref": "#/definitions/ImageDetail"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
},
|
||||
"image_url": {
|
||||
"type": "string"
|
||||
},
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
// GENERATED CODE! DO NOT MODIFY BY HAND!
|
||||
|
||||
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
|
||||
import type { ImageDetail } from "./ImageDetail";
|
||||
|
||||
export type ContentItem = { "type": "input_text", text: string, } | { "type": "input_image", image_url: string, } | { "type": "output_text", text: string, };
|
||||
export type ContentItem = { "type": "input_text", text: string, } | { "type": "input_image", image_url: string, detail?: ImageDetail, } | { "type": "output_text", text: string, };
|
||||
|
||||
@@ -21,6 +21,7 @@ use codex_app_server_protocol::ThreadStartResponse;
|
||||
use codex_app_server_protocol::TurnStartParams;
|
||||
use codex_app_server_protocol::TurnStartResponse;
|
||||
use codex_app_server_protocol::UserInput as V2UserInput;
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
use codex_protocol::models::FunctionCallOutputBody;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
@@ -477,7 +478,7 @@ async fn dynamic_tool_call_round_trip_sends_content_items_to_model() -> Result<(
|
||||
DynamicToolCallOutputContentItem::InputImage { image_url } => {
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url,
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}
|
||||
}
|
||||
})
|
||||
@@ -535,7 +536,8 @@ async fn dynamic_tool_call_round_trip_sends_content_items_to_model() -> Result<(
|
||||
},
|
||||
{
|
||||
"type": "input_image",
|
||||
"image_url": "data:image/png;base64,AAA"
|
||||
"image_url": "data:image/png;base64,AAA",
|
||||
"detail": "high"
|
||||
}
|
||||
])
|
||||
);
|
||||
|
||||
@@ -26,7 +26,7 @@ const EXEC_DESCRIPTION_TEMPLATE: &str = r#"Run JavaScript code to orchestrate/co
|
||||
- Global helpers:
|
||||
- `exit()`: Immediately ends the current script successfully (like an early return from the top level).
|
||||
- `text(value: string | number | boolean | undefined | null)`: Appends a text item. Non-string values are stringified with `JSON.stringify(...)` when possible.
|
||||
- `image(imageUrlOrItem: string | { image_url: string; detail?: "auto" | "low" | "high" | "original" | null } | ImageContent, detail?: "auto" | "low" | "high" | "original" | null)`: Appends an image item. `image_url` can be an HTTPS URL or a base64-encoded `data:` URL. To forward an MCP tool image, pass an individual `ImageContent` block from `result.content`, for example `image(result.content[0])`. MCP image blocks may request original detail with `_meta: { "codex/imageDetail": "original" }`. When provided, the second `detail` argument overrides any detail embedded in the first argument.
|
||||
- `image(imageUrlOrItem: string | { image_url: string; detail?: "auto" | "low" | "high" | "original" | null } | ImageContent, detail?: "auto" | "low" | "high" | "original" | null)`: Appends an image item. `image_url` can be an HTTPS URL or a base64-encoded `data:` URL. To forward an MCP tool image, pass an individual `ImageContent` block from `result.content`, for example `image(result.content[0])`. MCP image blocks may request detail with `_meta: { "codex/imageDetail": "original" }`. When provided, the second `detail` argument overrides any detail embedded in the first argument.
|
||||
- `store(key: string, value: any)`: stores a serializable value under a string key for later `exec` calls in the same session.
|
||||
- `load(key: string)`: returns the stored value for a string key, or `undefined` if it is missing.
|
||||
- `notify(value: string | number | boolean | undefined | null)`: immediately injects an extra `custom_tool_call_output` for the current `exec` call. Values are stringified like `text(...)`.
|
||||
|
||||
@@ -15,6 +15,7 @@ pub use description::normalize_code_mode_identifier;
|
||||
pub use description::parse_exec_source;
|
||||
pub use description::render_code_mode_sample;
|
||||
pub use description::render_json_schema_to_typescript;
|
||||
pub use response::DEFAULT_IMAGE_DETAIL;
|
||||
pub use response::FunctionCallOutputContentItem;
|
||||
pub use response::ImageDetail;
|
||||
pub use runtime::DEFAULT_EXEC_YIELD_TIME_MS;
|
||||
|
||||
@@ -10,6 +10,8 @@ pub enum ImageDetail {
|
||||
Original,
|
||||
}
|
||||
|
||||
pub const DEFAULT_IMAGE_DETAIL: ImageDetail = ImageDetail::High;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum FunctionCallOutputContentItem {
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use serde_json::Value as JsonValue;
|
||||
|
||||
use crate::response::DEFAULT_IMAGE_DETAIL;
|
||||
use crate::response::FunctionCallOutputContentItem;
|
||||
use crate::response::ImageDetail;
|
||||
|
||||
@@ -81,7 +82,7 @@ pub(super) fn normalize_output_image(
|
||||
}
|
||||
})
|
||||
}
|
||||
None => None,
|
||||
None => Some(DEFAULT_IMAGE_DETAIL),
|
||||
};
|
||||
|
||||
Ok(FunctionCallOutputContentItem::InputImage { image_url, detail })
|
||||
@@ -159,7 +160,7 @@ fn parse_mcp_output_image(
|
||||
.and_then(JsonValue::as_object)
|
||||
.and_then(|meta| meta.get(CODEX_IMAGE_DETAIL_META_KEY))
|
||||
.and_then(JsonValue::as_str)
|
||||
.filter(|detail| *detail == "original")
|
||||
.filter(|detail| matches!(*detail, "auto" | "low" | "high" | "original"))
|
||||
.map(str::to_string);
|
||||
Ok((image_url, detail))
|
||||
}
|
||||
|
||||
@@ -669,7 +669,7 @@ text(JSON.stringify(returnsUndefined));
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "https://example.com/image.jpg".to_string(),
|
||||
detail: None,
|
||||
detail: Some(crate::DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: "[true,true,true]".to_string(),
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use super::*;
|
||||
use codex_model_provider_info::ModelProviderInfo;
|
||||
use codex_model_provider_info::WireApi;
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
async fn process_compacted_history_with_test_session(
|
||||
@@ -45,6 +46,7 @@ fn content_items_to_text_joins_non_empty_segments() {
|
||||
fn content_items_to_text_ignores_image_only_content() {
|
||||
let items = vec![ContentItem::InputImage {
|
||||
image_url: "file://image.png".to_string(),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}];
|
||||
|
||||
let joined = content_items_to_text(&items);
|
||||
|
||||
@@ -649,8 +649,8 @@ fn image_data_url_estimate_adjustment(item: &ResponseItem) -> (i64, i64) {
|
||||
match item {
|
||||
ResponseItem::Message { content, .. } => {
|
||||
for content_item in content {
|
||||
if let ContentItem::InputImage { image_url } = content_item {
|
||||
accumulate(image_url, None);
|
||||
if let ContentItem::InputImage { image_url, detail } = content_item {
|
||||
accumulate(image_url, *detail);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ use codex_protocol::AgentPath;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::models::BaseInstructions;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
use codex_protocol::models::FunctionCallOutputBody;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
@@ -382,6 +383,7 @@ fn for_prompt_strips_images_when_model_does_not_support_images() {
|
||||
},
|
||||
ContentItem::InputImage {
|
||||
image_url: "https://example.com/img.png".to_string(),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
ContentItem::InputText {
|
||||
text: "caption".to_string(),
|
||||
@@ -405,7 +407,7 @@ fn for_prompt_strips_images_when_model_does_not_support_images() {
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "https://example.com/result.png".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
]),
|
||||
},
|
||||
@@ -425,7 +427,7 @@ fn for_prompt_strips_images_when_model_does_not_support_images() {
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "https://example.com/js-repl-result.png".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
]),
|
||||
},
|
||||
@@ -506,6 +508,7 @@ fn for_prompt_strips_images_when_model_does_not_support_images() {
|
||||
},
|
||||
ContentItem::InputImage {
|
||||
image_url: "https://example.com/img.png".to_string(),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
],
|
||||
end_turn: None,
|
||||
@@ -715,7 +718,7 @@ fn replace_last_turn_images_replaces_tool_output_images() {
|
||||
body: FunctionCallOutputBody::ContentItems(vec![
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,AAA".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
]),
|
||||
success: Some(true),
|
||||
@@ -752,6 +755,7 @@ fn replace_last_turn_images_does_not_touch_user_images() {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,AAA".to_string(),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}],
|
||||
end_turn: None,
|
||||
phase: None,
|
||||
@@ -1680,7 +1684,10 @@ fn image_data_url_payload_does_not_dominate_message_estimate() {
|
||||
ContentItem::InputText {
|
||||
text: "Here is the screenshot".to_string(),
|
||||
},
|
||||
ContentItem::InputImage { image_url },
|
||||
ContentItem::InputImage {
|
||||
image_url,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
],
|
||||
end_turn: None,
|
||||
phase: None,
|
||||
@@ -1717,7 +1724,7 @@ fn image_data_url_payload_does_not_dominate_function_call_output_estimate() {
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url,
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
]),
|
||||
};
|
||||
@@ -1743,7 +1750,7 @@ fn image_data_url_payload_does_not_dominate_custom_tool_call_output_estimate() {
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url,
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
]),
|
||||
};
|
||||
@@ -1763,6 +1770,7 @@ fn non_base64_image_urls_are_unchanged() {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputImage {
|
||||
image_url: "https://example.com/foo.png".to_string(),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}],
|
||||
end_turn: None,
|
||||
phase: None,
|
||||
@@ -1772,7 +1780,7 @@ fn non_base64_image_urls_are_unchanged() {
|
||||
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "file:///tmp/foo.png".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
]),
|
||||
};
|
||||
@@ -1794,6 +1802,7 @@ fn data_url_without_base64_marker_is_unchanged() {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputImage {
|
||||
image_url: "data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg'/>".to_string(),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}],
|
||||
end_turn: None,
|
||||
phase: None,
|
||||
@@ -1814,7 +1823,7 @@ fn non_image_base64_data_url_is_unchanged() {
|
||||
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url,
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
]),
|
||||
};
|
||||
@@ -1832,7 +1841,10 @@ fn mixed_case_data_url_markers_are_adjusted() {
|
||||
let item = ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputImage { image_url }],
|
||||
content: vec![ContentItem::InputImage {
|
||||
image_url,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}],
|
||||
end_turn: None,
|
||||
phase: None,
|
||||
};
|
||||
@@ -1859,9 +1871,11 @@ fn multiple_inline_images_apply_multiple_fixed_costs() {
|
||||
},
|
||||
ContentItem::InputImage {
|
||||
image_url: image_url_one,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
ContentItem::InputImage {
|
||||
image_url: image_url_two,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
],
|
||||
end_turn: None,
|
||||
|
||||
@@ -90,7 +90,7 @@ fn parse_user_message(message: &[ContentItem]) -> Option<UserMessageItem> {
|
||||
text_elements: Vec::new(),
|
||||
});
|
||||
}
|
||||
ContentItem::InputImage { image_url } => {
|
||||
ContentItem::InputImage { image_url, .. } => {
|
||||
content.push(UserInput::Image {
|
||||
image_url: image_url.clone(),
|
||||
});
|
||||
|
||||
@@ -5,6 +5,7 @@ use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::items::WebSearchItem;
|
||||
use codex_protocol::items::build_hook_prompt_message;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
use codex_protocol::models::ReasoningItemContent;
|
||||
use codex_protocol::models::ReasoningItemReasoningSummary;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
@@ -26,9 +27,11 @@ fn parses_user_message_with_text_and_two_images() {
|
||||
},
|
||||
ContentItem::InputImage {
|
||||
image_url: img1.clone(),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
ContentItem::InputImage {
|
||||
image_url: img2.clone(),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
],
|
||||
end_turn: None,
|
||||
@@ -66,6 +69,7 @@ fn skips_local_image_label_text() {
|
||||
ContentItem::InputText { text: label },
|
||||
ContentItem::InputImage {
|
||||
image_url: image_url.clone(),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
ContentItem::InputText {
|
||||
text: "</image>".to_string(),
|
||||
@@ -145,6 +149,7 @@ fn skips_unnamed_image_label_text() {
|
||||
ContentItem::InputText { text: label },
|
||||
ContentItem::InputImage {
|
||||
image_url: image_url.clone(),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
ContentItem::InputText {
|
||||
text: codex_protocol::models::image_close_tag_text(),
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use codex_code_mode::ImageDetail as CodeModeImageDetail;
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::ImageDetail;
|
||||
|
||||
@@ -36,7 +37,9 @@ impl IntoProtocol<FunctionCallOutputContentItem>
|
||||
codex_code_mode::FunctionCallOutputContentItem::InputImage { image_url, detail } => {
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url,
|
||||
detail: detail.map(IntoProtocol::into_protocol),
|
||||
detail: detail
|
||||
.map(IntoProtocol::into_protocol)
|
||||
.or(Some(DEFAULT_IMAGE_DETAIL)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ use crate::tools::TELEMETRY_PREVIEW_TRUNCATION_NOTICE;
|
||||
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
use crate::unified_exec::resolve_max_tokens;
|
||||
use codex_protocol::mcp::CallToolResult;
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
use codex_protocol::models::FunctionCallOutputBody;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
@@ -463,10 +464,10 @@ pub(crate) fn response_input_to_code_mode_result(response: ResponseInputItem) ->
|
||||
| codex_protocol::models::ContentItem::OutputText { text } => {
|
||||
FunctionCallOutputContentItem::InputText { text }
|
||||
}
|
||||
codex_protocol::models::ContentItem::InputImage { image_url } => {
|
||||
codex_protocol::models::ContentItem::InputImage { image_url, detail } => {
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url,
|
||||
detail: None,
|
||||
detail: detail.or(Some(DEFAULT_IMAGE_DETAIL)),
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use super::*;
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
use core_test_support::assert_regex_match;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
@@ -173,7 +174,7 @@ fn mcp_tool_output_response_item_preserves_content_items() {
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: image_url.to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
]
|
||||
.as_slice()
|
||||
@@ -239,7 +240,7 @@ fn custom_tool_calls_can_derive_text_from_content_items() {
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,AAA".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: "line 2".to_string(),
|
||||
@@ -259,7 +260,7 @@ fn custom_tool_calls_can_derive_text_from_content_items() {
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,AAA".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: "line 2".to_string(),
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
use codex_protocol::models::FunctionCallOutputBody;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
@@ -133,7 +134,11 @@ impl ToolHandler for ViewImageHandler {
|
||||
} else {
|
||||
PromptImageMode::ResizeToFit
|
||||
};
|
||||
let image_detail = use_original_detail.then_some(ImageDetail::Original);
|
||||
let image_detail = Some(if use_original_detail {
|
||||
ImageDetail::Original
|
||||
} else {
|
||||
DEFAULT_IMAGE_DETAIL
|
||||
});
|
||||
|
||||
let image =
|
||||
load_for_prompt_bytes(abs_path.as_path(), file_bytes, image_mode).map_err(|error| {
|
||||
@@ -210,7 +215,7 @@ mod tests {
|
||||
fn code_mode_result_returns_image_url_object() {
|
||||
let output = ViewImageOutput {
|
||||
image_url: "data:image/png;base64,AAA".to_string(),
|
||||
image_detail: None,
|
||||
image_detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
};
|
||||
|
||||
let result = output.code_mode_result(&ToolPayload::Function {
|
||||
@@ -221,7 +226,7 @@ mod tests {
|
||||
result,
|
||||
json!({
|
||||
"image_url": "data:image/png;base64,AAA",
|
||||
"detail": null,
|
||||
"detail": "high",
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1225,9 +1225,9 @@ function parseImageDetail(detail) {
|
||||
if (typeof detail !== "string" || !detail) {
|
||||
throw new Error("codex.emitImage expected detail to be a non-empty string");
|
||||
}
|
||||
if (detail !== "original") {
|
||||
if (!["auto", "low", "high", "original"].includes(detail)) {
|
||||
throw new Error(
|
||||
'codex.emitImage only supports detail "original"; omit detail for default behavior',
|
||||
'codex.emitImage expected detail to be one of "auto", "low", "high", or "original"',
|
||||
);
|
||||
}
|
||||
return detail;
|
||||
@@ -1331,10 +1331,17 @@ function normalizeMcpImageData(data, mimeType) {
|
||||
}
|
||||
|
||||
function parseMcpImageDetail(meta) {
|
||||
if (!isPlainObject(meta) || meta["codex/imageDetail"] !== "original") {
|
||||
if (!isPlainObject(meta)) {
|
||||
return undefined;
|
||||
}
|
||||
return "original";
|
||||
const detail = meta["codex/imageDetail"];
|
||||
if (
|
||||
typeof detail !== "string" ||
|
||||
!["auto", "low", "high", "original"].includes(detail)
|
||||
) {
|
||||
return undefined;
|
||||
}
|
||||
return detail;
|
||||
}
|
||||
|
||||
function parseMcpToolResult(result) {
|
||||
|
||||
@@ -10,6 +10,7 @@ use std::time::Duration;
|
||||
|
||||
use codex_protocol::ThreadId;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ImageDetail;
|
||||
@@ -1750,7 +1751,8 @@ fn emitted_image_content_item(
|
||||
) -> FunctionCallOutputContentItem {
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url,
|
||||
detail: normalize_output_image_detail(&turn.model_info, detail),
|
||||
detail: normalize_output_image_detail(&turn.model_info, detail)
|
||||
.or(Some(DEFAULT_IMAGE_DETAIL)),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
use codex_protocol::dynamic_tools::DynamicToolCallOutputContentItem;
|
||||
use codex_protocol::dynamic_tools::DynamicToolResponse;
|
||||
use codex_protocol::dynamic_tools::DynamicToolSpec;
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ImageDetail;
|
||||
@@ -253,7 +254,7 @@ fn summarize_tool_call_response_for_multimodal_function_output() {
|
||||
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,abcd".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
]),
|
||||
};
|
||||
@@ -277,7 +278,7 @@ fn summarize_tool_call_response_for_multimodal_function_output() {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn emitted_image_content_item_drops_unsupported_explicit_detail() {
|
||||
async fn emitted_image_content_item_preserves_explicit_non_original_detail() {
|
||||
let (_session, turn) = make_session_and_context().await;
|
||||
let content_item = emitted_image_content_item(
|
||||
&turn,
|
||||
@@ -288,7 +289,7 @@ async fn emitted_image_content_item_drops_unsupported_explicit_detail() {
|
||||
content_item,
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,AAA".to_string(),
|
||||
detail: None,
|
||||
detail: Some(ImageDetail::Low),
|
||||
}
|
||||
);
|
||||
}
|
||||
@@ -314,7 +315,7 @@ async fn emitted_image_content_item_allows_explicit_original_detail_when_support
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn emitted_image_content_item_drops_explicit_original_detail_when_unsupported() {
|
||||
async fn emitted_image_content_item_defaults_to_high_for_unsupported_original_detail() {
|
||||
let (_session, turn) = make_session_and_context().await;
|
||||
|
||||
let content_item = emitted_image_content_item(
|
||||
@@ -327,7 +328,7 @@ async fn emitted_image_content_item_drops_explicit_original_detail_when_unsuppor
|
||||
content_item,
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,AAA".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}
|
||||
);
|
||||
}
|
||||
@@ -356,7 +357,7 @@ fn summarize_tool_call_response_for_multimodal_custom_output() {
|
||||
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,abcd".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
]),
|
||||
};
|
||||
@@ -1213,7 +1214,7 @@ console.log(out.type);
|
||||
image_url:
|
||||
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg=="
|
||||
.to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}]
|
||||
.as_slice()
|
||||
);
|
||||
@@ -1268,7 +1269,7 @@ await codex.emitImage({ bytes: png, mimeType: "image/png" });
|
||||
image_url:
|
||||
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg=="
|
||||
.to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}]
|
||||
.as_slice()
|
||||
);
|
||||
@@ -1325,13 +1326,13 @@ await codex.emitImage(
|
||||
image_url:
|
||||
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg=="
|
||||
.to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url:
|
||||
"data:image/gif;base64,R0lGODdhAQABAIAAAP///////ywAAAAAAQABAAACAkQBADs="
|
||||
.to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
]
|
||||
.as_slice()
|
||||
@@ -1387,7 +1388,7 @@ console.log("cell-complete");
|
||||
image_url:
|
||||
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg=="
|
||||
.to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}]
|
||||
.as_slice()
|
||||
);
|
||||
@@ -1465,11 +1466,11 @@ console.log("helpers-ran");
|
||||
vec![
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: data_url.to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: data_url.to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
]
|
||||
);
|
||||
@@ -1701,7 +1702,7 @@ await codex.emitImage("DATA:image/png;base64,AAA");
|
||||
result.content_items.as_slice(),
|
||||
[FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "DATA:image/png;base64,AAA".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}]
|
||||
.as_slice()
|
||||
);
|
||||
@@ -1751,10 +1752,7 @@ await codex.emitImage({ bytes: png, mimeType: "image/png", detail: "ultra" });
|
||||
)
|
||||
.await
|
||||
.expect_err("invalid detail should fail");
|
||||
assert!(
|
||||
err.to_string()
|
||||
.contains("only supports detail \"original\"")
|
||||
);
|
||||
assert!(err.to_string().contains("expected detail to be one of"));
|
||||
assert!(session.get_pending_input().await.is_empty());
|
||||
|
||||
Ok(())
|
||||
@@ -1804,7 +1802,7 @@ await codex.emitImage({ bytes: png, mimeType: "image/png", detail: null });
|
||||
result.content_items.as_slice(),
|
||||
[FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}]
|
||||
.as_slice()
|
||||
);
|
||||
|
||||
@@ -24,6 +24,7 @@ use codex_protocol::config_types::Settings;
|
||||
use codex_protocol::config_types::Verbosity;
|
||||
use codex_protocol::error::CodexErr;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ImageDetail;
|
||||
@@ -511,6 +512,7 @@ async fn resume_replays_legacy_js_repl_image_rollout_shapes() {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputImage {
|
||||
image_url: legacy_image_url.to_string(),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}],
|
||||
end_turn: None,
|
||||
phase: None,
|
||||
|
||||
@@ -1970,14 +1970,16 @@ image("data:image/png;base64,AAA");
|
||||
items[1],
|
||||
serde_json::json!({
|
||||
"type": "input_image",
|
||||
"image_url": "https://example.com/image.jpg"
|
||||
"image_url": "https://example.com/image.jpg",
|
||||
"detail": "high"
|
||||
}),
|
||||
);
|
||||
assert_eq!(
|
||||
items[2],
|
||||
serde_json::json!({
|
||||
"type": "input_image",
|
||||
"image_url": "data:image/png;base64,AAA"
|
||||
"image_url": "data:image/png;base64,AAA",
|
||||
"detail": "high"
|
||||
}),
|
||||
);
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use anyhow::Context;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
@@ -51,7 +52,7 @@ fn find_user_message_with_image(text: &str) -> Option<ResponseItem> {
|
||||
fn extract_image_url(item: &ResponseItem) -> Option<String> {
|
||||
match item {
|
||||
ResponseItem::Message { content, .. } => content.iter().find_map(|span| match span {
|
||||
ContentItem::InputImage { image_url } => Some(image_url.clone()),
|
||||
ContentItem::InputImage { image_url, .. } => Some(image_url.clone()),
|
||||
_ => None,
|
||||
}),
|
||||
_ => None,
|
||||
@@ -150,7 +151,10 @@ async fn copy_paste_local_image_persists_rollout_request_shape() -> anyhow::Resu
|
||||
ContentItem::InputText {
|
||||
text: codex_protocol::models::local_image_open_tag_text(/*label_number*/ 1),
|
||||
},
|
||||
ContentItem::InputImage { image_url },
|
||||
ContentItem::InputImage {
|
||||
image_url,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
ContentItem::InputText {
|
||||
text: codex_protocol::models::image_close_tag_text(),
|
||||
},
|
||||
@@ -234,7 +238,10 @@ async fn drag_drop_image_persists_rollout_request_shape() -> anyhow::Result<()>
|
||||
ContentItem::InputText {
|
||||
text: codex_protocol::models::image_open_tag_text(),
|
||||
},
|
||||
ContentItem::InputImage { image_url },
|
||||
ContentItem::InputImage {
|
||||
image_url,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
ContentItem::InputText {
|
||||
text: codex_protocol::models::image_close_tag_text(),
|
||||
},
|
||||
|
||||
@@ -787,7 +787,8 @@ async fn stdio_image_responses_round_trip() -> anyhow::Result<()> {
|
||||
output[1],
|
||||
json!({
|
||||
"type": "input_image",
|
||||
"image_url": OPENAI_PNG
|
||||
"image_url": OPENAI_PNG,
|
||||
"detail": "high"
|
||||
})
|
||||
);
|
||||
server.verify().await;
|
||||
|
||||
+1
-1
@@ -21,6 +21,6 @@ Scenario: Pre-turn auto-compaction with a context override emits the context dif
|
||||
04:message/user:<ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>
|
||||
05:message/user[4]:
|
||||
[01] <image>
|
||||
[02] <input_image:image_url>
|
||||
[02] <input_image:detail,image_url>
|
||||
[03] </image>
|
||||
[04] USER_THREE
|
||||
|
||||
@@ -533,7 +533,7 @@ async fn mcp_image_output_preserves_image_and_no_text_summary() -> Result<()> {
|
||||
);
|
||||
assert_eq!(
|
||||
arr[1],
|
||||
json!({"type": "input_image", "image_url": openai_png})
|
||||
json!({"type": "input_image", "image_url": openai_png, "detail": "high"})
|
||||
);
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -631,7 +631,10 @@ async fn view_image_tool_treats_null_detail_as_omitted() -> anyhow::Result<()> {
|
||||
.and_then(Value::as_array)
|
||||
.expect("function_call_output should be a content item array");
|
||||
assert_eq!(output_items.len(), 1);
|
||||
assert_eq!(output_items[0].get("detail"), None);
|
||||
assert_eq!(
|
||||
output_items[0].get("detail").and_then(Value::as_str),
|
||||
Some("high")
|
||||
);
|
||||
let image_url = output_items[0]
|
||||
.get("image_url")
|
||||
.and_then(Value::as_str)
|
||||
@@ -728,7 +731,10 @@ async fn view_image_tool_resizes_when_model_lacks_original_detail_support() -> a
|
||||
.and_then(Value::as_array)
|
||||
.expect("function_call_output should be a content item array");
|
||||
assert_eq!(output_items.len(), 1);
|
||||
assert_eq!(output_items[0].get("detail"), None);
|
||||
assert_eq!(
|
||||
output_items[0].get("detail").and_then(Value::as_str),
|
||||
Some("high")
|
||||
);
|
||||
|
||||
let image_url = output_items[0]
|
||||
.get("image_url")
|
||||
@@ -829,7 +835,10 @@ async fn view_image_tool_does_not_force_original_resolution_with_capability_only
|
||||
.and_then(Value::as_array)
|
||||
.expect("function_call_output should be a content item array");
|
||||
assert_eq!(output_items.len(), 1);
|
||||
assert_eq!(output_items[0].get("detail"), None);
|
||||
assert_eq!(
|
||||
output_items[0].get("detail").and_then(Value::as_str),
|
||||
Some("high")
|
||||
);
|
||||
let image_url = output_items[0]
|
||||
.get("image_url")
|
||||
.and_then(Value::as_str)
|
||||
|
||||
@@ -208,9 +208,18 @@ pub enum ResponseInputItem {
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum ContentItem {
|
||||
InputText { text: String },
|
||||
InputImage { image_url: String },
|
||||
OutputText { text: String },
|
||||
InputText {
|
||||
text: String,
|
||||
},
|
||||
InputImage {
|
||||
image_url: String,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
#[ts(optional)]
|
||||
detail: Option<ImageDetail>,
|
||||
},
|
||||
OutputText {
|
||||
text: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, JsonSchema, TS)]
|
||||
@@ -222,6 +231,8 @@ pub enum ImageDetail {
|
||||
Original,
|
||||
}
|
||||
|
||||
pub const DEFAULT_IMAGE_DETAIL: ImageDetail = ImageDetail::High;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
/// Classifies an assistant message as interim commentary or final answer text.
|
||||
@@ -935,6 +946,7 @@ pub fn local_image_content_items_with_label_number(
|
||||
}
|
||||
items.push(ContentItem::InputImage {
|
||||
image_url: image.into_data_url(),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
});
|
||||
if label_number.is_some() {
|
||||
items.push(ContentItem::InputText {
|
||||
@@ -1082,7 +1094,10 @@ impl From<Vec<UserInput>> for ResponseInputItem {
|
||||
ContentItem::InputText {
|
||||
text: image_open_tag_text(),
|
||||
},
|
||||
ContentItem::InputImage { image_url },
|
||||
ContentItem::InputImage {
|
||||
image_url,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
ContentItem::InputText {
|
||||
text: image_close_tag_text(),
|
||||
},
|
||||
@@ -1225,7 +1240,7 @@ impl From<crate::dynamic_tools::DynamicToolCallOutputContentItem>
|
||||
crate::dynamic_tools::DynamicToolCallOutputContentItem::InputImage { image_url } => {
|
||||
Self::InputImage {
|
||||
image_url,
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1462,9 +1477,13 @@ fn convert_mcp_content_to_items(
|
||||
.and_then(|meta| meta.get(CODEX_IMAGE_DETAIL_META_KEY))
|
||||
.and_then(serde_json::Value::as_str)
|
||||
.and_then(|detail| match detail {
|
||||
"auto" => Some(ImageDetail::Auto),
|
||||
"low" => Some(ImageDetail::Low),
|
||||
"high" => Some(ImageDetail::High),
|
||||
"original" => Some(ImageDetail::Original),
|
||||
_ => None,
|
||||
}),
|
||||
})
|
||||
.or(Some(DEFAULT_IMAGE_DETAIL)),
|
||||
}
|
||||
}
|
||||
Ok(McpContent::Unknown) | Err(_) => FunctionCallOutputContentItem::InputText {
|
||||
@@ -1555,7 +1574,7 @@ mod tests {
|
||||
items,
|
||||
vec![FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,Zm9v".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}]
|
||||
);
|
||||
}
|
||||
@@ -1630,7 +1649,7 @@ mod tests {
|
||||
items,
|
||||
vec![FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,Zm9v".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}]
|
||||
);
|
||||
}
|
||||
@@ -1653,7 +1672,7 @@ mod tests {
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,AAA".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: "line 2".to_string(),
|
||||
@@ -1672,7 +1691,7 @@ mod tests {
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,AAA".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
];
|
||||
|
||||
@@ -1695,7 +1714,7 @@ mod tests {
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,AAA".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
]);
|
||||
|
||||
@@ -2267,7 +2286,7 @@ mod tests {
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,BASE64".into(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
]
|
||||
);
|
||||
@@ -2294,7 +2313,7 @@ mod tests {
|
||||
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,BASE64".into(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
]),
|
||||
};
|
||||
@@ -2330,7 +2349,7 @@ mod tests {
|
||||
items,
|
||||
vec![FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,BASE64".into(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}]
|
||||
);
|
||||
|
||||
@@ -2370,7 +2389,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ignores_unknown_mcp_image_detail_metadata() -> Result<()> {
|
||||
fn preserves_standard_detail_metadata_on_mcp_images() -> Result<()> {
|
||||
let call_tool_result = CallToolResult {
|
||||
content: vec![serde_json::json!({
|
||||
"type": "image",
|
||||
@@ -2394,7 +2413,7 @@ mod tests {
|
||||
items,
|
||||
vec![FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,BASE64".into(),
|
||||
detail: None,
|
||||
detail: Some(ImageDetail::High),
|
||||
}]
|
||||
);
|
||||
|
||||
@@ -2574,7 +2593,10 @@ mod tests {
|
||||
ContentItem::InputText {
|
||||
text: image_open_tag_text(),
|
||||
},
|
||||
ContentItem::InputImage { image_url },
|
||||
ContentItem::InputImage {
|
||||
image_url,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
ContentItem::InputText {
|
||||
text: image_close_tag_text(),
|
||||
},
|
||||
@@ -2779,7 +2801,13 @@ mod tests {
|
||||
text: image_open_tag_text(),
|
||||
})
|
||||
);
|
||||
assert_eq!(content.get(1), Some(&ContentItem::InputImage { image_url }));
|
||||
assert_eq!(
|
||||
content.get(1),
|
||||
Some(&ContentItem::InputImage {
|
||||
image_url,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
})
|
||||
);
|
||||
assert_eq!(
|
||||
content.get(2),
|
||||
Some(&ContentItem::InputText {
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::ImageDetail;
|
||||
use codex_protocol::openai_models::ModelInfo;
|
||||
@@ -14,7 +15,8 @@ pub fn normalize_output_image_detail(
|
||||
Some(ImageDetail::Original) if can_request_original_image_detail(model_info) => {
|
||||
Some(ImageDetail::Original)
|
||||
}
|
||||
Some(ImageDetail::Original) | Some(_) | None => None,
|
||||
Some(ImageDetail::Original) | None => None,
|
||||
Some(ImageDetail::Auto | ImageDetail::Low | ImageDetail::High) => detail,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,7 +32,7 @@ pub fn sanitize_original_image_detail(
|
||||
if let FunctionCallOutputContentItem::InputImage { detail, .. } = item
|
||||
&& matches!(detail, Some(ImageDetail::Original))
|
||||
{
|
||||
*detail = None;
|
||||
*detail = Some(DEFAULT_IMAGE_DETAIL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use super::*;
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::ImageDetail;
|
||||
use codex_protocol::openai_models::ModelInfo;
|
||||
@@ -66,17 +67,21 @@ fn explicit_original_is_dropped_without_model_support() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unsupported_non_original_detail_is_dropped() {
|
||||
fn explicit_non_original_detail_is_preserved() {
|
||||
let model_info = model_info();
|
||||
|
||||
assert_eq!(
|
||||
normalize_output_image_detail(&model_info, Some(ImageDetail::Low)),
|
||||
None
|
||||
Some(ImageDetail::Low)
|
||||
);
|
||||
assert_eq!(
|
||||
normalize_output_image_detail(&model_info, Some(ImageDetail::High)),
|
||||
Some(ImageDetail::High)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sanitize_original_drops_original_without_support() {
|
||||
fn sanitize_original_falls_back_to_high_without_support() {
|
||||
let mut items = vec![
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: "header".to_string(),
|
||||
@@ -101,7 +106,7 @@ fn sanitize_original_drops_original_without_support() {
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,AAA".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,BBB".to_string(),
|
||||
|
||||
@@ -5,6 +5,7 @@ use crate::formatted_truncate_text;
|
||||
use crate::formatted_truncate_text_content_items_with_policy;
|
||||
use crate::truncate_function_output_items_with_policy;
|
||||
use crate::truncate_text;
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
@@ -114,7 +115,7 @@ fn truncates_across_multiple_under_limit_texts_and_reports_omitted() {
|
||||
FunctionCallOutputContentItem::InputText { text: t2.clone() },
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "img:mid".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputText { text: t3 },
|
||||
FunctionCallOutputContentItem::InputText { text: t4 },
|
||||
@@ -142,7 +143,7 @@ fn truncates_across_multiple_under_limit_texts_and_reports_omitted() {
|
||||
output[2],
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "img:mid".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}
|
||||
);
|
||||
|
||||
@@ -214,7 +215,7 @@ fn formatted_truncate_text_content_items_with_policy_merges_text_and_appends_ima
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "img:one".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: "efgh".to_string(),
|
||||
@@ -224,7 +225,7 @@ fn formatted_truncate_text_content_items_with_policy_merges_text_and_appends_ima
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "img:two".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
];
|
||||
|
||||
@@ -239,11 +240,11 @@ fn formatted_truncate_text_content_items_with_policy_merges_text_and_appends_ima
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "img:one".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "img:two".to_string(),
|
||||
detail: None,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
]
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user