mirror of
https://github.com/pchuan98/codex.git
synced 2026-07-01 00:31:56 +08:00
core: resize all history images behind a feature flag (#27247)
## Summary Adds complete client-side image preparation behind the default-off `resize_all_images` feature flag. When enabled, local image producers defer decoding and resizing. Images are prepared centrally before insertion into conversation history, covering user input, `view_image`, and structured tool-output images. ## Behavior - Processes base64 `data:` images in messages and function/custom tool outputs. - Leaves non-data URLs, including HTTP(S) URLs, unchanged. - Applies image-detail budgets: - `high` and omitted: 2048px maximum dimension and 2.5K 32px patches. - `original`: 6000px maximum dimension and 10K 32px patches. - `auto`: uses the same 2048px / 2.5K-patch budget as high. - `low`: unsupported and replaced with an actionable placeholder. - Preserves original image bytes when no resize or format conversion is needed. - Enforces the shared 1 GiB encoded and decoded data-URL sanity limits. - Replaces only an image that fails preparation, preserving sibling content and tool-output metadata. - Uses bounded placeholders distinguishing generic processing failures, oversized images, and unsupported `low` detail. - Prepares resumed and forked history before installing it as live history without modifying persisted rollouts. ## Flag-Off Behavior When `resize_all_images` is disabled: - Existing local user-input and `view_image` processing remains unchanged. - Existing decoding and error behavior remains unchanged. - Arbitrary tool-output images are not processed. - HTTP(S) image URLs continue to be forwarded unchanged. #### [git stack](https://github.com/magus/git-stack-cli) - ✅ `1` https://github.com/openai/codex/pull/27245 - 👉 `2` https://github.com/openai/codex/pull/27247 - ⏳ `3` https://github.com/openai/codex/pull/27246 - ⏳ `4` https://github.com/openai/codex/pull/27266
This commit is contained in:
committed by
GitHub
Unverified
parent
9d87b771ce
commit
a6f435ea94
@@ -576,6 +576,9 @@
|
||||
"request_rule": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"resize_all_images": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"responses_websockets": {
|
||||
"type": "boolean"
|
||||
},
|
||||
@@ -4708,6 +4711,9 @@
|
||||
"request_rule": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"resize_all_images": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"responses_websockets": {
|
||||
"type": "boolean"
|
||||
},
|
||||
|
||||
@@ -0,0 +1,122 @@
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::ImageDetail;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_utils_image::ImageProcessingError;
|
||||
use codex_utils_image::PromptImageMode;
|
||||
use codex_utils_image::PromptImageResizeLimits;
|
||||
use codex_utils_image::load_data_url_for_prompt;
|
||||
use tracing::warn;
|
||||
|
||||
pub(crate) const IMAGE_PROCESSING_ERROR_PLACEHOLDER: &str =
|
||||
"image content omitted because it could not be processed";
|
||||
const IMAGE_TOO_LARGE_PLACEHOLDER: &str =
|
||||
"image content omitted because it exceeded the supported size limit; use a smaller image";
|
||||
const UNSUPPORTED_LOW_DETAIL_PLACEHOLDER: &str = "image content omitted because detail 'low' is not supported; use 'high', 'original', or 'auto'";
|
||||
|
||||
const HIGH_DETAIL_LIMITS: PromptImageResizeLimits = PromptImageResizeLimits {
|
||||
max_dimension: 2048,
|
||||
max_patches: 2_500,
|
||||
};
|
||||
const ORIGINAL_DETAIL_LIMITS: PromptImageResizeLimits = PromptImageResizeLimits {
|
||||
max_dimension: 6000,
|
||||
max_patches: 10_000,
|
||||
};
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
enum ImagePreparationError {
|
||||
#[error("image detail `low` is not supported")]
|
||||
UnsupportedLowDetail,
|
||||
#[error(transparent)]
|
||||
Processing(#[from] ImageProcessingError),
|
||||
}
|
||||
|
||||
impl ImagePreparationError {
|
||||
fn placeholder(&self) -> &'static str {
|
||||
match self {
|
||||
ImagePreparationError::UnsupportedLowDetail => UNSUPPORTED_LOW_DETAIL_PLACEHOLDER,
|
||||
ImagePreparationError::Processing(ImageProcessingError::ImageTooLarge { .. }) => {
|
||||
IMAGE_TOO_LARGE_PLACEHOLDER
|
||||
}
|
||||
ImagePreparationError::Processing(_) => IMAGE_PROCESSING_ERROR_PLACEHOLDER,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn prepare_response_items(items: &mut [ResponseItem]) {
|
||||
for item in items {
|
||||
match item {
|
||||
ResponseItem::Message { content, .. } => prepare_message_content(content),
|
||||
ResponseItem::FunctionCallOutput { output, .. }
|
||||
| ResponseItem::CustomToolCallOutput { output, .. } => {
|
||||
if let Some(content) = output.content_items_mut() {
|
||||
prepare_tool_output_content(content);
|
||||
}
|
||||
}
|
||||
ResponseItem::Reasoning { .. }
|
||||
| ResponseItem::AgentMessage { .. }
|
||||
| ResponseItem::LocalShellCall { .. }
|
||||
| ResponseItem::FunctionCall { .. }
|
||||
| ResponseItem::ToolSearchCall { .. }
|
||||
| ResponseItem::CustomToolCall { .. }
|
||||
| ResponseItem::ToolSearchOutput { .. }
|
||||
| ResponseItem::WebSearchCall { .. }
|
||||
| ResponseItem::ImageGenerationCall { .. }
|
||||
| ResponseItem::Compaction { .. }
|
||||
| ResponseItem::CompactionTrigger
|
||||
| ResponseItem::ContextCompaction { .. }
|
||||
| ResponseItem::Other => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn prepare_message_content(items: &mut [ContentItem]) {
|
||||
for item in items {
|
||||
if let ContentItem::InputImage { image_url, detail } = item
|
||||
&& is_data_url(image_url)
|
||||
&& let Err(error) = prepare_image(image_url, *detail)
|
||||
{
|
||||
warn!(%error, "failed to prepare message image");
|
||||
*item = ContentItem::InputText {
|
||||
text: error.placeholder().to_string(),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn prepare_tool_output_content(items: &mut [FunctionCallOutputContentItem]) {
|
||||
for item in items {
|
||||
if let FunctionCallOutputContentItem::InputImage { image_url, detail } = item
|
||||
&& is_data_url(image_url)
|
||||
&& let Err(error) = prepare_image(image_url, *detail)
|
||||
{
|
||||
warn!(%error, "failed to prepare tool output image");
|
||||
*item = FunctionCallOutputContentItem::InputText {
|
||||
text: error.placeholder().to_string(),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn is_data_url(image_url: &str) -> bool {
|
||||
image_url
|
||||
.get(.."data:".len())
|
||||
.is_some_and(|prefix| prefix.eq_ignore_ascii_case("data:"))
|
||||
}
|
||||
|
||||
fn prepare_image(
|
||||
image_url: &mut String,
|
||||
detail: Option<ImageDetail>,
|
||||
) -> Result<(), ImagePreparationError> {
|
||||
let limits = match detail {
|
||||
None | Some(ImageDetail::Auto | ImageDetail::High) => HIGH_DETAIL_LIMITS,
|
||||
Some(ImageDetail::Original) => ORIGINAL_DETAIL_LIMITS,
|
||||
Some(ImageDetail::Low) => return Err(ImagePreparationError::UnsupportedLowDetail),
|
||||
};
|
||||
let image = load_data_url_for_prompt(image_url, PromptImageMode::ResizeWithLimits(limits))?;
|
||||
*image_url = image.into_data_url();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "image_preparation_tests.rs"]
|
||||
mod tests;
|
||||
@@ -0,0 +1,193 @@
|
||||
use std::io::Cursor;
|
||||
|
||||
use base64::Engine;
|
||||
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
|
||||
use codex_protocol::models::FunctionCallOutputBody;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_utils_image::data_url_from_bytes;
|
||||
use image::DynamicImage;
|
||||
use image::GenericImageView;
|
||||
use image::ImageBuffer;
|
||||
use image::ImageFormat;
|
||||
use image::Rgba;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
use super::*;
|
||||
|
||||
fn png_data_url(width: u32, height: u32) -> (String, Vec<u8>) {
|
||||
let image = ImageBuffer::from_pixel(width, height, Rgba([10u8, 20, 30, 255]));
|
||||
let mut encoded = Cursor::new(Vec::new());
|
||||
DynamicImage::ImageRgba8(image)
|
||||
.write_to(&mut encoded, ImageFormat::Png)
|
||||
.expect("encode PNG");
|
||||
let bytes = encoded.into_inner();
|
||||
(data_url_from_bytes("image/png", &bytes), bytes)
|
||||
}
|
||||
|
||||
fn decoded_image(image_url: &str) -> (Vec<u8>, DynamicImage) {
|
||||
let (_, payload) = image_url.split_once(',').expect("data URL payload");
|
||||
let bytes = BASE64_STANDARD.decode(payload).expect("decode image URL");
|
||||
let image = image::load_from_memory(&bytes).expect("decode processed image");
|
||||
(bytes, image)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn preparation_preserves_small_image_bytes_and_non_data_urls() {
|
||||
let (data_url, original_bytes) = png_data_url(/*width*/ 64, /*height*/ 32);
|
||||
let http_url = "https://example.com/image.png".to_string();
|
||||
let mut items = vec![ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![
|
||||
ContentItem::InputImage {
|
||||
image_url: data_url,
|
||||
detail: Some(ImageDetail::High),
|
||||
},
|
||||
ContentItem::InputImage {
|
||||
image_url: http_url.clone(),
|
||||
detail: Some(ImageDetail::Low),
|
||||
},
|
||||
],
|
||||
phase: None,
|
||||
}];
|
||||
|
||||
prepare_response_items(&mut items);
|
||||
|
||||
let ResponseItem::Message { content, .. } = &items[0] else {
|
||||
panic!("expected message");
|
||||
};
|
||||
let [
|
||||
ContentItem::InputImage { image_url, .. },
|
||||
ContentItem::InputImage {
|
||||
image_url: preserved_http_url,
|
||||
detail: Some(ImageDetail::Low),
|
||||
},
|
||||
] = content.as_slice()
|
||||
else {
|
||||
panic!("expected two images");
|
||||
};
|
||||
assert_eq!(decoded_image(image_url).0, original_bytes);
|
||||
assert_eq!(preserved_http_url, &http_url);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detail_policies_apply_the_expected_budgets() {
|
||||
for (detail, input_dimensions, expected_dimensions) in [
|
||||
(Some(ImageDetail::High), (2048, 2048), (1600, 1600)),
|
||||
(Some(ImageDetail::Original), (6401, 100), (6000, 94)),
|
||||
(Some(ImageDetail::Original), (3201, 3201), (3200, 3200)),
|
||||
(Some(ImageDetail::Auto), (2048, 2048), (1600, 1600)),
|
||||
(None, (2048, 2048), (1600, 1600)),
|
||||
] {
|
||||
let (image_url, _) = png_data_url(input_dimensions.0, input_dimensions.1);
|
||||
let mut items = vec![ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputImage { image_url, detail }],
|
||||
phase: None,
|
||||
}];
|
||||
|
||||
prepare_response_items(&mut items);
|
||||
|
||||
let ResponseItem::Message { content, .. } = &items[0] else {
|
||||
panic!("expected message");
|
||||
};
|
||||
let [ContentItem::InputImage { image_url, .. }] = content.as_slice() else {
|
||||
panic!("expected image");
|
||||
};
|
||||
assert_eq!(decoded_image(image_url).1.dimensions(), expected_dimensions);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn preparation_replaces_only_failed_tool_images_and_preserves_metadata() {
|
||||
let (valid_image_url, _) = png_data_url(/*width*/ 64, /*height*/ 32);
|
||||
let expected_valid_image_url = valid_image_url.clone();
|
||||
let mut items = vec![ResponseItem::CustomToolCallOutput {
|
||||
call_id: "call-1".to_string(),
|
||||
name: None,
|
||||
output: FunctionCallOutputPayload {
|
||||
body: FunctionCallOutputBody::ContentItems(vec![
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: "before".to_string(),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,%%%".to_string(),
|
||||
detail: Some(ImageDetail::High),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: data_url_from_bytes("image/png", b"not an image"),
|
||||
detail: Some(ImageDetail::High),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: valid_image_url.clone(),
|
||||
detail: Some(ImageDetail::Low),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: valid_image_url,
|
||||
detail: Some(ImageDetail::High),
|
||||
},
|
||||
]),
|
||||
success: Some(true),
|
||||
},
|
||||
}];
|
||||
|
||||
prepare_response_items(&mut items);
|
||||
|
||||
assert_eq!(
|
||||
items,
|
||||
vec![ResponseItem::CustomToolCallOutput {
|
||||
call_id: "call-1".to_string(),
|
||||
name: None,
|
||||
output: FunctionCallOutputPayload {
|
||||
body: FunctionCallOutputBody::ContentItems(vec![
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: "before".to_string(),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: IMAGE_PROCESSING_ERROR_PLACEHOLDER.to_string(),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: IMAGE_PROCESSING_ERROR_PLACEHOLDER.to_string(),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: UNSUPPORTED_LOW_DETAIL_PLACEHOLDER.to_string(),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: expected_valid_image_url,
|
||||
detail: Some(ImageDetail::High),
|
||||
},
|
||||
]),
|
||||
success: Some(true),
|
||||
},
|
||||
}]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn preparation_errors_use_bounded_actionable_placeholders() {
|
||||
let cases = [
|
||||
(
|
||||
ImagePreparationError::UnsupportedLowDetail,
|
||||
UNSUPPORTED_LOW_DETAIL_PLACEHOLDER,
|
||||
),
|
||||
(
|
||||
ImagePreparationError::Processing(ImageProcessingError::ImageTooLarge {
|
||||
representation: "decoded input",
|
||||
size: 2,
|
||||
max: 1,
|
||||
}),
|
||||
IMAGE_TOO_LARGE_PLACEHOLDER,
|
||||
),
|
||||
(
|
||||
ImagePreparationError::Processing(ImageProcessingError::InvalidDataUrl {
|
||||
reason: "details remain in logs".to_string(),
|
||||
}),
|
||||
IMAGE_PROCESSING_ERROR_PLACEHOLDER,
|
||||
),
|
||||
];
|
||||
|
||||
for (error, expected) in cases {
|
||||
assert_eq!(error.placeholder(), expected);
|
||||
}
|
||||
}
|
||||
@@ -42,6 +42,7 @@ mod exec_policy;
|
||||
mod git_info_tests;
|
||||
mod guardian;
|
||||
mod hook_runtime;
|
||||
mod image_preparation;
|
||||
mod installation_id;
|
||||
pub(crate) mod landlock;
|
||||
pub use landlock::spawn_command_under_linux_sandbox;
|
||||
|
||||
@@ -5,7 +5,6 @@ use codex_exec_server::ExecServerRuntimePaths;
|
||||
use codex_login::AuthManager;
|
||||
use codex_protocol::error::CodexErr;
|
||||
use codex_protocol::error::Result as CodexResult;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
@@ -78,8 +77,7 @@ pub(crate) async fn build_prompt_input_from_session(
|
||||
.await;
|
||||
|
||||
if !input.is_empty() {
|
||||
let input_item = ResponseInputItem::from(input);
|
||||
let response_item = ResponseItem::from(input_item);
|
||||
let response_item = sess.response_item_from_user_input(turn_context.as_ref(), input);
|
||||
sess.record_conversation_items(turn_context.as_ref(), std::slice::from_ref(&response_item))
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
@@ -31,6 +32,7 @@ use crate::context::PersonalitySpecInstructions;
|
||||
use crate::default_skill_metadata_budget;
|
||||
use crate::environment_selection::ResolvedTurnEnvironments;
|
||||
use crate::exec_policy::ExecPolicyManager;
|
||||
use crate::image_preparation::prepare_response_items;
|
||||
use crate::parse_turn_item;
|
||||
use crate::realtime_conversation::RealtimeConversationManager;
|
||||
use crate::session_prefix::format_subagent_notification_message;
|
||||
@@ -326,6 +328,7 @@ use codex_protocol::config_types::CollaborationMode;
|
||||
use codex_protocol::config_types::Personality;
|
||||
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
use codex_protocol::config_types::WindowsSandboxLevel;
|
||||
use codex_protocol::models::LocalImagePreparation;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
|
||||
@@ -1300,13 +1303,20 @@ impl Session {
|
||||
rollout_items: &[RolloutItem],
|
||||
) -> Option<PreviousTurnSettings> {
|
||||
let rollout_reconstruction::RolloutReconstruction {
|
||||
history,
|
||||
mut history,
|
||||
previous_turn_settings,
|
||||
reference_context_item,
|
||||
window_id,
|
||||
} = self
|
||||
.reconstruct_history_from_rollout(turn_context, rollout_items)
|
||||
.await;
|
||||
if turn_context.features.enabled(Feature::ResizeAllImages) {
|
||||
// Keep the recorded rollout unchanged. Prepare its reconstructed history before
|
||||
// installing it, so legacy images are processed once for this resume or fork and
|
||||
// will be processed again if the rollout is reconstructed in a future session.
|
||||
// This meets image resizing requirements without modifying persisted rollouts.
|
||||
prepare_response_items(&mut history);
|
||||
}
|
||||
{
|
||||
let mut state = self.state.lock().await;
|
||||
state.replace_history(history, reference_context_item);
|
||||
@@ -2583,11 +2593,43 @@ impl Session {
|
||||
|
||||
/// Records conversation items: append to history, persist to rollout, and
|
||||
/// notify clients observing raw response items.
|
||||
pub(crate) fn prepare_conversation_items_for_history<'a>(
|
||||
&self,
|
||||
turn_context: &TurnContext,
|
||||
items: &'a [ResponseItem],
|
||||
) -> Cow<'a, [ResponseItem]> {
|
||||
if !turn_context.features.enabled(Feature::ResizeAllImages) {
|
||||
return Cow::Borrowed(items);
|
||||
}
|
||||
|
||||
let mut prepared_items = items.to_vec();
|
||||
prepare_response_items(&mut prepared_items);
|
||||
Cow::Owned(prepared_items)
|
||||
}
|
||||
|
||||
pub(crate) fn response_item_from_user_input(
|
||||
&self,
|
||||
turn_context: &TurnContext,
|
||||
input: Vec<UserInput>,
|
||||
) -> ResponseItem {
|
||||
let local_image_preparation = if turn_context.features.enabled(Feature::ResizeAllImages) {
|
||||
LocalImagePreparation::Defer
|
||||
} else {
|
||||
LocalImagePreparation::Process
|
||||
};
|
||||
ResponseItem::from(ResponseInputItem::from_user_input(
|
||||
input,
|
||||
local_image_preparation,
|
||||
))
|
||||
}
|
||||
|
||||
pub(crate) async fn record_conversation_items(
|
||||
&self,
|
||||
turn_context: &TurnContext,
|
||||
items: &[ResponseItem],
|
||||
) {
|
||||
let items = self.prepare_conversation_items_for_history(turn_context, items);
|
||||
let items = items.as_ref();
|
||||
{
|
||||
let mut state = self.state.lock().await;
|
||||
state.record_items(items.iter(), turn_context.truncation_policy);
|
||||
@@ -3211,7 +3253,7 @@ impl Session {
|
||||
// Persist the user message to history, but emit the turn item from `UserInput` so
|
||||
// UI-only `text_elements` are preserved. `ResponseItem::Message` does not carry
|
||||
// those spans, and `record_response_item_and_emit_turn_item` would drop them.
|
||||
let response_item = ResponseItem::from(ResponseInputItem::from(input.to_vec()));
|
||||
let response_item = self.response_item_from_user_input(turn_context, input.to_vec());
|
||||
self.record_conversation_items(turn_context, std::slice::from_ref(&response_item))
|
||||
.await;
|
||||
let mut user_message_item = UserMessageItem::new(input);
|
||||
|
||||
@@ -42,7 +42,9 @@ use codex_protocol::models::ActivePermissionProfile;
|
||||
use codex_protocol::models::BUILT_IN_PERMISSION_PROFILE_WORKSPACE;
|
||||
use codex_protocol::models::FileSystemPermissions;
|
||||
use codex_protocol::models::FunctionCallOutputBody;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ImageDetail;
|
||||
use codex_protocol::models::PermissionProfile;
|
||||
use codex_protocol::models::SandboxEnforcement;
|
||||
use codex_protocol::openai_models::ModelServiceTier;
|
||||
@@ -1641,6 +1643,114 @@ async fn record_initial_history_reconstructs_resumed_transcript() {
|
||||
assert_eq!(expected, history.raw_items());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn resize_all_images_prepares_failures_before_history_insertion() {
|
||||
let (session, turn_context, _rx) = make_session_and_context_with_auth_and_config_and_rx(
|
||||
CodexAuth::from_api_key("Test API Key"),
|
||||
Vec::new(),
|
||||
|config| {
|
||||
let _ = config.features.enable(Feature::ResizeAllImages);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
let item = ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-1".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
body: FunctionCallOutputBody::ContentItems(vec![
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: "before".to_string(),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,%%%".to_string(),
|
||||
detail: Some(ImageDetail::High),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "https://example.com/image.png".to_string(),
|
||||
detail: Some(ImageDetail::High),
|
||||
},
|
||||
]),
|
||||
success: Some(true),
|
||||
},
|
||||
};
|
||||
|
||||
session
|
||||
.record_conversation_items(turn_context.as_ref(), std::slice::from_ref(&item))
|
||||
.await;
|
||||
|
||||
assert_eq!(
|
||||
session.state.lock().await.clone_history().raw_items(),
|
||||
&[ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-1".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
body: FunctionCallOutputBody::ContentItems(vec![
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: "before".to_string(),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: "image content omitted because it could not be processed".to_string(),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "https://example.com/image.png".to_string(),
|
||||
detail: Some(ImageDetail::High),
|
||||
},
|
||||
]),
|
||||
success: Some(true),
|
||||
},
|
||||
}]
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn resize_all_images_prepares_resumed_history_before_installing_it() {
|
||||
let (session, _turn_context, _rx) = make_session_and_context_with_auth_and_config_and_rx(
|
||||
CodexAuth::from_api_key("Test API Key"),
|
||||
Vec::new(),
|
||||
|config| {
|
||||
let _ = config.features.enable(Feature::ResizeAllImages);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
let resumed_item = ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![
|
||||
ContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,%%%".to_string(),
|
||||
detail: Some(ImageDetail::High),
|
||||
},
|
||||
ContentItem::InputText {
|
||||
text: "keep me".to_string(),
|
||||
},
|
||||
],
|
||||
phase: None,
|
||||
};
|
||||
|
||||
session
|
||||
.record_initial_history(InitialHistory::Resumed(ResumedHistory {
|
||||
conversation_id: ThreadId::default(),
|
||||
history: vec![RolloutItem::ResponseItem(resumed_item)],
|
||||
rollout_path: Some(PathBuf::from("/tmp/resume.jsonl")),
|
||||
}))
|
||||
.await;
|
||||
|
||||
assert_eq!(
|
||||
session.state.lock().await.clone_history().raw_items(),
|
||||
&[ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![
|
||||
ContentItem::InputText {
|
||||
text: "image content omitted because it could not be processed".to_string(),
|
||||
},
|
||||
ContentItem::InputText {
|
||||
text: "keep me".to_string(),
|
||||
},
|
||||
],
|
||||
phase: None,
|
||||
}]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolve_multi_agent_version_handles_unset_and_legacy_history() {
|
||||
let thread_id = ThreadId::default();
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use codex_features::Feature;
|
||||
use codex_protocol::items::ImageViewItem;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
@@ -8,6 +9,7 @@ use codex_protocol::models::ImageDetail;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::openai_models::InputModality;
|
||||
use codex_utils_image::PromptImageMode;
|
||||
use codex_utils_image::data_url_from_bytes;
|
||||
use codex_utils_image::load_for_prompt_bytes;
|
||||
use serde::Deserialize;
|
||||
|
||||
@@ -175,25 +177,30 @@ impl ViewImageHandler {
|
||||
let can_request_original_detail = can_request_original_image_detail(&turn.model_info);
|
||||
let use_original_detail =
|
||||
can_request_original_detail && matches!(detail, Some(ViewImageDetail::Original));
|
||||
let image_mode = if use_original_detail {
|
||||
PromptImageMode::Original
|
||||
} else {
|
||||
PromptImageMode::ResizeToFit
|
||||
};
|
||||
let image_detail = if use_original_detail {
|
||||
ImageDetail::Original
|
||||
} else {
|
||||
DEFAULT_IMAGE_DETAIL
|
||||
};
|
||||
|
||||
let image =
|
||||
load_for_prompt_bytes(abs_path.as_path(), file_bytes, image_mode).map_err(|error| {
|
||||
FunctionCallError::RespondToModel(format!(
|
||||
"unable to process image at `{}`: {error}",
|
||||
abs_path.display()
|
||||
))
|
||||
})?;
|
||||
let image_url = image.into_data_url();
|
||||
let image_url = if turn.features.enabled(Feature::ResizeAllImages) {
|
||||
// The history insertion path owns image decoding and resizing when this is enabled.
|
||||
data_url_from_bytes("application/octet-stream", &file_bytes)
|
||||
} else {
|
||||
let image_mode = if use_original_detail {
|
||||
PromptImageMode::Original
|
||||
} else {
|
||||
PromptImageMode::ResizeToFit
|
||||
};
|
||||
load_for_prompt_bytes(abs_path.as_path(), file_bytes, image_mode)
|
||||
.map_err(|error| {
|
||||
FunctionCallError::RespondToModel(format!(
|
||||
"unable to process image at `{}`: {error}",
|
||||
abs_path.display()
|
||||
))
|
||||
})?
|
||||
.into_data_url()
|
||||
};
|
||||
|
||||
let item = TurnItem::ImageView(ImageViewItem {
|
||||
id: call_id,
|
||||
@@ -218,7 +225,7 @@ pub struct ViewImageOutput {
|
||||
|
||||
impl ToolOutput for ViewImageOutput {
|
||||
fn log_preview(&self) -> String {
|
||||
self.image_url.clone()
|
||||
format!("<image data URL omitted: {} bytes>", self.image_url.len())
|
||||
}
|
||||
|
||||
fn success_for_logging(&self) -> bool {
|
||||
@@ -264,6 +271,16 @@ mod tests {
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
#[test]
|
||||
fn log_preview_omits_image_data() {
|
||||
let output = ViewImageOutput {
|
||||
image_url: "data:image/png;base64,AAA".to_string(),
|
||||
image_detail: DEFAULT_IMAGE_DETAIL,
|
||||
};
|
||||
|
||||
assert_eq!(output.log_preview(), "<image data URL omitted: 25 bytes>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn code_mode_result_returns_image_url_object() {
|
||||
let output = ViewImageOutput {
|
||||
|
||||
@@ -42,11 +42,16 @@ use core_test_support::test_codex::turn_permission_fields;
|
||||
use core_test_support::wait_for_event;
|
||||
use core_test_support::wait_for_event_match;
|
||||
use core_test_support::wait_for_mcp_server;
|
||||
use image::DynamicImage;
|
||||
use image::GenericImageView;
|
||||
use image::ImageBuffer;
|
||||
use image::Rgba;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::Value;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
use std::fs;
|
||||
use std::io::Cursor;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
@@ -168,12 +173,21 @@ async fn run_code_mode_turn_with_config(
|
||||
code: &str,
|
||||
configure: impl FnOnce(&mut Config) + Send + 'static,
|
||||
) -> Result<(TestCodex, ResponseMock)> {
|
||||
let mut builder = test_codex()
|
||||
.with_model("test-gpt-5.1-codex")
|
||||
.with_config(move |config| {
|
||||
let _ = config.features.enable(Feature::CodeMode);
|
||||
configure(config);
|
||||
});
|
||||
run_code_mode_turn_with_model_and_config(server, prompt, code, "test-gpt-5.1-codex", configure)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn run_code_mode_turn_with_model_and_config(
|
||||
server: &MockServer,
|
||||
prompt: &str,
|
||||
code: &str,
|
||||
model: &'static str,
|
||||
configure: impl FnOnce(&mut Config) + Send + 'static,
|
||||
) -> Result<(TestCodex, ResponseMock)> {
|
||||
let mut builder = test_codex().with_model(model).with_config(move |config| {
|
||||
let _ = config.features.enable(Feature::CodeMode);
|
||||
configure(config);
|
||||
});
|
||||
let test = builder.build(server).await?;
|
||||
|
||||
responses::mount_sse_once(
|
||||
@@ -2536,6 +2550,103 @@ image("data:image/png;base64,AAA");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn resize_all_images_replaces_malformed_code_mode_image_only() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let (_test, second_mock) = run_code_mode_turn_with_config(
|
||||
&server,
|
||||
"use exec to return images",
|
||||
r#"
|
||||
image("https://example.com/image.jpg");
|
||||
image("data:image/png;base64,AAA");
|
||||
"#,
|
||||
|config| {
|
||||
let _ = config.features.enable(Feature::ResizeAllImages);
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let items = custom_tool_output_items(&req, "call-1");
|
||||
let (_, success) = custom_tool_output_body_and_success(&req, "call-1");
|
||||
assert_ne!(success, Some(false));
|
||||
assert_eq!(items.len(), 3);
|
||||
assert_eq!(
|
||||
items[1],
|
||||
serde_json::json!({
|
||||
"type": "input_image",
|
||||
"image_url": "https://example.com/image.jpg",
|
||||
"detail": "high"
|
||||
})
|
||||
);
|
||||
assert_eq!(
|
||||
items[2],
|
||||
serde_json::json!({
|
||||
"type": "input_text",
|
||||
"text": "image content omitted because it could not be processed"
|
||||
})
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn resize_all_images_resizes_explicit_original_code_mode_image() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let original_dimensions = (6401, 100);
|
||||
let image = ImageBuffer::from_pixel(
|
||||
original_dimensions.0,
|
||||
original_dimensions.1,
|
||||
Rgba([20, 40, 60, 255]),
|
||||
);
|
||||
let mut encoded = Cursor::new(Vec::new());
|
||||
DynamicImage::ImageRgba8(image).write_to(&mut encoded, image::ImageFormat::Png)?;
|
||||
let image_data_url = format!(
|
||||
"data:image/png;base64,{}",
|
||||
BASE64_STANDARD.encode(encoded.into_inner())
|
||||
);
|
||||
let code = format!(
|
||||
"image({}, \"original\");",
|
||||
serde_json::to_string(&image_data_url)?
|
||||
);
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let (_test, second_mock) = run_code_mode_turn_with_model_and_config(
|
||||
&server,
|
||||
"use exec to return a large original-detail image",
|
||||
&code,
|
||||
"gpt-5.3-codex",
|
||||
|config| {
|
||||
config
|
||||
.features
|
||||
.enable(Feature::ResizeAllImages)
|
||||
.expect("resize_all_images should be enabled");
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let items = custom_tool_output_items(&req, "call-1");
|
||||
let (_, success) = custom_tool_output_body_and_success(&req, "call-1");
|
||||
assert_ne!(success, Some(false));
|
||||
let resized_url = items[1]["image_url"]
|
||||
.as_str()
|
||||
.expect("code mode image output should contain a data URL");
|
||||
assert_eq!(items[1]["detail"], "original");
|
||||
let (_, resized_base64) = resized_url
|
||||
.split_once(',')
|
||||
.expect("resized image should contain a data URL prefix");
|
||||
let resized_bytes = BASE64_STANDARD.decode(resized_base64)?;
|
||||
let resized = image::load_from_memory(&resized_bytes)?;
|
||||
let resized_dimensions = resized.dimensions();
|
||||
assert_eq!(resized_dimensions, (6000, 94));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_can_use_view_image_result_with_image_helper() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
use anyhow::Context as _;
|
||||
use anyhow::ensure;
|
||||
use base64::Engine;
|
||||
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
|
||||
use core_test_support::test_codex::local_selections;
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::OsStr;
|
||||
@@ -25,6 +27,7 @@ use codex_core::config::Config;
|
||||
use codex_exec_server::CreateDirectoryOptions;
|
||||
use codex_exec_server::Environment;
|
||||
use codex_exec_server::HttpRequestParams;
|
||||
use codex_features::Feature;
|
||||
use codex_login::CodexAuth;
|
||||
use codex_mcp::MCP_SANDBOX_STATE_META_CAPABILITY;
|
||||
use codex_models_manager::manager::RefreshStrategy;
|
||||
@@ -57,11 +60,16 @@ use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::test_codex::turn_permission_fields;
|
||||
use core_test_support::wait_for_event;
|
||||
use core_test_support::wait_for_mcp_server;
|
||||
use image::DynamicImage;
|
||||
use image::GenericImageView;
|
||||
use image::ImageBuffer;
|
||||
use image::Rgba;
|
||||
use reqwest::Client;
|
||||
use reqwest::StatusCode;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
use serial_test::serial;
|
||||
use std::io::Cursor;
|
||||
use tempfile::tempdir;
|
||||
use tokio::process::Child;
|
||||
use tokio::process::Command;
|
||||
@@ -1257,6 +1265,107 @@ async fn stdio_image_responses_round_trip() -> anyhow::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
|
||||
#[serial(mcp_test_value)]
|
||||
async fn stdio_image_responses_resize_large_image() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let call_id = "img-resize-1";
|
||||
let server_name = "rmcp";
|
||||
let namespace = format!("mcp__{server_name}");
|
||||
|
||||
let original_dimensions = (3000, 2000);
|
||||
let image = ImageBuffer::from_pixel(
|
||||
original_dimensions.0,
|
||||
original_dimensions.1,
|
||||
Rgba([20, 40, 60, 255]),
|
||||
);
|
||||
let mut encoded = Cursor::new(Vec::new());
|
||||
DynamicImage::ImageRgba8(image).write_to(&mut encoded, image::ImageFormat::Png)?;
|
||||
let image_data_url = format!(
|
||||
"data:image/png;base64,{}",
|
||||
BASE64_STANDARD.encode(encoded.into_inner())
|
||||
);
|
||||
let tool_arguments = serde_json::to_string(&json!({
|
||||
"scenario": "image_only",
|
||||
"data_url": image_data_url,
|
||||
}))?;
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("resp-1"),
|
||||
responses::ev_function_call_with_namespace(
|
||||
call_id,
|
||||
&namespace,
|
||||
"image_scenario",
|
||||
&tool_arguments,
|
||||
),
|
||||
responses::ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let final_mock = mount_sse_once(
|
||||
&server,
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("msg-1", "done"),
|
||||
responses::ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let rmcp_test_server_bin = remote_aware_stdio_server_bin()?;
|
||||
let fixture = test_codex()
|
||||
.with_config(move |config| {
|
||||
config
|
||||
.features
|
||||
.enable(Feature::ResizeAllImages)
|
||||
.expect("resize_all_images should be enabled");
|
||||
insert_mcp_server(
|
||||
config,
|
||||
server_name,
|
||||
stdio_transport(rmcp_test_server_bin, /*env*/ None, Vec::new()),
|
||||
TestMcpServerOptions {
|
||||
environment_id: remote_aware_environment_id(),
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
})
|
||||
.build_with_remote_env(&server)
|
||||
.await?;
|
||||
wait_for_mcp_server(&fixture.codex, server_name).await?;
|
||||
|
||||
fixture
|
||||
.codex
|
||||
.submit(read_only_user_turn(
|
||||
&fixture,
|
||||
"call the rmcp image_scenario tool",
|
||||
))
|
||||
.await?;
|
||||
wait_for_event(&fixture.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
let output_item = final_mock.single_request().function_call_output(call_id);
|
||||
assert_eq!(output_item["call_id"], call_id);
|
||||
let output = output_item["output"]
|
||||
.as_array()
|
||||
.expect("image MCP output should be content items");
|
||||
let resized_url = output[1]["image_url"]
|
||||
.as_str()
|
||||
.expect("MCP image output should contain a data URL");
|
||||
assert_eq!(output[1]["detail"], "high");
|
||||
let (_, resized_base64) = resized_url
|
||||
.split_once(',')
|
||||
.expect("resized image should contain a data URL prefix");
|
||||
let resized_bytes = BASE64_STANDARD.decode(resized_base64)?;
|
||||
let resized = image::load_from_memory(&resized_bytes)?;
|
||||
let resized_dimensions = resized.dimensions();
|
||||
assert_eq!(resized_dimensions, (1920, 1280));
|
||||
|
||||
server.verify().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
|
||||
#[serial(mcp_test_value)]
|
||||
async fn stdio_image_responses_preserve_original_detail_metadata() -> anyhow::Result<()> {
|
||||
|
||||
@@ -7,6 +7,7 @@ use codex_exec_server::CreateDirectoryOptions;
|
||||
use codex_exec_server::LOCAL_ENVIRONMENT_ID;
|
||||
use codex_exec_server::REMOTE_ENVIRONMENT_ID;
|
||||
use codex_exec_server::RemoveOptions;
|
||||
use codex_features::Feature;
|
||||
use codex_login::CodexAuth;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::models::PermissionProfile;
|
||||
@@ -177,10 +178,15 @@ async fn write_workspace_png(
|
||||
async fn assert_user_turn_local_image_resizes_to(
|
||||
original_dimensions: (u32, u32),
|
||||
expected_dimensions: (u32, u32),
|
||||
resize_policy: TestImageResizePolicy,
|
||||
) -> anyhow::Result<()> {
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex();
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
if resize_policy == TestImageResizePolicy::AllImages {
|
||||
let _ = config.features.enable(Feature::ResizeAllImages);
|
||||
}
|
||||
});
|
||||
let test = builder.build_with_remote_env(&server).await?;
|
||||
let TestCodex {
|
||||
codex,
|
||||
@@ -254,18 +260,42 @@ async fn assert_user_turn_local_image_resizes_to(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Eq, PartialEq)]
|
||||
enum TestImageResizePolicy {
|
||||
Legacy,
|
||||
AllImages,
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn user_turn_with_local_image_attaches_image() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
assert_user_turn_local_image_resizes_to((2304, 864), (2048, 768)).await
|
||||
assert_user_turn_local_image_resizes_to((2304, 864), (2048, 768), TestImageResizePolicy::Legacy)
|
||||
.await
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn user_turn_with_vertical_local_image_resizes_to_square_bounds() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
assert_user_turn_local_image_resizes_to((1024, 4096), (512, 2048)).await
|
||||
assert_user_turn_local_image_resizes_to(
|
||||
(1024, 4096),
|
||||
(512, 2048),
|
||||
TestImageResizePolicy::Legacy,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn resize_all_images_applies_patch_budget_to_local_user_image() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
assert_user_turn_local_image_resizes_to(
|
||||
(2048, 2048),
|
||||
(1600, 1600),
|
||||
TestImageResizePolicy::AllImages,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
@@ -1248,6 +1278,72 @@ async fn view_image_tool_errors_for_non_image_files() -> anyhow::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn resize_all_images_turns_invalid_view_image_into_placeholder() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let _ = config.features.enable(Feature::ResizeAllImages);
|
||||
});
|
||||
let test = builder.build_with_remote_env(&server).await?;
|
||||
let TestCodex {
|
||||
codex,
|
||||
session_configured,
|
||||
..
|
||||
} = &test;
|
||||
|
||||
let rel_path = "assets/invalid-image.json";
|
||||
write_workspace_file(&test, rel_path, br#"{ "message": "hello" }"#.to_vec()).await?;
|
||||
let call_id = "view-image-invalid-placeholder";
|
||||
let arguments = serde_json::json!({ "path": rel_path }).to_string();
|
||||
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "view_image", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let second_mock = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
codex
|
||||
.submit(disabled_user_turn(
|
||||
&test,
|
||||
vec![UserInput::Text {
|
||||
text: "please inspect the image".into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
session_configured.model.clone(),
|
||||
))
|
||||
.await?;
|
||||
wait_for_event_with_timeout(
|
||||
codex,
|
||||
|event| matches!(event, EventMsg::TurnComplete(_)),
|
||||
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = second_mock.single_request();
|
||||
assert_eq!(
|
||||
request.function_call_output(call_id).get("output"),
|
||||
Some(&serde_json::json!([{
|
||||
"type": "input_text",
|
||||
"text": "image content omitted because it could not be processed"
|
||||
}]))
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn view_image_tool_errors_when_file_missing() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
@@ -183,6 +183,8 @@ pub enum Feature {
|
||||
ImageGeneration,
|
||||
/// Replace hosted image generation with the standalone image-generation extension.
|
||||
ImageGenExt,
|
||||
/// Resize all inline data-URL images before recording them in history.
|
||||
ResizeAllImages,
|
||||
/// Allow prompting and installing missing MCP dependencies.
|
||||
SkillMcpDependencyInstall,
|
||||
/// Removed compatibility flag for deleted skill env var dependency prompting.
|
||||
@@ -1086,6 +1088,12 @@ pub const FEATURES: &[FeatureSpec] = &[
|
||||
stage: Stage::UnderDevelopment,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::ResizeAllImages,
|
||||
key: "resize_all_images",
|
||||
stage: Stage::UnderDevelopment,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::SkillMcpDependencyInstall,
|
||||
key: "skill_mcp_dependency_install",
|
||||
|
||||
@@ -4,6 +4,7 @@ use std::num::NonZeroUsize;
|
||||
use std::path::Path;
|
||||
|
||||
use codex_utils_image::PromptImageMode;
|
||||
use codex_utils_image::data_url_from_bytes;
|
||||
use codex_utils_image::load_for_prompt_bytes;
|
||||
use serde::Deserialize;
|
||||
use serde::Deserializer;
|
||||
@@ -1088,24 +1089,7 @@ pub fn local_image_content_items_with_label_number(
|
||||
};
|
||||
|
||||
match load_for_prompt_bytes(path, file_bytes, mode) {
|
||||
Ok(image) => {
|
||||
let mut items = Vec::with_capacity(3);
|
||||
if let Some(label_number) = label_number {
|
||||
items.push(ContentItem::InputText {
|
||||
text: local_image_open_tag_text_with_path(label_number, path),
|
||||
});
|
||||
}
|
||||
items.push(ContentItem::InputImage {
|
||||
image_url: image.into_data_url(),
|
||||
detail: Some(detail),
|
||||
});
|
||||
if label_number.is_some() {
|
||||
items.push(ContentItem::InputText {
|
||||
text: LOCAL_IMAGE_CLOSE_TAG.to_string(),
|
||||
});
|
||||
}
|
||||
items
|
||||
}
|
||||
Ok(image) => local_image_content_items(path, image.into_data_url(), label_number, detail),
|
||||
Err(err) => match &err {
|
||||
ImageProcessingError::Read { .. }
|
||||
| ImageProcessingError::Encode { .. }
|
||||
@@ -1126,6 +1110,36 @@ pub fn local_image_content_items_with_label_number(
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum LocalImagePreparation {
|
||||
Process,
|
||||
Defer,
|
||||
}
|
||||
|
||||
fn local_image_content_items(
|
||||
path: &std::path::Path,
|
||||
image_url: String,
|
||||
label_number: Option<usize>,
|
||||
detail: ImageDetail,
|
||||
) -> Vec<ContentItem> {
|
||||
let mut items = Vec::with_capacity(3);
|
||||
if let Some(label_number) = label_number {
|
||||
items.push(ContentItem::InputText {
|
||||
text: local_image_open_tag_text_with_path(label_number, path),
|
||||
});
|
||||
}
|
||||
items.push(ContentItem::InputImage {
|
||||
image_url,
|
||||
detail: Some(detail),
|
||||
});
|
||||
if label_number.is_some() {
|
||||
items.push(ContentItem::InputText {
|
||||
text: LOCAL_IMAGE_CLOSE_TAG.to_string(),
|
||||
});
|
||||
}
|
||||
items
|
||||
}
|
||||
|
||||
impl From<ResponseInputItem> for ResponseItem {
|
||||
fn from(item: ResponseInputItem) -> Self {
|
||||
match item {
|
||||
@@ -1238,6 +1252,15 @@ pub enum ReasoningItemContent {
|
||||
|
||||
impl From<Vec<UserInput>> for ResponseInputItem {
|
||||
fn from(items: Vec<UserInput>) -> Self {
|
||||
Self::from_user_input(items, LocalImagePreparation::Process)
|
||||
}
|
||||
}
|
||||
|
||||
impl ResponseInputItem {
|
||||
pub fn from_user_input(
|
||||
items: Vec<UserInput>,
|
||||
local_image_preparation: LocalImagePreparation,
|
||||
) -> Self {
|
||||
let mut image_index = 0;
|
||||
Self::Message {
|
||||
role: "user".to_string(),
|
||||
@@ -1259,12 +1282,22 @@ impl From<Vec<UserInput>> for ResponseInputItem {
|
||||
image_index += 1;
|
||||
let detail = detail.unwrap_or(DEFAULT_IMAGE_DETAIL);
|
||||
match std::fs::read(&path) {
|
||||
Ok(file_bytes) => local_image_content_items_with_label_number(
|
||||
&path,
|
||||
file_bytes,
|
||||
Some(image_index),
|
||||
detail,
|
||||
),
|
||||
Ok(file_bytes) => match local_image_preparation {
|
||||
LocalImagePreparation::Process => {
|
||||
local_image_content_items_with_label_number(
|
||||
&path,
|
||||
file_bytes,
|
||||
Some(image_index),
|
||||
detail,
|
||||
)
|
||||
}
|
||||
LocalImagePreparation::Defer => local_image_content_items(
|
||||
&path,
|
||||
data_url_from_bytes("application/octet-stream", &file_bytes),
|
||||
Some(image_index),
|
||||
detail,
|
||||
),
|
||||
},
|
||||
Err(err) => vec![local_image_error_placeholder(&path, err)],
|
||||
}
|
||||
}
|
||||
|
||||
@@ -105,8 +105,7 @@ async fn preserves_large_image_in_original_mode() {
|
||||
async fn data_url_processing_preserves_supported_source_bytes() {
|
||||
let image = ImageBuffer::from_pixel(64, 32, Rgba([10u8, 20, 30, 255]));
|
||||
let original_bytes = image_bytes(&image, ImageFormat::Png);
|
||||
let encoded = BASE64_STANDARD.encode(&original_bytes);
|
||||
let image_url = format!("data:image/png;base64,{encoded}")
|
||||
let image_url = data_url_from_bytes("image/png", &original_bytes)
|
||||
.replacen("data:", "DATA:", 1)
|
||||
.replacen(";base64,", ";BASE64,", 1);
|
||||
|
||||
@@ -123,8 +122,7 @@ async fn data_url_processing_preserves_supported_source_bytes() {
|
||||
async fn data_url_processing_converts_gif_to_png() {
|
||||
let image = ImageBuffer::from_pixel(64, 32, Rgba([10u8, 20, 30, 255]));
|
||||
let gif_bytes = image_bytes(&image, ImageFormat::Gif);
|
||||
let encoded = BASE64_STANDARD.encode(&gif_bytes);
|
||||
let image_url = format!("data:image/gif;base64,{encoded}");
|
||||
let image_url = data_url_from_bytes("image/gif", &gif_bytes);
|
||||
|
||||
let processed = load_data_url_for_prompt(&image_url, PromptImageMode::ResizeToFit)
|
||||
.expect("process GIF data URL");
|
||||
|
||||
@@ -40,11 +40,16 @@ pub struct EncodedImage {
|
||||
|
||||
impl EncodedImage {
|
||||
pub fn into_data_url(self) -> String {
|
||||
let encoded = BASE64_STANDARD.encode(&self.bytes);
|
||||
format!("data:{};base64,{encoded}", self.mime)
|
||||
data_url_from_bytes(&self.mime, &self.bytes)
|
||||
}
|
||||
}
|
||||
|
||||
/// Wraps image bytes in a data URL without decoding or validating them.
|
||||
pub fn data_url_from_bytes(mime: &str, bytes: &[u8]) -> String {
|
||||
let encoded = BASE64_STANDARD.encode(bytes);
|
||||
format!("data:{mime};base64,{encoded}")
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub enum PromptImageMode {
|
||||
ResizeToFit,
|
||||
|
||||
Reference in New Issue
Block a user