core: resize all history images behind a feature flag (#27247)

## Summary

Adds complete client-side image preparation behind the default-off
`resize_all_images` feature flag.

When enabled, local image producers defer decoding and resizing. Images
are prepared centrally before insertion into conversation history,
covering user input, `view_image`, and structured tool-output images.

## Behavior

- Processes base64 `data:` images in messages and function/custom tool
outputs.
- Leaves non-data URLs, including HTTP(S) URLs, unchanged.
- Applies image-detail budgets:
  - `high` and omitted: 2048px maximum dimension and 2.5K 32px patches.
  - `original`: 6000px maximum dimension and 10K 32px patches.
  - `auto`: uses the same 2048px / 2.5K-patch budget as high.
  - `low`: unsupported and replaced with an actionable placeholder.
- Preserves original image bytes when no resize or format conversion is
needed.
- Enforces the shared 1 GiB encoded and decoded data-URL sanity limits.
- Replaces only an image that fails preparation, preserving sibling
content and tool-output metadata.
- Uses bounded placeholders distinguishing generic processing failures,
oversized images, and unsupported `low` detail.
- Prepares resumed and forked history before installing it as live
history without modifying persisted rollouts.

## Flag-Off Behavior

When `resize_all_images` is disabled:

- Existing local user-input and `view_image` processing remains
unchanged.
- Existing decoding and error behavior remains unchanged.
- Arbitrary tool-output images are not processed.
- HTTP(S) image URLs continue to be forwarded unchanged.


#### [git stack](https://github.com/magus/git-stack-cli)
-  `1` https://github.com/openai/codex/pull/27245
- 👉 `2` https://github.com/openai/codex/pull/27247
-  `3` https://github.com/openai/codex/pull/27246
-  `4` https://github.com/openai/codex/pull/27266
This commit is contained in:
Curtis 'Fjord' Hawthorne
2026-06-10 19:21:24 -07:00
committed by GitHub
Unverified
parent 9d87b771ce
commit a6f435ea94
15 changed files with 907 additions and 58 deletions
+6
View File
@@ -576,6 +576,9 @@
"request_rule": {
"type": "boolean"
},
"resize_all_images": {
"type": "boolean"
},
"responses_websockets": {
"type": "boolean"
},
@@ -4708,6 +4711,9 @@
"request_rule": {
"type": "boolean"
},
"resize_all_images": {
"type": "boolean"
},
"responses_websockets": {
"type": "boolean"
},
+122
View File
@@ -0,0 +1,122 @@
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::ImageDetail;
use codex_protocol::models::ResponseItem;
use codex_utils_image::ImageProcessingError;
use codex_utils_image::PromptImageMode;
use codex_utils_image::PromptImageResizeLimits;
use codex_utils_image::load_data_url_for_prompt;
use tracing::warn;
pub(crate) const IMAGE_PROCESSING_ERROR_PLACEHOLDER: &str =
"image content omitted because it could not be processed";
const IMAGE_TOO_LARGE_PLACEHOLDER: &str =
"image content omitted because it exceeded the supported size limit; use a smaller image";
const UNSUPPORTED_LOW_DETAIL_PLACEHOLDER: &str = "image content omitted because detail 'low' is not supported; use 'high', 'original', or 'auto'";
const HIGH_DETAIL_LIMITS: PromptImageResizeLimits = PromptImageResizeLimits {
max_dimension: 2048,
max_patches: 2_500,
};
const ORIGINAL_DETAIL_LIMITS: PromptImageResizeLimits = PromptImageResizeLimits {
max_dimension: 6000,
max_patches: 10_000,
};
#[derive(Debug, thiserror::Error)]
enum ImagePreparationError {
#[error("image detail `low` is not supported")]
UnsupportedLowDetail,
#[error(transparent)]
Processing(#[from] ImageProcessingError),
}
impl ImagePreparationError {
fn placeholder(&self) -> &'static str {
match self {
ImagePreparationError::UnsupportedLowDetail => UNSUPPORTED_LOW_DETAIL_PLACEHOLDER,
ImagePreparationError::Processing(ImageProcessingError::ImageTooLarge { .. }) => {
IMAGE_TOO_LARGE_PLACEHOLDER
}
ImagePreparationError::Processing(_) => IMAGE_PROCESSING_ERROR_PLACEHOLDER,
}
}
}
pub(crate) fn prepare_response_items(items: &mut [ResponseItem]) {
for item in items {
match item {
ResponseItem::Message { content, .. } => prepare_message_content(content),
ResponseItem::FunctionCallOutput { output, .. }
| ResponseItem::CustomToolCallOutput { output, .. } => {
if let Some(content) = output.content_items_mut() {
prepare_tool_output_content(content);
}
}
ResponseItem::Reasoning { .. }
| ResponseItem::AgentMessage { .. }
| ResponseItem::LocalShellCall { .. }
| ResponseItem::FunctionCall { .. }
| ResponseItem::ToolSearchCall { .. }
| ResponseItem::CustomToolCall { .. }
| ResponseItem::ToolSearchOutput { .. }
| ResponseItem::WebSearchCall { .. }
| ResponseItem::ImageGenerationCall { .. }
| ResponseItem::Compaction { .. }
| ResponseItem::CompactionTrigger
| ResponseItem::ContextCompaction { .. }
| ResponseItem::Other => {}
}
}
}
fn prepare_message_content(items: &mut [ContentItem]) {
for item in items {
if let ContentItem::InputImage { image_url, detail } = item
&& is_data_url(image_url)
&& let Err(error) = prepare_image(image_url, *detail)
{
warn!(%error, "failed to prepare message image");
*item = ContentItem::InputText {
text: error.placeholder().to_string(),
};
}
}
}
fn prepare_tool_output_content(items: &mut [FunctionCallOutputContentItem]) {
for item in items {
if let FunctionCallOutputContentItem::InputImage { image_url, detail } = item
&& is_data_url(image_url)
&& let Err(error) = prepare_image(image_url, *detail)
{
warn!(%error, "failed to prepare tool output image");
*item = FunctionCallOutputContentItem::InputText {
text: error.placeholder().to_string(),
};
}
}
}
fn is_data_url(image_url: &str) -> bool {
image_url
.get(.."data:".len())
.is_some_and(|prefix| prefix.eq_ignore_ascii_case("data:"))
}
fn prepare_image(
image_url: &mut String,
detail: Option<ImageDetail>,
) -> Result<(), ImagePreparationError> {
let limits = match detail {
None | Some(ImageDetail::Auto | ImageDetail::High) => HIGH_DETAIL_LIMITS,
Some(ImageDetail::Original) => ORIGINAL_DETAIL_LIMITS,
Some(ImageDetail::Low) => return Err(ImagePreparationError::UnsupportedLowDetail),
};
let image = load_data_url_for_prompt(image_url, PromptImageMode::ResizeWithLimits(limits))?;
*image_url = image.into_data_url();
Ok(())
}
#[cfg(test)]
#[path = "image_preparation_tests.rs"]
mod tests;
@@ -0,0 +1,193 @@
use std::io::Cursor;
use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_utils_image::data_url_from_bytes;
use image::DynamicImage;
use image::GenericImageView;
use image::ImageBuffer;
use image::ImageFormat;
use image::Rgba;
use pretty_assertions::assert_eq;
use super::*;
fn png_data_url(width: u32, height: u32) -> (String, Vec<u8>) {
let image = ImageBuffer::from_pixel(width, height, Rgba([10u8, 20, 30, 255]));
let mut encoded = Cursor::new(Vec::new());
DynamicImage::ImageRgba8(image)
.write_to(&mut encoded, ImageFormat::Png)
.expect("encode PNG");
let bytes = encoded.into_inner();
(data_url_from_bytes("image/png", &bytes), bytes)
}
fn decoded_image(image_url: &str) -> (Vec<u8>, DynamicImage) {
let (_, payload) = image_url.split_once(',').expect("data URL payload");
let bytes = BASE64_STANDARD.decode(payload).expect("decode image URL");
let image = image::load_from_memory(&bytes).expect("decode processed image");
(bytes, image)
}
#[test]
fn preparation_preserves_small_image_bytes_and_non_data_urls() {
let (data_url, original_bytes) = png_data_url(/*width*/ 64, /*height*/ 32);
let http_url = "https://example.com/image.png".to_string();
let mut items = vec![ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![
ContentItem::InputImage {
image_url: data_url,
detail: Some(ImageDetail::High),
},
ContentItem::InputImage {
image_url: http_url.clone(),
detail: Some(ImageDetail::Low),
},
],
phase: None,
}];
prepare_response_items(&mut items);
let ResponseItem::Message { content, .. } = &items[0] else {
panic!("expected message");
};
let [
ContentItem::InputImage { image_url, .. },
ContentItem::InputImage {
image_url: preserved_http_url,
detail: Some(ImageDetail::Low),
},
] = content.as_slice()
else {
panic!("expected two images");
};
assert_eq!(decoded_image(image_url).0, original_bytes);
assert_eq!(preserved_http_url, &http_url);
}
#[test]
fn detail_policies_apply_the_expected_budgets() {
for (detail, input_dimensions, expected_dimensions) in [
(Some(ImageDetail::High), (2048, 2048), (1600, 1600)),
(Some(ImageDetail::Original), (6401, 100), (6000, 94)),
(Some(ImageDetail::Original), (3201, 3201), (3200, 3200)),
(Some(ImageDetail::Auto), (2048, 2048), (1600, 1600)),
(None, (2048, 2048), (1600, 1600)),
] {
let (image_url, _) = png_data_url(input_dimensions.0, input_dimensions.1);
let mut items = vec![ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputImage { image_url, detail }],
phase: None,
}];
prepare_response_items(&mut items);
let ResponseItem::Message { content, .. } = &items[0] else {
panic!("expected message");
};
let [ContentItem::InputImage { image_url, .. }] = content.as_slice() else {
panic!("expected image");
};
assert_eq!(decoded_image(image_url).1.dimensions(), expected_dimensions);
}
}
#[test]
fn preparation_replaces_only_failed_tool_images_and_preserves_metadata() {
let (valid_image_url, _) = png_data_url(/*width*/ 64, /*height*/ 32);
let expected_valid_image_url = valid_image_url.clone();
let mut items = vec![ResponseItem::CustomToolCallOutput {
call_id: "call-1".to_string(),
name: None,
output: FunctionCallOutputPayload {
body: FunctionCallOutputBody::ContentItems(vec![
FunctionCallOutputContentItem::InputText {
text: "before".to_string(),
},
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,%%%".to_string(),
detail: Some(ImageDetail::High),
},
FunctionCallOutputContentItem::InputImage {
image_url: data_url_from_bytes("image/png", b"not an image"),
detail: Some(ImageDetail::High),
},
FunctionCallOutputContentItem::InputImage {
image_url: valid_image_url.clone(),
detail: Some(ImageDetail::Low),
},
FunctionCallOutputContentItem::InputImage {
image_url: valid_image_url,
detail: Some(ImageDetail::High),
},
]),
success: Some(true),
},
}];
prepare_response_items(&mut items);
assert_eq!(
items,
vec![ResponseItem::CustomToolCallOutput {
call_id: "call-1".to_string(),
name: None,
output: FunctionCallOutputPayload {
body: FunctionCallOutputBody::ContentItems(vec![
FunctionCallOutputContentItem::InputText {
text: "before".to_string(),
},
FunctionCallOutputContentItem::InputText {
text: IMAGE_PROCESSING_ERROR_PLACEHOLDER.to_string(),
},
FunctionCallOutputContentItem::InputText {
text: IMAGE_PROCESSING_ERROR_PLACEHOLDER.to_string(),
},
FunctionCallOutputContentItem::InputText {
text: UNSUPPORTED_LOW_DETAIL_PLACEHOLDER.to_string(),
},
FunctionCallOutputContentItem::InputImage {
image_url: expected_valid_image_url,
detail: Some(ImageDetail::High),
},
]),
success: Some(true),
},
}]
);
}
#[test]
fn preparation_errors_use_bounded_actionable_placeholders() {
let cases = [
(
ImagePreparationError::UnsupportedLowDetail,
UNSUPPORTED_LOW_DETAIL_PLACEHOLDER,
),
(
ImagePreparationError::Processing(ImageProcessingError::ImageTooLarge {
representation: "decoded input",
size: 2,
max: 1,
}),
IMAGE_TOO_LARGE_PLACEHOLDER,
),
(
ImagePreparationError::Processing(ImageProcessingError::InvalidDataUrl {
reason: "details remain in logs".to_string(),
}),
IMAGE_PROCESSING_ERROR_PLACEHOLDER,
),
];
for (error, expected) in cases {
assert_eq!(error.placeholder(), expected);
}
}
+1
View File
@@ -42,6 +42,7 @@ mod exec_policy;
mod git_info_tests;
mod guardian;
mod hook_runtime;
mod image_preparation;
mod installation_id;
pub(crate) mod landlock;
pub use landlock::spawn_command_under_linux_sandbox;
+1 -3
View File
@@ -5,7 +5,6 @@ use codex_exec_server::ExecServerRuntimePaths;
use codex_login::AuthManager;
use codex_protocol::error::CodexErr;
use codex_protocol::error::Result as CodexResult;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::SessionSource;
use codex_protocol::user_input::UserInput;
@@ -78,8 +77,7 @@ pub(crate) async fn build_prompt_input_from_session(
.await;
if !input.is_empty() {
let input_item = ResponseInputItem::from(input);
let response_item = ResponseItem::from(input_item);
let response_item = sess.response_item_from_user_input(turn_context.as_ref(), input);
sess.record_conversation_items(turn_context.as_ref(), std::slice::from_ref(&response_item))
.await;
}
+44 -2
View File
@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::collections::HashSet;
@@ -31,6 +32,7 @@ use crate::context::PersonalitySpecInstructions;
use crate::default_skill_metadata_budget;
use crate::environment_selection::ResolvedTurnEnvironments;
use crate::exec_policy::ExecPolicyManager;
use crate::image_preparation::prepare_response_items;
use crate::parse_turn_item;
use crate::realtime_conversation::RealtimeConversationManager;
use crate::session_prefix::format_subagent_notification_message;
@@ -326,6 +328,7 @@ use codex_protocol::config_types::CollaborationMode;
use codex_protocol::config_types::Personality;
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
use codex_protocol::config_types::WindowsSandboxLevel;
use codex_protocol::models::LocalImagePreparation;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
@@ -1300,13 +1303,20 @@ impl Session {
rollout_items: &[RolloutItem],
) -> Option<PreviousTurnSettings> {
let rollout_reconstruction::RolloutReconstruction {
history,
mut history,
previous_turn_settings,
reference_context_item,
window_id,
} = self
.reconstruct_history_from_rollout(turn_context, rollout_items)
.await;
if turn_context.features.enabled(Feature::ResizeAllImages) {
// Keep the recorded rollout unchanged. Prepare its reconstructed history before
// installing it, so legacy images are processed once for this resume or fork and
// will be processed again if the rollout is reconstructed in a future session.
// This meets image resizing requirements without modifying persisted rollouts.
prepare_response_items(&mut history);
}
{
let mut state = self.state.lock().await;
state.replace_history(history, reference_context_item);
@@ -2583,11 +2593,43 @@ impl Session {
/// Records conversation items: append to history, persist to rollout, and
/// notify clients observing raw response items.
pub(crate) fn prepare_conversation_items_for_history<'a>(
&self,
turn_context: &TurnContext,
items: &'a [ResponseItem],
) -> Cow<'a, [ResponseItem]> {
if !turn_context.features.enabled(Feature::ResizeAllImages) {
return Cow::Borrowed(items);
}
let mut prepared_items = items.to_vec();
prepare_response_items(&mut prepared_items);
Cow::Owned(prepared_items)
}
pub(crate) fn response_item_from_user_input(
&self,
turn_context: &TurnContext,
input: Vec<UserInput>,
) -> ResponseItem {
let local_image_preparation = if turn_context.features.enabled(Feature::ResizeAllImages) {
LocalImagePreparation::Defer
} else {
LocalImagePreparation::Process
};
ResponseItem::from(ResponseInputItem::from_user_input(
input,
local_image_preparation,
))
}
pub(crate) async fn record_conversation_items(
&self,
turn_context: &TurnContext,
items: &[ResponseItem],
) {
let items = self.prepare_conversation_items_for_history(turn_context, items);
let items = items.as_ref();
{
let mut state = self.state.lock().await;
state.record_items(items.iter(), turn_context.truncation_policy);
@@ -3211,7 +3253,7 @@ impl Session {
// Persist the user message to history, but emit the turn item from `UserInput` so
// UI-only `text_elements` are preserved. `ResponseItem::Message` does not carry
// those spans, and `record_response_item_and_emit_turn_item` would drop them.
let response_item = ResponseItem::from(ResponseInputItem::from(input.to_vec()));
let response_item = self.response_item_from_user_input(turn_context, input.to_vec());
self.record_conversation_items(turn_context, std::slice::from_ref(&response_item))
.await;
let mut user_message_item = UserMessageItem::new(input);
+110
View File
@@ -42,7 +42,9 @@ use codex_protocol::models::ActivePermissionProfile;
use codex_protocol::models::BUILT_IN_PERMISSION_PROFILE_WORKSPACE;
use codex_protocol::models::FileSystemPermissions;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ImageDetail;
use codex_protocol::models::PermissionProfile;
use codex_protocol::models::SandboxEnforcement;
use codex_protocol::openai_models::ModelServiceTier;
@@ -1641,6 +1643,114 @@ async fn record_initial_history_reconstructs_resumed_transcript() {
assert_eq!(expected, history.raw_items());
}
#[tokio::test]
async fn resize_all_images_prepares_failures_before_history_insertion() {
let (session, turn_context, _rx) = make_session_and_context_with_auth_and_config_and_rx(
CodexAuth::from_api_key("Test API Key"),
Vec::new(),
|config| {
let _ = config.features.enable(Feature::ResizeAllImages);
},
)
.await;
let item = ResponseItem::FunctionCallOutput {
call_id: "call-1".to_string(),
output: FunctionCallOutputPayload {
body: FunctionCallOutputBody::ContentItems(vec![
FunctionCallOutputContentItem::InputText {
text: "before".to_string(),
},
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,%%%".to_string(),
detail: Some(ImageDetail::High),
},
FunctionCallOutputContentItem::InputImage {
image_url: "https://example.com/image.png".to_string(),
detail: Some(ImageDetail::High),
},
]),
success: Some(true),
},
};
session
.record_conversation_items(turn_context.as_ref(), std::slice::from_ref(&item))
.await;
assert_eq!(
session.state.lock().await.clone_history().raw_items(),
&[ResponseItem::FunctionCallOutput {
call_id: "call-1".to_string(),
output: FunctionCallOutputPayload {
body: FunctionCallOutputBody::ContentItems(vec![
FunctionCallOutputContentItem::InputText {
text: "before".to_string(),
},
FunctionCallOutputContentItem::InputText {
text: "image content omitted because it could not be processed".to_string(),
},
FunctionCallOutputContentItem::InputImage {
image_url: "https://example.com/image.png".to_string(),
detail: Some(ImageDetail::High),
},
]),
success: Some(true),
},
}]
);
}
#[tokio::test]
async fn resize_all_images_prepares_resumed_history_before_installing_it() {
let (session, _turn_context, _rx) = make_session_and_context_with_auth_and_config_and_rx(
CodexAuth::from_api_key("Test API Key"),
Vec::new(),
|config| {
let _ = config.features.enable(Feature::ResizeAllImages);
},
)
.await;
let resumed_item = ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![
ContentItem::InputImage {
image_url: "data:image/png;base64,%%%".to_string(),
detail: Some(ImageDetail::High),
},
ContentItem::InputText {
text: "keep me".to_string(),
},
],
phase: None,
};
session
.record_initial_history(InitialHistory::Resumed(ResumedHistory {
conversation_id: ThreadId::default(),
history: vec![RolloutItem::ResponseItem(resumed_item)],
rollout_path: Some(PathBuf::from("/tmp/resume.jsonl")),
}))
.await;
assert_eq!(
session.state.lock().await.clone_history().raw_items(),
&[ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![
ContentItem::InputText {
text: "image content omitted because it could not be processed".to_string(),
},
ContentItem::InputText {
text: "keep me".to_string(),
},
],
phase: None,
}]
);
}
#[test]
fn resolve_multi_agent_version_handles_unset_and_legacy_history() {
let thread_id = ThreadId::default();
+31 -14
View File
@@ -1,3 +1,4 @@
use codex_features::Feature;
use codex_protocol::items::ImageViewItem;
use codex_protocol::items::TurnItem;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
@@ -8,6 +9,7 @@ use codex_protocol::models::ImageDetail;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::openai_models::InputModality;
use codex_utils_image::PromptImageMode;
use codex_utils_image::data_url_from_bytes;
use codex_utils_image::load_for_prompt_bytes;
use serde::Deserialize;
@@ -175,25 +177,30 @@ impl ViewImageHandler {
let can_request_original_detail = can_request_original_image_detail(&turn.model_info);
let use_original_detail =
can_request_original_detail && matches!(detail, Some(ViewImageDetail::Original));
let image_mode = if use_original_detail {
PromptImageMode::Original
} else {
PromptImageMode::ResizeToFit
};
let image_detail = if use_original_detail {
ImageDetail::Original
} else {
DEFAULT_IMAGE_DETAIL
};
let image =
load_for_prompt_bytes(abs_path.as_path(), file_bytes, image_mode).map_err(|error| {
FunctionCallError::RespondToModel(format!(
"unable to process image at `{}`: {error}",
abs_path.display()
))
})?;
let image_url = image.into_data_url();
let image_url = if turn.features.enabled(Feature::ResizeAllImages) {
// The history insertion path owns image decoding and resizing when this is enabled.
data_url_from_bytes("application/octet-stream", &file_bytes)
} else {
let image_mode = if use_original_detail {
PromptImageMode::Original
} else {
PromptImageMode::ResizeToFit
};
load_for_prompt_bytes(abs_path.as_path(), file_bytes, image_mode)
.map_err(|error| {
FunctionCallError::RespondToModel(format!(
"unable to process image at `{}`: {error}",
abs_path.display()
))
})?
.into_data_url()
};
let item = TurnItem::ImageView(ImageViewItem {
id: call_id,
@@ -218,7 +225,7 @@ pub struct ViewImageOutput {
impl ToolOutput for ViewImageOutput {
fn log_preview(&self) -> String {
self.image_url.clone()
format!("<image data URL omitted: {} bytes>", self.image_url.len())
}
fn success_for_logging(&self) -> bool {
@@ -264,6 +271,16 @@ mod tests {
use std::sync::Arc;
use tokio::sync::Mutex;
#[test]
fn log_preview_omits_image_data() {
let output = ViewImageOutput {
image_url: "data:image/png;base64,AAA".to_string(),
image_detail: DEFAULT_IMAGE_DETAIL,
};
assert_eq!(output.log_preview(), "<image data URL omitted: 25 bytes>");
}
#[test]
fn code_mode_result_returns_image_url_object() {
let output = ViewImageOutput {
+117 -6
View File
@@ -42,11 +42,16 @@ use core_test_support::test_codex::turn_permission_fields;
use core_test_support::wait_for_event;
use core_test_support::wait_for_event_match;
use core_test_support::wait_for_mcp_server;
use image::DynamicImage;
use image::GenericImageView;
use image::ImageBuffer;
use image::Rgba;
use pretty_assertions::assert_eq;
use serde_json::Value;
use std::collections::HashMap;
use std::collections::HashSet;
use std::fs;
use std::io::Cursor;
use std::path::Path;
use std::sync::Arc;
use std::time::Duration;
@@ -168,12 +173,21 @@ async fn run_code_mode_turn_with_config(
code: &str,
configure: impl FnOnce(&mut Config) + Send + 'static,
) -> Result<(TestCodex, ResponseMock)> {
let mut builder = test_codex()
.with_model("test-gpt-5.1-codex")
.with_config(move |config| {
let _ = config.features.enable(Feature::CodeMode);
configure(config);
});
run_code_mode_turn_with_model_and_config(server, prompt, code, "test-gpt-5.1-codex", configure)
.await
}
async fn run_code_mode_turn_with_model_and_config(
server: &MockServer,
prompt: &str,
code: &str,
model: &'static str,
configure: impl FnOnce(&mut Config) + Send + 'static,
) -> Result<(TestCodex, ResponseMock)> {
let mut builder = test_codex().with_model(model).with_config(move |config| {
let _ = config.features.enable(Feature::CodeMode);
configure(config);
});
let test = builder.build(server).await?;
responses::mount_sse_once(
@@ -2536,6 +2550,103 @@ image("data:image/png;base64,AAA");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn resize_all_images_replaces_malformed_code_mode_image_only() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let (_test, second_mock) = run_code_mode_turn_with_config(
&server,
"use exec to return images",
r#"
image("https://example.com/image.jpg");
image("data:image/png;base64,AAA");
"#,
|config| {
let _ = config.features.enable(Feature::ResizeAllImages);
},
)
.await?;
let req = second_mock.single_request();
let items = custom_tool_output_items(&req, "call-1");
let (_, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(success, Some(false));
assert_eq!(items.len(), 3);
assert_eq!(
items[1],
serde_json::json!({
"type": "input_image",
"image_url": "https://example.com/image.jpg",
"detail": "high"
})
);
assert_eq!(
items[2],
serde_json::json!({
"type": "input_text",
"text": "image content omitted because it could not be processed"
})
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn resize_all_images_resizes_explicit_original_code_mode_image() -> Result<()> {
skip_if_no_network!(Ok(()));
let original_dimensions = (6401, 100);
let image = ImageBuffer::from_pixel(
original_dimensions.0,
original_dimensions.1,
Rgba([20, 40, 60, 255]),
);
let mut encoded = Cursor::new(Vec::new());
DynamicImage::ImageRgba8(image).write_to(&mut encoded, image::ImageFormat::Png)?;
let image_data_url = format!(
"data:image/png;base64,{}",
BASE64_STANDARD.encode(encoded.into_inner())
);
let code = format!(
"image({}, \"original\");",
serde_json::to_string(&image_data_url)?
);
let server = responses::start_mock_server().await;
let (_test, second_mock) = run_code_mode_turn_with_model_and_config(
&server,
"use exec to return a large original-detail image",
&code,
"gpt-5.3-codex",
|config| {
config
.features
.enable(Feature::ResizeAllImages)
.expect("resize_all_images should be enabled");
},
)
.await?;
let req = second_mock.single_request();
let items = custom_tool_output_items(&req, "call-1");
let (_, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(success, Some(false));
let resized_url = items[1]["image_url"]
.as_str()
.expect("code mode image output should contain a data URL");
assert_eq!(items[1]["detail"], "original");
let (_, resized_base64) = resized_url
.split_once(',')
.expect("resized image should contain a data URL prefix");
let resized_bytes = BASE64_STANDARD.decode(resized_base64)?;
let resized = image::load_from_memory(&resized_bytes)?;
let resized_dimensions = resized.dimensions();
assert_eq!(resized_dimensions, (6000, 94));
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_use_view_image_result_with_image_helper() -> Result<()> {
skip_if_no_network!(Ok(()));
+109
View File
@@ -2,6 +2,8 @@
use anyhow::Context as _;
use anyhow::ensure;
use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use core_test_support::test_codex::local_selections;
use std::collections::HashMap;
use std::ffi::OsStr;
@@ -25,6 +27,7 @@ use codex_core::config::Config;
use codex_exec_server::CreateDirectoryOptions;
use codex_exec_server::Environment;
use codex_exec_server::HttpRequestParams;
use codex_features::Feature;
use codex_login::CodexAuth;
use codex_mcp::MCP_SANDBOX_STATE_META_CAPABILITY;
use codex_models_manager::manager::RefreshStrategy;
@@ -57,11 +60,16 @@ use core_test_support::test_codex::test_codex;
use core_test_support::test_codex::turn_permission_fields;
use core_test_support::wait_for_event;
use core_test_support::wait_for_mcp_server;
use image::DynamicImage;
use image::GenericImageView;
use image::ImageBuffer;
use image::Rgba;
use reqwest::Client;
use reqwest::StatusCode;
use serde_json::Value;
use serde_json::json;
use serial_test::serial;
use std::io::Cursor;
use tempfile::tempdir;
use tokio::process::Child;
use tokio::process::Command;
@@ -1257,6 +1265,107 @@ async fn stdio_image_responses_round_trip() -> anyhow::Result<()> {
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
#[serial(mcp_test_value)]
async fn stdio_image_responses_resize_large_image() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let call_id = "img-resize-1";
let server_name = "rmcp";
let namespace = format!("mcp__{server_name}");
let original_dimensions = (3000, 2000);
let image = ImageBuffer::from_pixel(
original_dimensions.0,
original_dimensions.1,
Rgba([20, 40, 60, 255]),
);
let mut encoded = Cursor::new(Vec::new());
DynamicImage::ImageRgba8(image).write_to(&mut encoded, image::ImageFormat::Png)?;
let image_data_url = format!(
"data:image/png;base64,{}",
BASE64_STANDARD.encode(encoded.into_inner())
);
let tool_arguments = serde_json::to_string(&json!({
"scenario": "image_only",
"data_url": image_data_url,
}))?;
mount_sse_once(
&server,
responses::sse(vec![
responses::ev_response_created("resp-1"),
responses::ev_function_call_with_namespace(
call_id,
&namespace,
"image_scenario",
&tool_arguments,
),
responses::ev_completed("resp-1"),
]),
)
.await;
let final_mock = mount_sse_once(
&server,
responses::sse(vec![
responses::ev_assistant_message("msg-1", "done"),
responses::ev_completed("resp-2"),
]),
)
.await;
let rmcp_test_server_bin = remote_aware_stdio_server_bin()?;
let fixture = test_codex()
.with_config(move |config| {
config
.features
.enable(Feature::ResizeAllImages)
.expect("resize_all_images should be enabled");
insert_mcp_server(
config,
server_name,
stdio_transport(rmcp_test_server_bin, /*env*/ None, Vec::new()),
TestMcpServerOptions {
environment_id: remote_aware_environment_id(),
..Default::default()
},
);
})
.build_with_remote_env(&server)
.await?;
wait_for_mcp_server(&fixture.codex, server_name).await?;
fixture
.codex
.submit(read_only_user_turn(
&fixture,
"call the rmcp image_scenario tool",
))
.await?;
wait_for_event(&fixture.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
let output_item = final_mock.single_request().function_call_output(call_id);
assert_eq!(output_item["call_id"], call_id);
let output = output_item["output"]
.as_array()
.expect("image MCP output should be content items");
let resized_url = output[1]["image_url"]
.as_str()
.expect("MCP image output should contain a data URL");
assert_eq!(output[1]["detail"], "high");
let (_, resized_base64) = resized_url
.split_once(',')
.expect("resized image should contain a data URL prefix");
let resized_bytes = BASE64_STANDARD.decode(resized_base64)?;
let resized = image::load_from_memory(&resized_bytes)?;
let resized_dimensions = resized.dimensions();
assert_eq!(resized_dimensions, (1920, 1280));
server.verify().await;
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
#[serial(mcp_test_value)]
async fn stdio_image_responses_preserve_original_detail_metadata() -> anyhow::Result<()> {
+99 -3
View File
@@ -7,6 +7,7 @@ use codex_exec_server::CreateDirectoryOptions;
use codex_exec_server::LOCAL_ENVIRONMENT_ID;
use codex_exec_server::REMOTE_ENVIRONMENT_ID;
use codex_exec_server::RemoveOptions;
use codex_features::Feature;
use codex_login::CodexAuth;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::models::PermissionProfile;
@@ -177,10 +178,15 @@ async fn write_workspace_png(
async fn assert_user_turn_local_image_resizes_to(
original_dimensions: (u32, u32),
expected_dimensions: (u32, u32),
resize_policy: TestImageResizePolicy,
) -> anyhow::Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex();
let mut builder = test_codex().with_config(move |config| {
if resize_policy == TestImageResizePolicy::AllImages {
let _ = config.features.enable(Feature::ResizeAllImages);
}
});
let test = builder.build_with_remote_env(&server).await?;
let TestCodex {
codex,
@@ -254,18 +260,42 @@ async fn assert_user_turn_local_image_resizes_to(
Ok(())
}
#[derive(Clone, Copy, Eq, PartialEq)]
enum TestImageResizePolicy {
Legacy,
AllImages,
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn user_turn_with_local_image_attaches_image() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
assert_user_turn_local_image_resizes_to((2304, 864), (2048, 768)).await
assert_user_turn_local_image_resizes_to((2304, 864), (2048, 768), TestImageResizePolicy::Legacy)
.await
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn user_turn_with_vertical_local_image_resizes_to_square_bounds() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
assert_user_turn_local_image_resizes_to((1024, 4096), (512, 2048)).await
assert_user_turn_local_image_resizes_to(
(1024, 4096),
(512, 2048),
TestImageResizePolicy::Legacy,
)
.await
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn resize_all_images_applies_patch_budget_to_local_user_image() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
assert_user_turn_local_image_resizes_to(
(2048, 2048),
(1600, 1600),
TestImageResizePolicy::AllImages,
)
.await
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -1248,6 +1278,72 @@ async fn view_image_tool_errors_for_non_image_files() -> anyhow::Result<()> {
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn resize_all_images_turns_invalid_view_image_into_placeholder() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
let _ = config.features.enable(Feature::ResizeAllImages);
});
let test = builder.build_with_remote_env(&server).await?;
let TestCodex {
codex,
session_configured,
..
} = &test;
let rel_path = "assets/invalid-image.json";
write_workspace_file(&test, rel_path, br#"{ "message": "hello" }"#.to_vec()).await?;
let call_id = "view-image-invalid-placeholder";
let arguments = serde_json::json!({ "path": rel_path }).to_string();
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "view_image", &arguments),
ev_completed("resp-1"),
]),
)
.await;
let second_mock = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;
codex
.submit(disabled_user_turn(
&test,
vec![UserInput::Text {
text: "please inspect the image".into(),
text_elements: Vec::new(),
}],
session_configured.model.clone(),
))
.await?;
wait_for_event_with_timeout(
codex,
|event| matches!(event, EventMsg::TurnComplete(_)),
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
)
.await;
let request = second_mock.single_request();
assert_eq!(
request.function_call_output(call_id).get("output"),
Some(&serde_json::json!([{
"type": "input_text",
"text": "image content omitted because it could not be processed"
}]))
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn view_image_tool_errors_when_file_missing() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
+8
View File
@@ -183,6 +183,8 @@ pub enum Feature {
ImageGeneration,
/// Replace hosted image generation with the standalone image-generation extension.
ImageGenExt,
/// Resize all inline data-URL images before recording them in history.
ResizeAllImages,
/// Allow prompting and installing missing MCP dependencies.
SkillMcpDependencyInstall,
/// Removed compatibility flag for deleted skill env var dependency prompting.
@@ -1086,6 +1088,12 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::UnderDevelopment,
default_enabled: false,
},
FeatureSpec {
id: Feature::ResizeAllImages,
key: "resize_all_images",
stage: Stage::UnderDevelopment,
default_enabled: false,
},
FeatureSpec {
id: Feature::SkillMcpDependencyInstall,
key: "skill_mcp_dependency_install",
+57 -24
View File
@@ -4,6 +4,7 @@ use std::num::NonZeroUsize;
use std::path::Path;
use codex_utils_image::PromptImageMode;
use codex_utils_image::data_url_from_bytes;
use codex_utils_image::load_for_prompt_bytes;
use serde::Deserialize;
use serde::Deserializer;
@@ -1088,24 +1089,7 @@ pub fn local_image_content_items_with_label_number(
};
match load_for_prompt_bytes(path, file_bytes, mode) {
Ok(image) => {
let mut items = Vec::with_capacity(3);
if let Some(label_number) = label_number {
items.push(ContentItem::InputText {
text: local_image_open_tag_text_with_path(label_number, path),
});
}
items.push(ContentItem::InputImage {
image_url: image.into_data_url(),
detail: Some(detail),
});
if label_number.is_some() {
items.push(ContentItem::InputText {
text: LOCAL_IMAGE_CLOSE_TAG.to_string(),
});
}
items
}
Ok(image) => local_image_content_items(path, image.into_data_url(), label_number, detail),
Err(err) => match &err {
ImageProcessingError::Read { .. }
| ImageProcessingError::Encode { .. }
@@ -1126,6 +1110,36 @@ pub fn local_image_content_items_with_label_number(
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LocalImagePreparation {
Process,
Defer,
}
fn local_image_content_items(
path: &std::path::Path,
image_url: String,
label_number: Option<usize>,
detail: ImageDetail,
) -> Vec<ContentItem> {
let mut items = Vec::with_capacity(3);
if let Some(label_number) = label_number {
items.push(ContentItem::InputText {
text: local_image_open_tag_text_with_path(label_number, path),
});
}
items.push(ContentItem::InputImage {
image_url,
detail: Some(detail),
});
if label_number.is_some() {
items.push(ContentItem::InputText {
text: LOCAL_IMAGE_CLOSE_TAG.to_string(),
});
}
items
}
impl From<ResponseInputItem> for ResponseItem {
fn from(item: ResponseInputItem) -> Self {
match item {
@@ -1238,6 +1252,15 @@ pub enum ReasoningItemContent {
impl From<Vec<UserInput>> for ResponseInputItem {
fn from(items: Vec<UserInput>) -> Self {
Self::from_user_input(items, LocalImagePreparation::Process)
}
}
impl ResponseInputItem {
pub fn from_user_input(
items: Vec<UserInput>,
local_image_preparation: LocalImagePreparation,
) -> Self {
let mut image_index = 0;
Self::Message {
role: "user".to_string(),
@@ -1259,12 +1282,22 @@ impl From<Vec<UserInput>> for ResponseInputItem {
image_index += 1;
let detail = detail.unwrap_or(DEFAULT_IMAGE_DETAIL);
match std::fs::read(&path) {
Ok(file_bytes) => local_image_content_items_with_label_number(
&path,
file_bytes,
Some(image_index),
detail,
),
Ok(file_bytes) => match local_image_preparation {
LocalImagePreparation::Process => {
local_image_content_items_with_label_number(
&path,
file_bytes,
Some(image_index),
detail,
)
}
LocalImagePreparation::Defer => local_image_content_items(
&path,
data_url_from_bytes("application/octet-stream", &file_bytes),
Some(image_index),
detail,
),
},
Err(err) => vec![local_image_error_placeholder(&path, err)],
}
}
+2 -4
View File
@@ -105,8 +105,7 @@ async fn preserves_large_image_in_original_mode() {
async fn data_url_processing_preserves_supported_source_bytes() {
let image = ImageBuffer::from_pixel(64, 32, Rgba([10u8, 20, 30, 255]));
let original_bytes = image_bytes(&image, ImageFormat::Png);
let encoded = BASE64_STANDARD.encode(&original_bytes);
let image_url = format!("data:image/png;base64,{encoded}")
let image_url = data_url_from_bytes("image/png", &original_bytes)
.replacen("data:", "DATA:", 1)
.replacen(";base64,", ";BASE64,", 1);
@@ -123,8 +122,7 @@ async fn data_url_processing_preserves_supported_source_bytes() {
async fn data_url_processing_converts_gif_to_png() {
let image = ImageBuffer::from_pixel(64, 32, Rgba([10u8, 20, 30, 255]));
let gif_bytes = image_bytes(&image, ImageFormat::Gif);
let encoded = BASE64_STANDARD.encode(&gif_bytes);
let image_url = format!("data:image/gif;base64,{encoded}");
let image_url = data_url_from_bytes("image/gif", &gif_bytes);
let processed = load_data_url_for_prompt(&image_url, PromptImageMode::ResizeToFit)
.expect("process GIF data URL");
+7 -2
View File
@@ -40,11 +40,16 @@ pub struct EncodedImage {
impl EncodedImage {
pub fn into_data_url(self) -> String {
let encoded = BASE64_STANDARD.encode(&self.bytes);
format!("data:{};base64,{encoded}", self.mime)
data_url_from_bytes(&self.mime, &self.bytes)
}
}
/// Wraps image bytes in a data URL without decoding or validating them.
pub fn data_url_from_bytes(mime: &str, bytes: &[u8]) -> String {
let encoded = BASE64_STANDARD.encode(bytes);
format!("data:{mime};base64,{encoded}")
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PromptImageMode {
ResizeToFit,