diff --git a/codex-rs/code-mode/src/description.rs b/codex-rs/code-mode/src/description.rs index 1aee1f7a3..81e45add0 100644 --- a/codex-rs/code-mode/src/description.rs +++ b/codex-rs/code-mode/src/description.rs @@ -24,7 +24,7 @@ const EXEC_DESCRIPTION_TEMPLATE: &str = r#"Run JavaScript code to orchestrate/co - Global helpers: - `exit()`: Immediately ends the current script successfully (like an early return from the top level). - `text(value: string | number | boolean | undefined | null)`: Appends a text item. Non-string values are stringified with `JSON.stringify(...)` when possible. -- `image(imageUrlOrItem: string | { image_url: string; detail?: "auto" | "low" | "high" | "original" | null })`: Appends an image item. `image_url` can be an HTTPS URL or a base64-encoded `data:` URL. +- `image(imageUrlOrItem: string | { image_url: string; detail?: "auto" | "low" | "high" | "original" | null } | ImageContent, detail?: "auto" | "low" | "high" | "original" | null)`: Appends an image item. `image_url` can be an HTTPS URL or a base64-encoded `data:` URL. To forward an MCP tool image, pass an individual `ImageContent` block from `result.content`, for example `image(result.content[0])`. MCP image blocks may request original detail with `_meta: { "codex/imageDetail": "original" }`. When provided, the second `detail` argument overrides any detail embedded in the first argument. - `store(key: string, value: any)`: stores a serializable value under a string key for later `exec` calls in the same session. - `load(key: string)`: returns the stored value for a string key, or `undefined` if it is missing. - `notify(value: string | number | boolean | undefined | null)`: immediately injects an extra `custom_tool_call_output` for the current `exec` call. Values are stringified like `text(...)`. diff --git a/codex-rs/code-mode/src/runtime/callbacks.rs b/codex-rs/code-mode/src/runtime/callbacks.rs index ace6f6114..a9755f6eb 100644 --- a/codex-rs/code-mode/src/runtime/callbacks.rs +++ b/codex-rs/code-mode/src/runtime/callbacks.rs @@ -109,7 +109,20 @@ pub(super) fn image_callback( } else { args.get(0) }; - let image_item = match normalize_output_image(scope, value) { + let detail_override = if args.length() < 2 { + None + } else { + let detail = args.get(1); + if detail.is_string() { + Some(detail.to_rust_string_lossy(scope)) + } else if detail.is_null() || detail.is_undefined() { + None + } else { + throw_type_error(scope, "image detail must be a string when provided"); + return; + } + }; + let image_item = match normalize_output_image(scope, value, detail_override) { Ok(image_item) => image_item, Err(()) => return, }; diff --git a/codex-rs/code-mode/src/runtime/value.rs b/codex-rs/code-mode/src/runtime/value.rs index eb0280142..5c63434f4 100644 --- a/codex-rs/code-mode/src/runtime/value.rs +++ b/codex-rs/code-mode/src/runtime/value.rs @@ -3,6 +3,9 @@ use serde_json::Value as JsonValue; use crate::response::FunctionCallOutputContentItem; use crate::response::ImageDetail; +const IMAGE_HELPER_EXPECTS_MESSAGE: &str = "image expects a non-empty image URL string, an object with image_url and optional detail, or a raw MCP image block"; +const CODEX_IMAGE_DETAIL_META_KEY: &str = "codex/imageDetail"; + pub(super) fn serialize_output_text( scope: &mut v8::PinScope<'_, '_>, value: v8::Local<'_, v8::Value>, @@ -34,45 +37,25 @@ pub(super) fn serialize_output_text( pub(super) fn normalize_output_image( scope: &mut v8::PinScope<'_, '_>, value: v8::Local<'_, v8::Value>, + detail_override: Option, ) -> Result { let result = (|| -> Result { let (image_url, detail) = if value.is_string() { (value.to_rust_string_lossy(scope), None) } else if value.is_object() && !value.is_array() { - let object = v8::Local::::try_from(value).map_err(|_| { - "image expects a non-empty image URL string or an object with image_url and optional detail".to_string() - })?; - let image_url_key = v8::String::new(scope, "image_url") - .ok_or_else(|| "failed to allocate image helper keys".to_string())?; - let detail_key = v8::String::new(scope, "detail") - .ok_or_else(|| "failed to allocate image helper keys".to_string())?; - let image_url = object - .get(scope, image_url_key.into()) - .filter(|value| value.is_string()) - .map(|value| value.to_rust_string_lossy(scope)) - .ok_or_else(|| { - "image expects a non-empty image URL string or an object with image_url and optional detail" - .to_string() - })?; - let detail = match object.get(scope, detail_key.into()) { - Some(value) if value.is_string() => Some(value.to_rust_string_lossy(scope)), - Some(value) if value.is_null() || value.is_undefined() => None, - Some(_) => return Err("image detail must be a string when provided".to_string()), - None => None, - }; - (image_url, detail) + let object = v8::Local::::try_from(value) + .map_err(|_| IMAGE_HELPER_EXPECTS_MESSAGE.to_string())?; + if let Some(image) = parse_non_mcp_output_image(scope, object)? { + image + } else { + parse_mcp_output_image(scope, value)? + } } else { - return Err( - "image expects a non-empty image URL string or an object with image_url and optional detail" - .to_string(), - ); + return Err(IMAGE_HELPER_EXPECTS_MESSAGE.to_string()); }; if image_url.is_empty() { - return Err( - "image expects a non-empty image URL string or an object with image_url and optional detail" - .to_string(), - ); + return Err(IMAGE_HELPER_EXPECTS_MESSAGE.to_string()); } let lower = image_url.to_ascii_lowercase(); if !(lower.starts_with("http://") @@ -82,6 +65,7 @@ pub(super) fn normalize_output_image( return Err("image expects an http(s) or data URL".to_string()); } + let detail = detail_override.or(detail); let detail = match detail { Some(detail) => { let normalized = detail.to_ascii_lowercase(); @@ -112,6 +96,86 @@ pub(super) fn normalize_output_image( } } +fn parse_non_mcp_output_image( + scope: &mut v8::PinScope<'_, '_>, + object: v8::Local<'_, v8::Object>, +) -> Result)>, String> { + let image_url_key = v8::String::new(scope, "image_url") + .ok_or_else(|| "failed to allocate image helper keys".to_string())?; + let Some(image_url) = object.get(scope, image_url_key.into()) else { + return Ok(None); + }; + if image_url.is_undefined() { + return Ok(None); + } + if !image_url.is_string() { + return Err(IMAGE_HELPER_EXPECTS_MESSAGE.to_string()); + } + let detail_key = v8::String::new(scope, "detail") + .ok_or_else(|| "failed to allocate image helper keys".to_string())?; + let detail = parse_image_detail_value(scope, object.get(scope, detail_key.into()))?; + Ok(Some((image_url.to_rust_string_lossy(scope), detail))) +} + +fn parse_mcp_output_image( + scope: &mut v8::PinScope<'_, '_>, + value: v8::Local<'_, v8::Value>, +) -> Result<(String, Option), String> { + let Some(result) = v8_value_to_json(scope, value)? else { + return Err(IMAGE_HELPER_EXPECTS_MESSAGE.to_string()); + }; + let JsonValue::Object(result) = result else { + return Err(IMAGE_HELPER_EXPECTS_MESSAGE.to_string()); + }; + let Some(item_type) = result.get("type").and_then(JsonValue::as_str) else { + return Err(IMAGE_HELPER_EXPECTS_MESSAGE.to_string()); + }; + if item_type != "image" { + return Err(format!( + "image only accepts MCP image blocks, got \"{item_type}\"" + )); + } + let data = result + .get("data") + .and_then(JsonValue::as_str) + .ok_or_else(|| "image expected MCP image data".to_string())?; + if data.is_empty() { + return Err("image expected MCP image data".to_string()); + } + + let image_url = if data.to_ascii_lowercase().starts_with("data:") { + data.to_string() + } else { + let mime_type = result + .get("mimeType") + .or_else(|| result.get("mime_type")) + .and_then(JsonValue::as_str) + .filter(|mime_type| !mime_type.is_empty()) + .unwrap_or("application/octet-stream"); + format!("data:{mime_type};base64,{data}") + }; + let detail = result + .get("_meta") + .and_then(JsonValue::as_object) + .and_then(|meta| meta.get(CODEX_IMAGE_DETAIL_META_KEY)) + .and_then(JsonValue::as_str) + .filter(|detail| *detail == "original") + .map(str::to_string); + Ok((image_url, detail)) +} + +fn parse_image_detail_value<'s>( + scope: &mut v8::PinScope<'s, '_>, + value: Option>, +) -> Result, String> { + match value { + Some(value) if value.is_string() => Ok(Some(value.to_rust_string_lossy(scope))), + Some(value) if value.is_null() || value.is_undefined() => Ok(None), + Some(_) => Err("image detail must be a string when provided".to_string()), + None => Ok(None), + } +} + pub(super) fn v8_value_to_json( scope: &mut v8::PinScope<'_, '_>, value: v8::Local<'_, v8::Value>, diff --git a/codex-rs/code-mode/src/service.rs b/codex-rs/code-mode/src/service.rs index 8a3788013..23ca7a746 100644 --- a/codex-rs/code-mode/src/service.rs +++ b/codex-rs/code-mode/src/service.rs @@ -684,6 +684,154 @@ text(JSON.stringify(returnsUndefined)); ); } + #[tokio::test] + async fn image_helper_accepts_raw_mcp_image_block_with_original_detail() { + let service = CodeModeService::new(); + + let response = service + .execute(ExecuteRequest { + source: r#" +image({ + type: "image", + data: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==", + mimeType: "image/png", + _meta: { "codex/imageDetail": "original" }, +}); +"# + .to_string(), + yield_time_ms: None, + ..execute_request("") + }) + .await + .unwrap(); + + assert_eq!( + response, + RuntimeResponse::Result { + cell_id: "1".to_string(), + content_items: vec![FunctionCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==".to_string(), + detail: Some(crate::ImageDetail::Original), + }], + stored_values: HashMap::new(), + error_text: None, + } + ); + } + + #[tokio::test] + async fn image_helper_second_arg_overrides_explicit_object_detail() { + let service = CodeModeService::new(); + + let response = service + .execute(ExecuteRequest { + source: r#" +image( + { + image_url: "https://example.com/image.jpg", + detail: "low", + }, + "original", +); +"# + .to_string(), + yield_time_ms: None, + ..execute_request("") + }) + .await + .unwrap(); + + assert_eq!( + response, + RuntimeResponse::Result { + cell_id: "1".to_string(), + content_items: vec![FunctionCallOutputContentItem::InputImage { + image_url: "https://example.com/image.jpg".to_string(), + detail: Some(crate::ImageDetail::Original), + }], + stored_values: HashMap::new(), + error_text: None, + } + ); + } + + #[tokio::test] + async fn image_helper_second_arg_overrides_raw_mcp_image_detail() { + let service = CodeModeService::new(); + + let response = service + .execute(ExecuteRequest { + source: r#" +image( + { + type: "image", + data: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==", + mimeType: "image/png", + _meta: { "codex/imageDetail": "original" }, + }, + "low", +); +"# + .to_string(), + yield_time_ms: None, + ..execute_request("") + }) + .await + .unwrap(); + + assert_eq!( + response, + RuntimeResponse::Result { + cell_id: "1".to_string(), + content_items: vec![FunctionCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==".to_string(), + detail: Some(crate::ImageDetail::Low), + }], + stored_values: HashMap::new(), + error_text: None, + } + ); + } + + #[tokio::test] + async fn image_helper_rejects_raw_mcp_result_container() { + let service = CodeModeService::new(); + + let response = service + .execute(ExecuteRequest { + source: r#" +image({ + content: [ + { + type: "image", + data: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==", + mimeType: "image/png", + _meta: { "codex/imageDetail": "original" }, + }, + ], + isError: false, +}); +"# + .to_string(), + yield_time_ms: None, + ..execute_request("") + }) + .await + .unwrap(); + + assert_eq!( + response, + RuntimeResponse::Result { + cell_id: "1".to_string(), + content_items: Vec::new(), + stored_values: HashMap::new(), + error_text: Some( + "image expects a non-empty image URL string, an object with image_url and optional detail, or a raw MCP image block".to_string(), + ), + } + ); + } + #[tokio::test] async fn terminate_waits_for_runtime_shutdown_before_responding() { let inner = test_inner(); diff --git a/codex-rs/core/src/original_image_detail.rs b/codex-rs/core/src/original_image_detail.rs index c3e5c3d9a..adfed321b 100644 --- a/codex-rs/core/src/original_image_detail.rs +++ b/codex-rs/core/src/original_image_detail.rs @@ -1,2 +1,3 @@ pub(crate) use codex_tools::can_request_original_image_detail; pub(crate) use codex_tools::normalize_output_image_detail; +pub(crate) use codex_tools::sanitize_original_image_detail; diff --git a/codex-rs/core/src/project_doc.rs b/codex-rs/core/src/project_doc.rs index 6659dfea7..c4f70a6f5 100644 --- a/codex-rs/core/src/project_doc.rs +++ b/codex-rs/core/src/project_doc.rs @@ -59,6 +59,7 @@ fn render_js_repl_instructions(config: &Config) -> Option { section.push_str("- `codex.emitImage(...)` adds one image to the outer `js_repl` function output each time you call it, so you can call it multiple times to emit multiple images. It accepts a data URL, a single `input_image` item, an object like `{ bytes, mimeType }`, or a raw tool response object with exactly one image and no text. It rejects mixed text-and-image content.\n"); section.push_str("- `codex.tool(...)` and `codex.emitImage(...)` keep stable helper identities across cells. Saved references and persisted objects can reuse them in later cells, but async callbacks that fire after a cell finishes still fail because no exec is active.\n"); section.push_str("- Request full-resolution image processing with `detail: \"original\"` only when the `view_image` tool schema includes a `detail` argument. The same availability applies to `codex.emitImage(...)`: if `view_image.detail` is present, you may also pass `detail: \"original\"` there. Use this when high-fidelity image perception or precise localization is needed, especially for CUA agents.\n"); + section.push_str("- Raw MCP image blocks can request the same behavior by returning `_meta: { \"codex/imageDetail\": \"original\" }` on the image content item.\n"); section.push_str("- Example of sharing an in-memory Playwright screenshot: `await codex.emitImage({ bytes: await page.screenshot({ type: \"jpeg\", quality: 85 }), mimeType: \"image/jpeg\", detail: \"original\" })`.\n"); section.push_str("- Example of sharing a local image tool result: `await codex.emitImage(codex.tool(\"view_image\", { path: \"/absolute/path\", detail: \"original\" }))`.\n"); section.push_str("- When encoding an image to send with `codex.emitImage(...)` or `view_image`, prefer JPEG at about 85 quality when lossy compression is acceptable; use PNG when transparency or lossless detail matters. Smaller uploads are faster and less likely to hit size limits.\n"); diff --git a/codex-rs/core/src/project_doc_tests.rs b/codex-rs/core/src/project_doc_tests.rs index 705801317..31f73805f 100644 --- a/codex-rs/core/src/project_doc_tests.rs +++ b/codex-rs/core/src/project_doc_tests.rs @@ -205,7 +205,7 @@ async fn js_repl_instructions_are_appended_when_enabled() { let res = get_user_instructions(&cfg) .await .expect("js_repl instructions expected"); - let expected = "## JavaScript REPL (Node)\n- Use `js_repl` for Node-backed JavaScript with top-level await in a persistent kernel.\n- `js_repl` is a freeform/custom tool. Direct `js_repl` calls must send raw JavaScript tool input (optionally with first-line `// codex-js-repl: timeout_ms=15000`). Do not wrap code in JSON (for example `{\"code\":\"...\"}`), quotes, or markdown code fences.\n- Helpers: `codex.cwd`, `codex.homeDir`, `codex.tmpDir`, `codex.tool(name, args?)`, and `codex.emitImage(imageLike)`.\n- `codex.tool` executes a normal tool call and resolves to the raw tool output object. Use it for shell and non-shell tools alike. Nested tool outputs stay inside JavaScript unless you emit them explicitly.\n- `codex.emitImage(...)` adds one image to the outer `js_repl` function output each time you call it, so you can call it multiple times to emit multiple images. It accepts a data URL, a single `input_image` item, an object like `{ bytes, mimeType }`, or a raw tool response object with exactly one image and no text. It rejects mixed text-and-image content.\n- `codex.tool(...)` and `codex.emitImage(...)` keep stable helper identities across cells. Saved references and persisted objects can reuse them in later cells, but async callbacks that fire after a cell finishes still fail because no exec is active.\n- Request full-resolution image processing with `detail: \"original\"` only when the `view_image` tool schema includes a `detail` argument. The same availability applies to `codex.emitImage(...)`: if `view_image.detail` is present, you may also pass `detail: \"original\"` there. Use this when high-fidelity image perception or precise localization is needed, especially for CUA agents.\n- Example of sharing an in-memory Playwright screenshot: `await codex.emitImage({ bytes: await page.screenshot({ type: \"jpeg\", quality: 85 }), mimeType: \"image/jpeg\", detail: \"original\" })`.\n- Example of sharing a local image tool result: `await codex.emitImage(codex.tool(\"view_image\", { path: \"/absolute/path\", detail: \"original\" }))`.\n- When encoding an image to send with `codex.emitImage(...)` or `view_image`, prefer JPEG at about 85 quality when lossy compression is acceptable; use PNG when transparency or lossless detail matters. Smaller uploads are faster and less likely to hit size limits.\n- Top-level bindings persist across cells. If a cell throws, prior bindings remain available and bindings that finished initializing before the throw often remain usable in later cells. For code you plan to reuse across cells, prefer declaring or assigning it in direct top-level statements before operations that might throw. If you hit `SyntaxError: Identifier 'x' has already been declared`, first reuse the existing binding, reassign a previously declared `let`, or pick a new descriptive name. Use `{ ... }` only for a short temporary block when you specifically need local scratch names; do not wrap an entire cell in block scope if you want those names reusable later. Reset the kernel with `js_repl_reset` only when you need a clean state.\n- Top-level static import declarations (for example `import x from \"./file.js\"`) are currently unsupported in `js_repl`; use dynamic imports with `await import(\"pkg\")`, `await import(\"./file.js\")`, or `await import(\"/abs/path/file.mjs\")` instead. Imported local files must be ESM `.js`/`.mjs` files and run in the same REPL VM context. Bare package imports always resolve from REPL-global search roots (`CODEX_JS_REPL_NODE_MODULE_DIRS`, then cwd), not relative to the imported file location. Local files may statically import only other local relative/absolute/`file://` `.js`/`.mjs` files; package and builtin imports from local files must stay dynamic. `import.meta.resolve()` returns importable strings such as `file://...`, bare package names, and `node:...` specifiers. Local file modules reload between execs, while top-level bindings persist until `js_repl_reset`.\n- Avoid direct access to `process.stdout` / `process.stderr` / `process.stdin`; it can corrupt the JSON line protocol. Use `console.log`, `codex.tool(...)`, and `codex.emitImage(...)`."; + let expected = "## JavaScript REPL (Node)\n- Use `js_repl` for Node-backed JavaScript with top-level await in a persistent kernel.\n- `js_repl` is a freeform/custom tool. Direct `js_repl` calls must send raw JavaScript tool input (optionally with first-line `// codex-js-repl: timeout_ms=15000`). Do not wrap code in JSON (for example `{\"code\":\"...\"}`), quotes, or markdown code fences.\n- Helpers: `codex.cwd`, `codex.homeDir`, `codex.tmpDir`, `codex.tool(name, args?)`, and `codex.emitImage(imageLike)`.\n- `codex.tool` executes a normal tool call and resolves to the raw tool output object. Use it for shell and non-shell tools alike. Nested tool outputs stay inside JavaScript unless you emit them explicitly.\n- `codex.emitImage(...)` adds one image to the outer `js_repl` function output each time you call it, so you can call it multiple times to emit multiple images. It accepts a data URL, a single `input_image` item, an object like `{ bytes, mimeType }`, or a raw tool response object with exactly one image and no text. It rejects mixed text-and-image content.\n- `codex.tool(...)` and `codex.emitImage(...)` keep stable helper identities across cells. Saved references and persisted objects can reuse them in later cells, but async callbacks that fire after a cell finishes still fail because no exec is active.\n- Request full-resolution image processing with `detail: \"original\"` only when the `view_image` tool schema includes a `detail` argument. The same availability applies to `codex.emitImage(...)`: if `view_image.detail` is present, you may also pass `detail: \"original\"` there. Use this when high-fidelity image perception or precise localization is needed, especially for CUA agents.\n- Raw MCP image blocks can request the same behavior by returning `_meta: { \"codex/imageDetail\": \"original\" }` on the image content item.\n- Example of sharing an in-memory Playwright screenshot: `await codex.emitImage({ bytes: await page.screenshot({ type: \"jpeg\", quality: 85 }), mimeType: \"image/jpeg\", detail: \"original\" })`.\n- Example of sharing a local image tool result: `await codex.emitImage(codex.tool(\"view_image\", { path: \"/absolute/path\", detail: \"original\" }))`.\n- When encoding an image to send with `codex.emitImage(...)` or `view_image`, prefer JPEG at about 85 quality when lossy compression is acceptable; use PNG when transparency or lossless detail matters. Smaller uploads are faster and less likely to hit size limits.\n- Top-level bindings persist across cells. If a cell throws, prior bindings remain available and bindings that finished initializing before the throw often remain usable in later cells. For code you plan to reuse across cells, prefer declaring or assigning it in direct top-level statements before operations that might throw. If you hit `SyntaxError: Identifier 'x' has already been declared`, first reuse the existing binding, reassign a previously declared `let`, or pick a new descriptive name. Use `{ ... }` only for a short temporary block when you specifically need local scratch names; do not wrap an entire cell in block scope if you want those names reusable later. Reset the kernel with `js_repl_reset` only when you need a clean state.\n- Top-level static import declarations (for example `import x from \"./file.js\"`) are currently unsupported in `js_repl`; use dynamic imports with `await import(\"pkg\")`, `await import(\"./file.js\")`, or `await import(\"/abs/path/file.mjs\")` instead. Imported local files must be ESM `.js`/`.mjs` files and run in the same REPL VM context. Bare package imports always resolve from REPL-global search roots (`CODEX_JS_REPL_NODE_MODULE_DIRS`, then cwd), not relative to the imported file location. Local files may statically import only other local relative/absolute/`file://` `.js`/`.mjs` files; package and builtin imports from local files must stay dynamic. `import.meta.resolve()` returns importable strings such as `file://...`, bare package names, and `node:...` specifiers. Local file modules reload between execs, while top-level bindings persist until `js_repl_reset`.\n- Avoid direct access to `process.stdout` / `process.stderr` / `process.stdin`; it can corrupt the JSON line protocol. Use `console.log`, `codex.tool(...)`, and `codex.emitImage(...)`."; assert_eq!(res, expected); } @@ -224,7 +224,7 @@ async fn js_repl_tools_only_instructions_are_feature_gated() { let res = get_user_instructions(&cfg) .await .expect("js_repl instructions expected"); - let expected = "## JavaScript REPL (Node)\n- Use `js_repl` for Node-backed JavaScript with top-level await in a persistent kernel.\n- `js_repl` is a freeform/custom tool. Direct `js_repl` calls must send raw JavaScript tool input (optionally with first-line `// codex-js-repl: timeout_ms=15000`). Do not wrap code in JSON (for example `{\"code\":\"...\"}`), quotes, or markdown code fences.\n- Helpers: `codex.cwd`, `codex.homeDir`, `codex.tmpDir`, `codex.tool(name, args?)`, and `codex.emitImage(imageLike)`.\n- `codex.tool` executes a normal tool call and resolves to the raw tool output object. Use it for shell and non-shell tools alike. Nested tool outputs stay inside JavaScript unless you emit them explicitly.\n- `codex.emitImage(...)` adds one image to the outer `js_repl` function output each time you call it, so you can call it multiple times to emit multiple images. It accepts a data URL, a single `input_image` item, an object like `{ bytes, mimeType }`, or a raw tool response object with exactly one image and no text. It rejects mixed text-and-image content.\n- `codex.tool(...)` and `codex.emitImage(...)` keep stable helper identities across cells. Saved references and persisted objects can reuse them in later cells, but async callbacks that fire after a cell finishes still fail because no exec is active.\n- Request full-resolution image processing with `detail: \"original\"` only when the `view_image` tool schema includes a `detail` argument. The same availability applies to `codex.emitImage(...)`: if `view_image.detail` is present, you may also pass `detail: \"original\"` there. Use this when high-fidelity image perception or precise localization is needed, especially for CUA agents.\n- Example of sharing an in-memory Playwright screenshot: `await codex.emitImage({ bytes: await page.screenshot({ type: \"jpeg\", quality: 85 }), mimeType: \"image/jpeg\", detail: \"original\" })`.\n- Example of sharing a local image tool result: `await codex.emitImage(codex.tool(\"view_image\", { path: \"/absolute/path\", detail: \"original\" }))`.\n- When encoding an image to send with `codex.emitImage(...)` or `view_image`, prefer JPEG at about 85 quality when lossy compression is acceptable; use PNG when transparency or lossless detail matters. Smaller uploads are faster and less likely to hit size limits.\n- Top-level bindings persist across cells. If a cell throws, prior bindings remain available and bindings that finished initializing before the throw often remain usable in later cells. For code you plan to reuse across cells, prefer declaring or assigning it in direct top-level statements before operations that might throw. If you hit `SyntaxError: Identifier 'x' has already been declared`, first reuse the existing binding, reassign a previously declared `let`, or pick a new descriptive name. Use `{ ... }` only for a short temporary block when you specifically need local scratch names; do not wrap an entire cell in block scope if you want those names reusable later. Reset the kernel with `js_repl_reset` only when you need a clean state.\n- Top-level static import declarations (for example `import x from \"./file.js\"`) are currently unsupported in `js_repl`; use dynamic imports with `await import(\"pkg\")`, `await import(\"./file.js\")`, or `await import(\"/abs/path/file.mjs\")` instead. Imported local files must be ESM `.js`/`.mjs` files and run in the same REPL VM context. Bare package imports always resolve from REPL-global search roots (`CODEX_JS_REPL_NODE_MODULE_DIRS`, then cwd), not relative to the imported file location. Local files may statically import only other local relative/absolute/`file://` `.js`/`.mjs` files; package and builtin imports from local files must stay dynamic. `import.meta.resolve()` returns importable strings such as `file://...`, bare package names, and `node:...` specifiers. Local file modules reload between execs, while top-level bindings persist until `js_repl_reset`.\n- Do not call tools directly; use `js_repl` + `codex.tool(...)` for all tool calls, including shell commands.\n- MCP tools (if any) can also be called by name via `codex.tool(...)`.\n- Avoid direct access to `process.stdout` / `process.stderr` / `process.stdin`; it can corrupt the JSON line protocol. Use `console.log`, `codex.tool(...)`, and `codex.emitImage(...)`."; + let expected = "## JavaScript REPL (Node)\n- Use `js_repl` for Node-backed JavaScript with top-level await in a persistent kernel.\n- `js_repl` is a freeform/custom tool. Direct `js_repl` calls must send raw JavaScript tool input (optionally with first-line `// codex-js-repl: timeout_ms=15000`). Do not wrap code in JSON (for example `{\"code\":\"...\"}`), quotes, or markdown code fences.\n- Helpers: `codex.cwd`, `codex.homeDir`, `codex.tmpDir`, `codex.tool(name, args?)`, and `codex.emitImage(imageLike)`.\n- `codex.tool` executes a normal tool call and resolves to the raw tool output object. Use it for shell and non-shell tools alike. Nested tool outputs stay inside JavaScript unless you emit them explicitly.\n- `codex.emitImage(...)` adds one image to the outer `js_repl` function output each time you call it, so you can call it multiple times to emit multiple images. It accepts a data URL, a single `input_image` item, an object like `{ bytes, mimeType }`, or a raw tool response object with exactly one image and no text. It rejects mixed text-and-image content.\n- `codex.tool(...)` and `codex.emitImage(...)` keep stable helper identities across cells. Saved references and persisted objects can reuse them in later cells, but async callbacks that fire after a cell finishes still fail because no exec is active.\n- Request full-resolution image processing with `detail: \"original\"` only when the `view_image` tool schema includes a `detail` argument. The same availability applies to `codex.emitImage(...)`: if `view_image.detail` is present, you may also pass `detail: \"original\"` there. Use this when high-fidelity image perception or precise localization is needed, especially for CUA agents.\n- Raw MCP image blocks can request the same behavior by returning `_meta: { \"codex/imageDetail\": \"original\" }` on the image content item.\n- Example of sharing an in-memory Playwright screenshot: `await codex.emitImage({ bytes: await page.screenshot({ type: \"jpeg\", quality: 85 }), mimeType: \"image/jpeg\", detail: \"original\" })`.\n- Example of sharing a local image tool result: `await codex.emitImage(codex.tool(\"view_image\", { path: \"/absolute/path\", detail: \"original\" }))`.\n- When encoding an image to send with `codex.emitImage(...)` or `view_image`, prefer JPEG at about 85 quality when lossy compression is acceptable; use PNG when transparency or lossless detail matters. Smaller uploads are faster and less likely to hit size limits.\n- Top-level bindings persist across cells. If a cell throws, prior bindings remain available and bindings that finished initializing before the throw often remain usable in later cells. For code you plan to reuse across cells, prefer declaring or assigning it in direct top-level statements before operations that might throw. If you hit `SyntaxError: Identifier 'x' has already been declared`, first reuse the existing binding, reassign a previously declared `let`, or pick a new descriptive name. Use `{ ... }` only for a short temporary block when you specifically need local scratch names; do not wrap an entire cell in block scope if you want those names reusable later. Reset the kernel with `js_repl_reset` only when you need a clean state.\n- Top-level static import declarations (for example `import x from \"./file.js\"`) are currently unsupported in `js_repl`; use dynamic imports with `await import(\"pkg\")`, `await import(\"./file.js\")`, or `await import(\"/abs/path/file.mjs\")` instead. Imported local files must be ESM `.js`/`.mjs` files and run in the same REPL VM context. Bare package imports always resolve from REPL-global search roots (`CODEX_JS_REPL_NODE_MODULE_DIRS`, then cwd), not relative to the imported file location. Local files may statically import only other local relative/absolute/`file://` `.js`/`.mjs` files; package and builtin imports from local files must stay dynamic. `import.meta.resolve()` returns importable strings such as `file://...`, bare package names, and `node:...` specifiers. Local file modules reload between execs, while top-level bindings persist until `js_repl_reset`.\n- Do not call tools directly; use `js_repl` + `codex.tool(...)` for all tool calls, including shell commands.\n- MCP tools (if any) can also be called by name via `codex.tool(...)`.\n- Avoid direct access to `process.stdout` / `process.stderr` / `process.stdin`; it can corrupt the JSON line protocol. Use `console.log`, `codex.tool(...)`, and `codex.emitImage(...)`."; assert_eq!(res, expected); } diff --git a/codex-rs/core/src/tools/code_mode/mod.rs b/codex-rs/core/src/tools/code_mode/mod.rs index 0004ec6f5..8445fdf64 100644 --- a/codex-rs/core/src/tools/code_mode/mod.rs +++ b/codex-rs/core/src/tools/code_mode/mod.rs @@ -18,6 +18,8 @@ use tokio_util::sync::CancellationToken; use crate::codex::Session; use crate::codex::TurnContext; use crate::function_tool::FunctionCallError; +use crate::original_image_detail::can_request_original_image_detail; +use crate::original_image_detail::sanitize_original_image_detail as sanitize_image_detail_items; use crate::tools::ToolRouter; use crate::tools::context::FunctionToolOutput; use crate::tools::context::SharedTurnDiffTracker; @@ -160,12 +162,14 @@ pub(super) async fn handle_runtime_response( match response { RuntimeResponse::Yielded { content_items, .. } => { let mut content_items = into_function_call_output_content_items(content_items); + sanitize_runtime_image_detail(exec.turn.as_ref(), &mut content_items); content_items = truncate_code_mode_result(content_items, max_output_tokens); prepend_script_status(&mut content_items, &script_status, started_at.elapsed()); Ok(FunctionToolOutput::from_content(content_items, Some(true))) } RuntimeResponse::Terminated { content_items, .. } => { let mut content_items = into_function_call_output_content_items(content_items); + sanitize_runtime_image_detail(exec.turn.as_ref(), &mut content_items); content_items = truncate_code_mode_result(content_items, max_output_tokens); prepend_script_status(&mut content_items, &script_status, started_at.elapsed()); Ok(FunctionToolOutput::from_content(content_items, Some(true))) @@ -177,6 +181,7 @@ pub(super) async fn handle_runtime_response( .. } => { let mut content_items = into_function_call_output_content_items(content_items); + sanitize_runtime_image_detail(exec.turn.as_ref(), &mut content_items); exec.session .services .code_mode_service @@ -198,6 +203,10 @@ pub(super) async fn handle_runtime_response( } } +fn sanitize_runtime_image_detail(turn: &TurnContext, items: &mut [FunctionCallOutputContentItem]) { + sanitize_image_detail_items(can_request_original_image_detail(&turn.model_info), items); +} + fn format_script_status(response: &RuntimeResponse) -> String { match response { RuntimeResponse::Yielded { cell_id, .. } => { diff --git a/codex-rs/core/src/tools/context.rs b/codex-rs/core/src/tools/context.rs index dff1e444d..60bcbcda2 100644 --- a/codex-rs/core/src/tools/context.rs +++ b/codex-rs/core/src/tools/context.rs @@ -1,5 +1,6 @@ use crate::codex::Session; use crate::codex::TurnContext; +use crate::original_image_detail::sanitize_original_image_detail; use crate::tools::TELEMETRY_PREVIEW_MAX_BYTES; use crate::tools::TELEMETRY_PREVIEW_MAX_LINES; use crate::tools::TELEMETRY_PREVIEW_TRUNCATION_NOTICE; @@ -122,6 +123,7 @@ impl ToolOutput for CallToolResult { pub struct McpToolOutput { pub result: CallToolResult, pub wall_time: Duration, + pub original_image_detail_supported: bool, } impl ToolOutput for McpToolOutput { @@ -155,6 +157,10 @@ impl ToolOutput for McpToolOutput { impl McpToolOutput { fn response_payload(&self) -> FunctionCallOutputPayload { let mut payload = self.result.as_function_call_output_payload(); + if let Some(items) = payload.content_items_mut() { + sanitize_original_image_detail(self.original_image_detail_supported, items); + } + let wall_time_seconds = self.wall_time.as_secs_f64(); let header = format!("Wall time: {wall_time_seconds:.4} seconds\nOutput:"); diff --git a/codex-rs/core/src/tools/context_tests.rs b/codex-rs/core/src/tools/context_tests.rs index d45f35183..8df9159ed 100644 --- a/codex-rs/core/src/tools/context_tests.rs +++ b/codex-rs/core/src/tools/context_tests.rs @@ -98,6 +98,7 @@ fn mcp_tool_output_response_item_includes_wall_time() { meta: None, }, wall_time: std::time::Duration::from_millis(1250), + original_image_detail_supported: false, }; let response = output.to_response_item( @@ -149,6 +150,7 @@ fn mcp_tool_output_response_item_preserves_content_items() { meta: None, }, wall_time: std::time::Duration::from_millis(500), + original_image_detail_supported: false, }; let response = output.to_response_item( @@ -201,6 +203,7 @@ fn mcp_tool_output_code_mode_result_stays_raw_call_tool_result() { meta: None, }, wall_time: std::time::Duration::from_millis(1250), + original_image_detail_supported: false, }; let result = output.code_mode_result(&ToolPayload::Mcp { diff --git a/codex-rs/core/src/tools/handlers/mcp.rs b/codex-rs/core/src/tools/handlers/mcp.rs index cfe219f44..b24db734b 100644 --- a/codex-rs/core/src/tools/handlers/mcp.rs +++ b/codex-rs/core/src/tools/handlers/mcp.rs @@ -3,6 +3,7 @@ use std::time::Instant; use crate::function_tool::FunctionCallError; use crate::mcp_tool_call::handle_mcp_tool_call; +use crate::original_image_detail::can_request_original_image_detail; use crate::tools::context::McpToolOutput; use crate::tools::context::ToolInvocation; use crate::tools::context::ToolPayload; @@ -56,6 +57,7 @@ impl ToolHandler for McpHandler { Ok(McpToolOutput { result, wall_time: started.elapsed(), + original_image_detail_supported: can_request_original_image_detail(&turn.model_info), }) } } diff --git a/codex-rs/core/src/tools/js_repl/kernel.js b/codex-rs/core/src/tools/js_repl/kernel.js index 5d3181852..3e5cf855f 100644 --- a/codex-rs/core/src/tools/js_repl/kernel.js +++ b/codex-rs/core/src/tools/js_repl/kernel.js @@ -1330,6 +1330,13 @@ function normalizeMcpImageData(data, mimeType) { return `data:${normalizedMimeType};base64,${data}`; } +function parseMcpImageDetail(meta) { + if (!isPlainObject(meta) || meta["codex/imageDetail"] !== "original") { + return undefined; + } + return "original"; +} + function parseMcpToolResult(result) { if (typeof result === "string") { return { images: [], textCount: result.length > 0 ? 1 : 0 }; @@ -1362,6 +1369,7 @@ function parseMcpToolResult(result) { if (item.type === "image") { images.push({ image_url: normalizeMcpImageData(item.data, item.mimeType ?? item.mime_type), + detail: parseMcpImageDetail(item._meta), }); continue; } diff --git a/codex-rs/core/tests/suite/code_mode.rs b/codex-rs/core/tests/suite/code_mode.rs index f010c9e58..d514e801e 100644 --- a/codex-rs/core/tests/suite/code_mode.rs +++ b/codex-rs/core/tests/suite/code_mode.rs @@ -178,7 +178,16 @@ async fn run_code_mode_turn_with_rmcp( prompt: &str, code: &str, ) -> Result<(TestCodex, ResponseMock)> { - run_code_mode_turn_with_rmcp_mode(server, prompt, code, /*code_mode_only*/ false).await + run_code_mode_turn_with_rmcp_model(server, prompt, code, "test-gpt-5.1-codex").await +} + +async fn run_code_mode_turn_with_rmcp_model( + server: &MockServer, + prompt: &str, + code: &str, + model: &'static str, +) -> Result<(TestCodex, ResponseMock)> { + run_code_mode_turn_with_rmcp_config(server, prompt, code, model, /*code_mode_only*/ false).await } async fn run_code_mode_turn_with_rmcp_mode( @@ -187,48 +196,57 @@ async fn run_code_mode_turn_with_rmcp_mode( code: &str, code_mode_only: bool, ) -> Result<(TestCodex, ResponseMock)> { - let rmcp_test_server_bin = stdio_server_bin()?; - let mut builder = test_codex() - .with_model("test-gpt-5.1-codex") - .with_config(move |config| { - let _ = if code_mode_only { - config.features.enable(Feature::CodeModeOnly) - } else { - config.features.enable(Feature::CodeMode) - }; + run_code_mode_turn_with_rmcp_config(server, prompt, code, "test-gpt-5.1-codex", code_mode_only) + .await +} - let mut servers = config.mcp_servers.get().clone(); - servers.insert( - "rmcp".to_string(), - McpServerConfig { - transport: McpServerTransportConfig::Stdio { - command: rmcp_test_server_bin, - args: Vec::new(), - env: Some(HashMap::from([( - "MCP_TEST_VALUE".to_string(), - "propagated-env".to_string(), - )])), - env_vars: Vec::new(), - cwd: None, - }, - enabled: true, - required: false, - supports_parallel_tool_calls: false, - disabled_reason: None, - startup_timeout_sec: Some(Duration::from_secs(10)), - tool_timeout_sec: None, - enabled_tools: None, - disabled_tools: None, - scopes: None, - oauth_resource: None, - tools: HashMap::new(), +async fn run_code_mode_turn_with_rmcp_config( + server: &MockServer, + prompt: &str, + code: &str, + model: &'static str, + code_mode_only: bool, +) -> Result<(TestCodex, ResponseMock)> { + let rmcp_test_server_bin = stdio_server_bin()?; + let mut builder = test_codex().with_model(model).with_config(move |config| { + let _ = if code_mode_only { + config.features.enable(Feature::CodeModeOnly) + } else { + config.features.enable(Feature::CodeMode) + }; + + let mut servers = config.mcp_servers.get().clone(); + servers.insert( + "rmcp".to_string(), + McpServerConfig { + transport: McpServerTransportConfig::Stdio { + command: rmcp_test_server_bin, + args: Vec::new(), + env: Some(HashMap::from([( + "MCP_TEST_VALUE".to_string(), + "propagated-env".to_string(), + )])), + env_vars: Vec::new(), + cwd: None, }, - ); - config - .mcp_servers - .set(servers) - .expect("test mcp servers should accept any configuration"); - }); + enabled: true, + required: false, + supports_parallel_tool_calls: false, + disabled_reason: None, + startup_timeout_sec: Some(Duration::from_secs(10)), + tool_timeout_sec: None, + enabled_tools: None, + disabled_tools: None, + scopes: None, + oauth_resource: None, + tools: HashMap::new(), + }, + ); + config + .mcp_servers + .set(servers) + .expect("test mcp servers should accept any configuration"); + }); let test = builder.build(server).await?; responses::mount_sse_once( @@ -1919,6 +1937,62 @@ image(out); Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn code_mode_can_use_mcp_image_result_with_image_helper() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + let code = r#" +const out = await tools.mcp__rmcp__image_scenario({ + scenario: "image_only_original_detail", +}); +const imageItem = out.content.find((item) => item.type === "image"); +image(imageItem); +"#; + + let (_test, second_mock) = run_code_mode_turn_with_rmcp_model( + &server, + "use exec to call the rmcp image scenario tool and emit its image output", + code, + "gpt-5.3-codex", + ) + .await?; + + let req = second_mock.single_request(); + let items = custom_tool_output_items(&req, "call-1"); + let (_, success) = custom_tool_output_body_and_success(&req, "call-1"); + assert_ne!( + success, + Some(false), + "code_mode mcp image scenario call failed unexpectedly" + ); + assert_eq!(items.len(), 2); + assert_regex_match( + concat!( + r"(?s)\A", + r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z" + ), + text_item(&items, /*index*/ 0), + ); + + assert_eq!( + items[1].get("type").and_then(Value::as_str), + Some("input_image") + ); + + let emitted_image_url = items[1] + .get("image_url") + .and_then(Value::as_str) + .expect("image helper should emit an input_image item with image_url"); + assert!(emitted_image_url.starts_with("data:image/png;base64,")); + assert_eq!( + items[1].get("detail").and_then(Value::as_str), + Some("original") + ); + + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> { skip_if_no_network!(Ok(())); diff --git a/codex-rs/core/tests/suite/rmcp_client.rs b/codex-rs/core/tests/suite/rmcp_client.rs index b86094f91..6d19e6ac5 100644 --- a/codex-rs/core/tests/suite/rmcp_client.rs +++ b/codex-rs/core/tests/suite/rmcp_client.rs @@ -11,6 +11,8 @@ use std::time::UNIX_EPOCH; use codex_config::types::McpServerConfig; use codex_config::types::McpServerTransportConfig; +use codex_core::config::Config; +use codex_features::Feature; use codex_login::CodexAuth; use codex_mcp::MCP_SANDBOX_STATE_META_CAPABILITY; use codex_models_manager::manager::RefreshStrategy; @@ -33,10 +35,12 @@ use codex_protocol::user_input::UserInput; use codex_utils_cargo_bin::cargo_bin; use core_test_support::assert_regex_match; use core_test_support::responses; +use core_test_support::responses::ev_custom_tool_call; use core_test_support::responses::mount_models_once; use core_test_support::responses::mount_sse_once; use core_test_support::skip_if_no_network; use core_test_support::stdio_server_bin; +use core_test_support::test_codex::TestCodex; use core_test_support::test_codex::test_codex; use core_test_support::wait_for_event; use core_test_support::wait_for_event_with_timeout; @@ -82,6 +86,82 @@ enum McpCallEvent { End(String), } +async fn wait_for_mcp_tool(fixture: &TestCodex, tool_name: &str) -> anyhow::Result<()> { + let tools_ready_deadline = Instant::now() + Duration::from_secs(30); + loop { + fixture.codex.submit(Op::ListMcpTools).await?; + let list_event = wait_for_event_with_timeout( + &fixture.codex, + |ev| matches!(ev, EventMsg::McpListToolsResponse(_)), + Duration::from_secs(10), + ) + .await; + let EventMsg::McpListToolsResponse(tool_list) = list_event else { + unreachable!("event guard guarantees McpListToolsResponse"); + }; + if tool_list.tools.contains_key(tool_name) { + return Ok(()); + } + + let available_tools: Vec<&str> = tool_list.tools.keys().map(String::as_str).collect(); + if Instant::now() >= tools_ready_deadline { + panic!( + "timed out waiting for MCP tool {tool_name} to become available; discovered tools: {available_tools:?}" + ); + } + sleep(Duration::from_millis(200)).await; + } +} + +#[derive(Default)] +struct TestMcpServerOptions { + supports_parallel_tool_calls: bool, + tool_timeout_sec: Option, +} + +fn stdio_transport( + command: String, + env: Option>, + env_vars: Vec, +) -> McpServerTransportConfig { + McpServerTransportConfig::Stdio { + command, + args: Vec::new(), + env, + env_vars, + cwd: None, + } +} + +fn insert_mcp_server( + config: &mut Config, + server_name: &str, + transport: McpServerTransportConfig, + options: TestMcpServerOptions, +) { + let mut servers = config.mcp_servers.get().clone(); + servers.insert( + server_name.to_string(), + McpServerConfig { + transport, + enabled: true, + required: false, + supports_parallel_tool_calls: options.supports_parallel_tool_calls, + disabled_reason: None, + startup_timeout_sec: Some(Duration::from_secs(10)), + tool_timeout_sec: options.tool_timeout_sec, + enabled_tools: None, + disabled_tools: None, + scopes: None, + oauth_resource: None, + tools: HashMap::new(), + }, + ); + if let Err(err) = config.mcp_servers.set(servers) { + panic!("test mcp servers should accept any configuration: {err}"); + } +} + #[tokio::test(flavor = "multi_thread", worker_threads = 1)] #[serial(mcp_test_value)] async fn stdio_server_round_trip() -> anyhow::Result<()> { @@ -121,37 +201,19 @@ async fn stdio_server_round_trip() -> anyhow::Result<()> { let fixture = test_codex() .with_config(move |config| { - let mut servers = config.mcp_servers.get().clone(); - servers.insert( - server_name.to_string(), - McpServerConfig { - transport: McpServerTransportConfig::Stdio { - command: rmcp_test_server_bin, - args: Vec::new(), - env: Some(HashMap::from([( - "MCP_TEST_VALUE".to_string(), - expected_env_value.to_string(), - )])), - env_vars: Vec::new(), - cwd: None, - }, - enabled: true, - required: false, - supports_parallel_tool_calls: false, - disabled_reason: None, - startup_timeout_sec: Some(Duration::from_secs(10)), - tool_timeout_sec: None, - enabled_tools: None, - disabled_tools: None, - scopes: None, - oauth_resource: None, - tools: HashMap::new(), - }, + insert_mcp_server( + config, + server_name, + stdio_transport( + rmcp_test_server_bin, + Some(HashMap::from([( + "MCP_TEST_VALUE".to_string(), + expected_env_value.to_string(), + )])), + Vec::new(), + ), + TestMcpServerOptions::default(), ); - config - .mcp_servers - .set(servers) - .expect("test mcp servers should accept any configuration"); }) .build(&server) .await?; @@ -282,34 +344,12 @@ async fn stdio_mcp_tool_call_includes_sandbox_state_meta() -> anyhow::Result<()> let rmcp_test_server_bin = stdio_server_bin()?; let fixture = test_codex() .with_config(move |config| { - let mut servers = config.mcp_servers.get().clone(); - servers.insert( - server_name.to_string(), - McpServerConfig { - transport: McpServerTransportConfig::Stdio { - command: rmcp_test_server_bin, - args: Vec::new(), - env: None, - env_vars: Vec::new(), - cwd: None, - }, - enabled: true, - required: false, - supports_parallel_tool_calls: false, - disabled_reason: None, - startup_timeout_sec: Some(Duration::from_secs(10)), - tool_timeout_sec: None, - enabled_tools: None, - disabled_tools: None, - scopes: None, - oauth_resource: None, - tools: HashMap::new(), - }, + insert_mcp_server( + config, + server_name, + stdio_transport(rmcp_test_server_bin, /*env*/ None, Vec::new()), + TestMcpServerOptions::default(), ); - config - .mcp_servers - .set(servers) - .expect("test mcp servers should accept any configuration"); }) .build(&server) .await?; @@ -417,34 +457,15 @@ async fn stdio_mcp_parallel_tool_calls_default_false_runs_serially() -> anyhow:: let fixture = test_codex() .with_config(move |config| { - let mut servers = config.mcp_servers.get().clone(); - servers.insert( - server_name.to_string(), - McpServerConfig { - transport: McpServerTransportConfig::Stdio { - command: rmcp_test_server_bin, - args: Vec::new(), - env: None, - env_vars: Vec::new(), - cwd: None, - }, - enabled: true, - required: false, - supports_parallel_tool_calls: false, - disabled_reason: None, - startup_timeout_sec: Some(Duration::from_secs(10)), + insert_mcp_server( + config, + server_name, + stdio_transport(rmcp_test_server_bin, /*env*/ None, Vec::new()), + TestMcpServerOptions { tool_timeout_sec: Some(Duration::from_secs(2)), - enabled_tools: None, - disabled_tools: None, - scopes: None, - oauth_resource: None, - tools: HashMap::new(), + ..Default::default() }, ); - config - .mcp_servers - .set(servers) - .expect("test mcp servers should accept any configuration"); }) .build(&server) .await?; @@ -567,34 +588,15 @@ async fn stdio_mcp_parallel_tool_calls_opt_in_runs_concurrently() -> anyhow::Res let fixture = test_codex() .with_config(move |config| { - let mut servers = config.mcp_servers.get().clone(); - servers.insert( - server_name.to_string(), - McpServerConfig { - transport: McpServerTransportConfig::Stdio { - command: rmcp_test_server_bin, - args: Vec::new(), - env: None, - env_vars: Vec::new(), - cwd: None, - }, - enabled: true, - required: false, + insert_mcp_server( + config, + server_name, + stdio_transport(rmcp_test_server_bin, /*env*/ None, Vec::new()), + TestMcpServerOptions { supports_parallel_tool_calls: true, - disabled_reason: None, - startup_timeout_sec: Some(Duration::from_secs(10)), tool_timeout_sec: Some(Duration::from_secs(2)), - enabled_tools: None, - disabled_tools: None, - scopes: None, - oauth_resource: None, - tools: HashMap::new(), }, ); - config - .mcp_servers - .set(servers) - .expect("test mcp servers should accept any configuration"); }) .build(&server) .await?; @@ -676,66 +678,25 @@ async fn stdio_image_responses_round_trip() -> anyhow::Result<()> { let fixture = test_codex() .with_config(move |config| { - let mut servers = config.mcp_servers.get().clone(); - servers.insert( - server_name.to_string(), - McpServerConfig { - transport: McpServerTransportConfig::Stdio { - command: rmcp_test_server_bin, - args: Vec::new(), - env: Some(HashMap::from([( - "MCP_TEST_IMAGE_DATA_URL".to_string(), - OPENAI_PNG.to_string(), - )])), - env_vars: Vec::new(), - cwd: None, - }, - enabled: true, - required: false, - supports_parallel_tool_calls: false, - disabled_reason: None, - startup_timeout_sec: Some(Duration::from_secs(10)), - tool_timeout_sec: None, - enabled_tools: None, - disabled_tools: None, - scopes: None, - oauth_resource: None, - tools: HashMap::new(), - }, + insert_mcp_server( + config, + server_name, + stdio_transport( + rmcp_test_server_bin, + Some(HashMap::from([( + "MCP_TEST_IMAGE_DATA_URL".to_string(), + OPENAI_PNG.to_string(), + )])), + Vec::new(), + ), + TestMcpServerOptions::default(), ); - config - .mcp_servers - .set(servers) - .expect("test mcp servers should accept any configuration"); }) .build(&server) .await?; let session_model = fixture.session_configured.model.clone(); - let tools_ready_deadline = Instant::now() + Duration::from_secs(30); - loop { - fixture.codex.submit(Op::ListMcpTools).await?; - let list_event = wait_for_event_with_timeout( - &fixture.codex, - |ev| matches!(ev, EventMsg::McpListToolsResponse(_)), - Duration::from_secs(10), - ) - .await; - let EventMsg::McpListToolsResponse(tool_list) = list_event else { - unreachable!("event guard guarantees McpListToolsResponse"); - }; - if tool_list.tools.contains_key(&tool_name) { - break; - } - - let available_tools: Vec<&str> = tool_list.tools.keys().map(String::as_str).collect(); - if Instant::now() >= tools_ready_deadline { - panic!( - "timed out waiting for MCP tool {tool_name} to become available; discovered tools: {available_tools:?}" - ); - } - sleep(Duration::from_millis(200)).await; - } + wait_for_mcp_tool(&fixture, &tool_name).await?; fixture .codex @@ -830,6 +791,189 @@ async fn stdio_image_responses_round_trip() -> anyhow::Result<()> { Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 1)] +#[serial(mcp_test_value)] +async fn stdio_image_responses_preserve_original_detail_metadata() -> anyhow::Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + + let call_id = "img-original-detail-1"; + let server_name = "rmcp"; + let tool_name = format!("mcp__{server_name}__image_scenario"); + let namespace = format!("mcp__{server_name}__"); + + mount_sse_once( + &server, + responses::sse(vec![ + responses::ev_response_created("resp-1"), + responses::ev_function_call_with_namespace( + call_id, + &namespace, + "image_scenario", + r#"{"scenario":"image_only_original_detail"}"#, + ), + responses::ev_completed("resp-1"), + ]), + ) + .await; + let final_mock = mount_sse_once( + &server, + responses::sse(vec![ + responses::ev_assistant_message("msg-1", "rmcp original-detail image completed."), + responses::ev_completed("resp-2"), + ]), + ) + .await; + + let rmcp_test_server_bin = stdio_server_bin()?; + + let fixture = test_codex() + .with_model("gpt-5.3-codex") + .with_config(move |config| { + insert_mcp_server( + config, + server_name, + stdio_transport(rmcp_test_server_bin, /*env*/ None, Vec::new()), + TestMcpServerOptions::default(), + ); + }) + .build(&server) + .await?; + let session_model = fixture.session_configured.model.clone(); + + wait_for_mcp_tool(&fixture, &tool_name).await?; + + fixture + .codex + .submit(Op::UserTurn { + items: vec![UserInput::Text { + text: "call the rmcp image_scenario tool".into(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + cwd: fixture.cwd.path().to_path_buf(), + approval_policy: AskForApproval::Never, + approvals_reviewer: None, + sandbox_policy: SandboxPolicy::new_read_only_policy(), + model: session_model, + effort: None, + summary: None, + service_tier: None, + collaboration_mode: None, + personality: None, + }) + .await?; + + wait_for_event(&fixture.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + + let output_item = final_mock.single_request().function_call_output(call_id); + let output = output_item["output"] + .as_array() + .expect("image MCP output should be content items"); + assert_eq!(output.len(), 2); + assert_wall_time_header( + output[0]["text"] + .as_str() + .expect("first MCP image output item should be wall-time text"), + ); + assert_eq!( + output[1], + json!({ + "type": "input_image", + "image_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==", + "detail": "original", + }) + ); + + server.verify().await; + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 1)] +#[serial(mcp_test_value)] +async fn js_repl_emit_image_preserves_original_detail_for_mcp_images() -> anyhow::Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + let call_id = "js-repl-rmcp-image"; + let rmcp_test_server_bin = stdio_server_bin()?; + + let fixture = test_codex() + .with_model("gpt-5.3-codex") + .with_config(move |config| { + config + .features + .enable(Feature::JsRepl) + .expect("test config should allow feature update"); + insert_mcp_server( + config, + "rmcp", + stdio_transport(rmcp_test_server_bin, /*env*/ None, Vec::new()), + TestMcpServerOptions::default(), + ); + }) + .build(&server) + .await?; + + wait_for_mcp_tool(&fixture, "mcp__rmcp__image_scenario").await?; + + mount_sse_once( + &server, + responses::sse(vec![ + responses::ev_response_created("resp-1"), + ev_custom_tool_call( + call_id, + "js_repl", + r#" +const out = await codex.tool("mcp__rmcp__image_scenario", { + scenario: "image_only_original_detail", +}); +const imageItem = out.output.find((item) => item.type === "input_image"); +await codex.emitImage(imageItem); +"#, + ), + responses::ev_completed("resp-1"), + ]), + ) + .await; + let final_mock = mount_sse_once( + &server, + responses::sse(vec![ + responses::ev_assistant_message("msg-1", "done"), + responses::ev_completed("resp-2"), + ]), + ) + .await; + + fixture + .submit_turn("use js_repl to emit the rmcp image scenario output") + .await?; + + let output = final_mock.single_request().custom_tool_call_output(call_id); + let output_items = output["output"] + .as_array() + .expect("js_repl output should be content items"); + let image_item = output_items + .iter() + .find(|item| item.get("type").and_then(Value::as_str) == Some("input_image")) + .expect("js_repl should emit an input_image item"); + assert_eq!( + image_item.get("detail").and_then(Value::as_str), + Some("original") + ); + assert!( + image_item + .get("image_url") + .and_then(Value::as_str) + .is_some_and(|image_url| image_url.starts_with("data:image/png;base64,")), + "js_repl should emit a png data URL" + ); + + server.verify().await; + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 1)] #[serial(mcp_test_value)] async fn stdio_image_responses_are_sanitized_for_text_only_model() -> anyhow::Result<()> { @@ -909,37 +1053,19 @@ async fn stdio_image_responses_are_sanitized_for_text_only_model() -> anyhow::Re let fixture = test_codex() .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()) .with_config(move |config| { - let mut servers = config.mcp_servers.get().clone(); - servers.insert( - server_name.to_string(), - McpServerConfig { - transport: McpServerTransportConfig::Stdio { - command: rmcp_test_server_bin, - args: Vec::new(), - env: Some(HashMap::from([( - "MCP_TEST_IMAGE_DATA_URL".to_string(), - OPENAI_PNG.to_string(), - )])), - env_vars: Vec::new(), - cwd: None, - }, - enabled: true, - required: false, - supports_parallel_tool_calls: false, - disabled_reason: None, - startup_timeout_sec: Some(Duration::from_secs(10)), - tool_timeout_sec: None, - enabled_tools: None, - disabled_tools: None, - scopes: None, - oauth_resource: None, - tools: HashMap::new(), - }, + insert_mcp_server( + config, + server_name, + stdio_transport( + rmcp_test_server_bin, + Some(HashMap::from([( + "MCP_TEST_IMAGE_DATA_URL".to_string(), + OPENAI_PNG.to_string(), + )])), + Vec::new(), + ), + TestMcpServerOptions::default(), ); - config - .mcp_servers - .set(servers) - .expect("test mcp servers should accept any configuration"); }) .build(&server) .await?; @@ -1041,34 +1167,16 @@ async fn stdio_server_propagates_whitelisted_env_vars() -> anyhow::Result<()> { let fixture = test_codex() .with_config(move |config| { - let mut servers = config.mcp_servers.get().clone(); - servers.insert( - server_name.to_string(), - McpServerConfig { - transport: McpServerTransportConfig::Stdio { - command: rmcp_test_server_bin, - args: Vec::new(), - env: None, - env_vars: vec!["MCP_TEST_VALUE".to_string()], - cwd: None, - }, - enabled: true, - required: false, - supports_parallel_tool_calls: false, - disabled_reason: None, - startup_timeout_sec: Some(Duration::from_secs(10)), - tool_timeout_sec: None, - enabled_tools: None, - disabled_tools: None, - scopes: None, - oauth_resource: None, - tools: HashMap::new(), - }, + insert_mcp_server( + config, + server_name, + stdio_transport( + rmcp_test_server_bin, + /*env*/ None, + vec!["MCP_TEST_VALUE".to_string()], + ), + TestMcpServerOptions::default(), ); - config - .mcp_servers - .set(servers) - .expect("test mcp servers should accept any configuration"); }) .build(&server) .await?; @@ -1211,33 +1319,17 @@ async fn streamable_http_tool_call_round_trip() -> anyhow::Result<()> { let fixture = test_codex() .with_config(move |config| { - let mut servers = config.mcp_servers.get().clone(); - servers.insert( - server_name.to_string(), - McpServerConfig { - transport: McpServerTransportConfig::StreamableHttp { - url: server_url, - bearer_token_env_var: None, - http_headers: None, - env_http_headers: None, - }, - enabled: true, - required: false, - supports_parallel_tool_calls: false, - disabled_reason: None, - startup_timeout_sec: Some(Duration::from_secs(10)), - tool_timeout_sec: None, - enabled_tools: None, - disabled_tools: None, - scopes: None, - oauth_resource: None, - tools: HashMap::new(), + insert_mcp_server( + config, + server_name, + McpServerTransportConfig::StreamableHttp { + url: server_url, + bearer_token_env_var: None, + http_headers: None, + env_http_headers: None, }, + TestMcpServerOptions::default(), ); - config - .mcp_servers - .set(servers) - .expect("test mcp servers should accept any configuration"); }) .build(&server) .await?; @@ -1441,62 +1533,23 @@ async fn streamable_http_with_oauth_round_trip_impl() -> anyhow::Result<()> { // runs the full core suite in one process. config.mcp_oauth_credentials_store_mode = serde_json::from_value(json!("file")) .expect("`file` should deserialize as OAuthCredentialsStoreMode"); - let mut servers = config.mcp_servers.get().clone(); - servers.insert( - server_name.to_string(), - McpServerConfig { - transport: McpServerTransportConfig::StreamableHttp { - url: server_url, - bearer_token_env_var: None, - http_headers: None, - env_http_headers: None, - }, - enabled: true, - required: false, - supports_parallel_tool_calls: false, - disabled_reason: None, - startup_timeout_sec: Some(Duration::from_secs(10)), - tool_timeout_sec: None, - enabled_tools: None, - disabled_tools: None, - scopes: None, - oauth_resource: None, - tools: HashMap::new(), + insert_mcp_server( + config, + server_name, + McpServerTransportConfig::StreamableHttp { + url: server_url, + bearer_token_env_var: None, + http_headers: None, + env_http_headers: None, }, + TestMcpServerOptions::default(), ); - config - .mcp_servers - .set(servers) - .expect("test mcp servers should accept any configuration"); }) .build(&server) .await?; let session_model = fixture.session_configured.model.clone(); - let tools_ready_deadline = Instant::now() + Duration::from_secs(30); - loop { - fixture.codex.submit(Op::ListMcpTools).await?; - let list_event = wait_for_event_with_timeout( - &fixture.codex, - |ev| matches!(ev, EventMsg::McpListToolsResponse(_)), - Duration::from_secs(10), - ) - .await; - let EventMsg::McpListToolsResponse(tool_list) = list_event else { - unreachable!("event guard guarantees McpListToolsResponse"); - }; - if tool_list.tools.contains_key(&tool_name) { - break; - } - - let available_tools: Vec<&str> = tool_list.tools.keys().map(String::as_str).collect(); - if Instant::now() >= tools_ready_deadline { - panic!( - "timed out waiting for MCP tool {tool_name} to become available; discovered tools: {available_tools:?}" - ); - } - sleep(Duration::from_millis(200)).await; - } + wait_for_mcp_tool(&fixture, &tool_name).await?; fixture .codex diff --git a/codex-rs/protocol/src/models.rs b/codex-rs/protocol/src/models.rs index 0f18b6839..220045652 100644 --- a/codex-rs/protocol/src/models.rs +++ b/codex-rs/protocol/src/models.rs @@ -1362,6 +1362,8 @@ impl CallToolResult { fn convert_mcp_content_to_items( contents: &[serde_json::Value], ) -> Option> { + const CODEX_IMAGE_DETAIL_META_KEY: &str = "codex/imageDetail"; + #[derive(serde::Deserialize)] #[serde(tag = "type")] enum McpContent { @@ -1372,6 +1374,8 @@ fn convert_mcp_content_to_items( data: String, #[serde(rename = "mimeType", alias = "mime_type")] mime_type: Option, + #[serde(rename = "_meta", default)] + meta: Option, }, #[serde(other)] Unknown, @@ -1383,7 +1387,11 @@ fn convert_mcp_content_to_items( for content in contents { let item = match serde_json::from_value::(content.clone()) { Ok(McpContent::Text { text }) => FunctionCallOutputContentItem::InputText { text }, - Ok(McpContent::Image { data, mime_type }) => { + Ok(McpContent::Image { + data, + mime_type, + meta, + }) => { saw_image = true; let image_url = if data.starts_with("data:") { data @@ -1393,7 +1401,15 @@ fn convert_mcp_content_to_items( }; FunctionCallOutputContentItem::InputImage { image_url, - detail: None, + detail: meta + .as_ref() + .and_then(serde_json::Value::as_object) + .and_then(|meta| meta.get(CODEX_IMAGE_DETAIL_META_KEY)) + .and_then(serde_json::Value::as_str) + .and_then(|detail| match detail { + "original" => Some(ImageDetail::Original), + _ => None, + }), } } Ok(McpContent::Unknown) | Err(_) => FunctionCallOutputContentItem::InputText { @@ -2264,6 +2280,70 @@ mod tests { Ok(()) } + #[test] + fn preserves_original_detail_metadata_on_mcp_images() -> Result<()> { + let call_tool_result = CallToolResult { + content: vec![serde_json::json!({ + "type": "image", + "data": "BASE64", + "mimeType": "image/png", + "_meta": { + "codex/imageDetail": "original", + }, + })], + structured_content: None, + is_error: Some(false), + meta: None, + }; + + let payload = call_tool_result.into_function_call_output_payload(); + let Some(items) = payload.content_items() else { + panic!("expected content items"); + }; + let items = items.to_vec(); + assert_eq!( + items, + vec![FunctionCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,BASE64".into(), + detail: Some(ImageDetail::Original), + }] + ); + + Ok(()) + } + + #[test] + fn ignores_unknown_mcp_image_detail_metadata() -> Result<()> { + let call_tool_result = CallToolResult { + content: vec![serde_json::json!({ + "type": "image", + "data": "BASE64", + "mimeType": "image/png", + "_meta": { + "codex/imageDetail": "high", + }, + })], + structured_content: None, + is_error: Some(false), + meta: None, + }; + + let payload = call_tool_result.into_function_call_output_payload(); + let Some(items) = payload.content_items() else { + panic!("expected content items"); + }; + let items = items.to_vec(); + assert_eq!( + items, + vec![FunctionCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,BASE64".into(), + detail: None, + }] + ); + + Ok(()) + } + #[test] fn deserializes_array_payload_into_items() -> Result<()> { let json = r#"[ diff --git a/codex-rs/rmcp-client/src/bin/test_stdio_server.rs b/codex-rs/rmcp-client/src/bin/test_stdio_server.rs index 326fd9427..82bc387d9 100644 --- a/codex-rs/rmcp-client/src/bin/test_stdio_server.rs +++ b/codex-rs/rmcp-client/src/bin/test_stdio_server.rs @@ -208,6 +208,7 @@ impl TestToolServer { /// - `codex mcp add mcpimg -- /abs/path/to/test_stdio_server` /// - Then in Codex TUI, ask it to call: /// - `mcpimg.image_scenario({"scenario":"image_only"})` + /// - `mcpimg.image_scenario({"scenario":"image_only_original_detail"})` /// - `mcpimg.image_scenario({"scenario":"text_then_image","caption":"Here is the image:"})` /// - `mcpimg.image_scenario({"scenario":"invalid_base64_then_image"})` /// - `mcpimg.image_scenario({"scenario":"invalid_image_bytes_then_image"})` @@ -224,6 +225,7 @@ impl TestToolServer { "type": "string", "enum": [ "image_only", + "image_only_original_detail", "text_then_image", "invalid_base64_then_image", "invalid_image_bytes_then_image", @@ -339,6 +341,7 @@ fn sync_barrier_map() -> &'static tokio::sync::Mutex { content.push(rmcp::model::Content::image(valid_data_b64, mime_type)); } + ImageScenario::ImageOnlyOriginalDetail => { + let mut meta = rmcp::model::Meta::new(); + meta.insert( + "codex/imageDetail".to_string(), + serde_json::json!("original"), + ); + content.push(rmcp::model::Annotated::new( + rmcp::model::RawContent::Image(rmcp::model::RawImageContent { + data: valid_data_b64, + mime_type, + meta: Some(meta), + }), + None, + )); + } ImageScenario::TextThenImage => { content.push(rmcp::model::Content::text(caption)); content.push(rmcp::model::Content::image(valid_data_b64, mime_type)); diff --git a/codex-rs/tools/src/image_detail.rs b/codex-rs/tools/src/image_detail.rs index 94639e61f..56987e483 100644 --- a/codex-rs/tools/src/image_detail.rs +++ b/codex-rs/tools/src/image_detail.rs @@ -1,3 +1,4 @@ +use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::ImageDetail; use codex_protocol::openai_models::ModelInfo; @@ -17,6 +18,23 @@ pub fn normalize_output_image_detail( } } +pub fn sanitize_original_image_detail( + can_request_original_image_detail: bool, + items: &mut [FunctionCallOutputContentItem], +) { + if can_request_original_image_detail { + return; + } + + for item in items { + if let FunctionCallOutputContentItem::InputImage { detail, .. } = item + && matches!(detail, Some(ImageDetail::Original)) + { + *detail = None; + } + } +} + #[cfg(test)] #[path = "image_detail_tests.rs"] mod tests; diff --git a/codex-rs/tools/src/image_detail_tests.rs b/codex-rs/tools/src/image_detail_tests.rs index 35d666149..c1a0f8ca1 100644 --- a/codex-rs/tools/src/image_detail_tests.rs +++ b/codex-rs/tools/src/image_detail_tests.rs @@ -1,4 +1,5 @@ use super::*; +use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::ImageDetail; use codex_protocol::openai_models::ModelInfo; use pretty_assertions::assert_eq; @@ -73,3 +74,39 @@ fn unsupported_non_original_detail_is_dropped() { None ); } + +#[test] +fn sanitize_original_drops_original_without_support() { + let mut items = vec![ + FunctionCallOutputContentItem::InputText { + text: "header".to_string(), + }, + FunctionCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,AAA".to_string(), + detail: Some(ImageDetail::Original), + }, + FunctionCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,BBB".to_string(), + detail: Some(ImageDetail::Low), + }, + ]; + + sanitize_original_image_detail(/*can_request_original_image_detail*/ false, &mut items); + + assert_eq!( + items, + vec![ + FunctionCallOutputContentItem::InputText { + text: "header".to_string(), + }, + FunctionCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,AAA".to_string(), + detail: None, + }, + FunctionCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,BBB".to_string(), + detail: Some(ImageDetail::Low), + }, + ] + ); +} diff --git a/codex-rs/tools/src/lib.rs b/codex-rs/tools/src/lib.rs index 4a3ccc01a..b7c6a57d2 100644 --- a/codex-rs/tools/src/lib.rs +++ b/codex-rs/tools/src/lib.rs @@ -53,6 +53,7 @@ pub use codex_protocol::ToolName; pub use dynamic_tool::parse_dynamic_tool; pub use image_detail::can_request_original_image_detail; pub use image_detail::normalize_output_image_detail; +pub use image_detail::sanitize_original_image_detail; pub use js_repl_tool::create_js_repl_reset_tool; pub use js_repl_tool::create_js_repl_tool; pub use json_schema::AdditionalProperties; diff --git a/docs/js_repl.md b/docs/js_repl.md index d5edc71b4..aee635b77 100644 --- a/docs/js_repl.md +++ b/docs/js_repl.md @@ -86,6 +86,7 @@ imported local file. They are not resolved relative to the imported file's locat - `codex.emitImage(...)` accepts a data URL, a single `input_image` item, an object like `{ bytes, mimeType }`, or a raw tool response object that contains exactly one image and no text. Call it multiple times if you want to emit multiple images. - `codex.emitImage(...)` rejects mixed text-and-image content. - Request full-resolution image processing with `detail: "original"` only when the `view_image` tool schema includes a `detail` argument. The same availability applies to `codex.emitImage(...)`: if `view_image.detail` is present, you may also pass `detail: "original"` there. Use this when high-fidelity image perception or precise localization is needed, especially for CUA agents. +- Raw MCP image blocks can request the same behavior by returning `_meta: { "codex/imageDetail": "original" }` on the image content item. - Example of sharing an in-memory Playwright screenshot: `await codex.emitImage({ bytes: await page.screenshot({ type: "jpeg", quality: 85 }), mimeType: "image/jpeg", detail: "original" })`. - Example of sharing a local image tool result: `await codex.emitImage(codex.tool("view_image", { path: "/absolute/path", detail: "original" }))`. - When encoding an image to send with `codex.emitImage(...)` or `view_image`, prefer JPEG at about 85 quality when lossy compression is acceptable; use PNG when transparency or lossless detail matters. Smaller uploads are faster and less likely to hit size limits.