mirror of
https://github.com/pchuan98/codex.git
synced 2026-07-01 00:31:56 +08:00
[codex] Emit image view as core item (#20512)
## Why Image-view results should be represented as a core-produced turn item instead of being reconstructed by app-server. At the same time, existing rollout/history paths still understand the legacy `ViewImageToolCall` event, so this keeps that event as compatibility output generated from the new item lifecycle. ## What changed - Added `TurnItem::ImageView` to `codex-protocol`. - Emitted image-view item start/completion directly from the core `view_image` handler. - Kept `ViewImageToolCall` as a legacy event and generate it from completed `TurnItem::ImageView` items. - Kept `thread_history.rs` on the legacy `ViewImageToolCall` replay path, with `ImageView` item lifecycle events ignored there. - Updated app-server protocol conversion, rollout persistence, and affected exhaustive event matches for the new item plus legacy fan-out shape. ## Verification - `cargo test -p codex-protocol -p codex-app-server-protocol -p codex-rollout -p codex-rollout-trace -p codex-mcp-server -p codex-app-server --lib` - `cargo test -p codex-core --test all view_image_tool_attaches_local_image` - `just fix -p codex-protocol -p codex-core -p codex-app-server-protocol -p codex-app-server -p codex-rollout -p codex-rollout-trace -p codex-mcp-server` - `git diff --check`
This commit is contained in:
committed by
GitHub
Unverified
parent
610eefb86b
commit
aed74e5ee4
@@ -356,6 +356,7 @@ impl ThreadHistoryBuilder {
|
||||
| codex_protocol::items::TurnItem::AgentMessage(_)
|
||||
| codex_protocol::items::TurnItem::Reasoning(_)
|
||||
| codex_protocol::items::TurnItem::WebSearch(_)
|
||||
| codex_protocol::items::TurnItem::ImageView(_)
|
||||
| codex_protocol::items::TurnItem::ImageGeneration(_)
|
||||
| codex_protocol::items::TurnItem::FileChange(_)
|
||||
| codex_protocol::items::TurnItem::ContextCompaction(_) => {}
|
||||
@@ -378,6 +379,7 @@ impl ThreadHistoryBuilder {
|
||||
| codex_protocol::items::TurnItem::AgentMessage(_)
|
||||
| codex_protocol::items::TurnItem::Reasoning(_)
|
||||
| codex_protocol::items::TurnItem::WebSearch(_)
|
||||
| codex_protocol::items::TurnItem::ImageView(_)
|
||||
| codex_protocol::items::TurnItem::ImageGeneration(_)
|
||||
| codex_protocol::items::TurnItem::FileChange(_)
|
||||
| codex_protocol::items::TurnItem::ContextCompaction(_) => {}
|
||||
|
||||
@@ -6463,6 +6463,10 @@ impl From<CoreTurnItem> for ThreadItem {
|
||||
query: search.query,
|
||||
action: Some(WebSearchAction::from(search.action)),
|
||||
},
|
||||
CoreTurnItem::ImageView(image) => ThreadItem::ImageView {
|
||||
id: image.id,
|
||||
path: image.path,
|
||||
},
|
||||
CoreTurnItem::ImageGeneration(image) => ThreadItem::ImageGeneration {
|
||||
id: image.id,
|
||||
status: image.status,
|
||||
@@ -8089,6 +8093,7 @@ mod tests {
|
||||
use codex_protocol::items::AgentMessageContent;
|
||||
use codex_protocol::items::AgentMessageItem;
|
||||
use codex_protocol::items::FileChangeItem;
|
||||
use codex_protocol::items::ImageViewItem;
|
||||
use codex_protocol::items::ReasoningItem;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::items::UserMessageItem;
|
||||
@@ -10370,6 +10375,19 @@ mod tests {
|
||||
}
|
||||
);
|
||||
|
||||
let image_view_item = TurnItem::ImageView(ImageViewItem {
|
||||
id: "view-image-1".to_string(),
|
||||
path: test_path_buf("/tmp/view-image.png").abs(),
|
||||
});
|
||||
|
||||
assert_eq!(
|
||||
ThreadItem::from(image_view_item),
|
||||
ThreadItem::ImageView {
|
||||
id: "view-image-1".to_string(),
|
||||
path: test_path_buf("/tmp/view-image.png").abs(),
|
||||
}
|
||||
);
|
||||
|
||||
let file_change_item = TurnItem::FileChange(FileChangeItem {
|
||||
id: "patch-1".to_string(),
|
||||
changes: [(
|
||||
|
||||
@@ -954,28 +954,7 @@ pub(crate) async fn apply_bespoke_event_handling(
|
||||
}))
|
||||
.await;
|
||||
}
|
||||
EventMsg::ViewImageToolCall(view_image_event) => {
|
||||
let item = ThreadItem::ImageView {
|
||||
id: view_image_event.call_id.clone(),
|
||||
path: view_image_event.path.clone(),
|
||||
};
|
||||
let started = ItemStartedNotification {
|
||||
thread_id: conversation_id.to_string(),
|
||||
turn_id: event_turn_id.clone(),
|
||||
item: item.clone(),
|
||||
};
|
||||
outgoing
|
||||
.send_server_notification(ServerNotification::ItemStarted(started))
|
||||
.await;
|
||||
let completed = ItemCompletedNotification {
|
||||
thread_id: conversation_id.to_string(),
|
||||
turn_id: event_turn_id.clone(),
|
||||
item,
|
||||
};
|
||||
outgoing
|
||||
.send_server_notification(ServerNotification::ItemCompleted(completed))
|
||||
.await;
|
||||
}
|
||||
EventMsg::ViewImageToolCall(_) => {}
|
||||
EventMsg::EnteredReviewMode(review_request) => {
|
||||
let review = review_request
|
||||
.user_facing_hint
|
||||
|
||||
@@ -1470,9 +1470,9 @@ pub(super) fn realtime_text_for_event(msg: &EventMsg) -> Option<String> {
|
||||
| EventMsg::PatchApplyBegin(_)
|
||||
| EventMsg::PatchApplyUpdated(_)
|
||||
| EventMsg::PatchApplyEnd(_)
|
||||
| EventMsg::ViewImageToolCall(_)
|
||||
| EventMsg::ImageGenerationBegin(_)
|
||||
| EventMsg::ImageGenerationEnd(_)
|
||||
| EventMsg::ViewImageToolCall(_)
|
||||
| EventMsg::ExecApprovalRequest(_)
|
||||
| EventMsg::RequestPermissions(_)
|
||||
| EventMsg::RequestUserInput(_)
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
use codex_protocol::items::ImageViewItem;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
use codex_protocol::models::FunctionCallOutputBody;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
@@ -17,8 +19,6 @@ use crate::tools::context::ToolPayload;
|
||||
use crate::tools::handlers::parse_arguments;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
use crate::tools::registry::ToolKind;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::ViewImageToolCallEvent;
|
||||
|
||||
pub struct ViewImageHandler;
|
||||
|
||||
@@ -152,15 +152,12 @@ impl ToolHandler for ViewImageHandler {
|
||||
})?;
|
||||
let image_url = image.into_data_url();
|
||||
|
||||
session
|
||||
.send_event(
|
||||
turn.as_ref(),
|
||||
EventMsg::ViewImageToolCall(ViewImageToolCallEvent {
|
||||
call_id,
|
||||
path: event_path,
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
let item = TurnItem::ImageView(ImageViewItem {
|
||||
id: call_id,
|
||||
path: event_path,
|
||||
});
|
||||
session.emit_turn_item_started(turn.as_ref(), &item).await;
|
||||
session.emit_turn_item_completed(turn.as_ref(), item).await;
|
||||
|
||||
Ok(ViewImageOutput {
|
||||
image_url,
|
||||
|
||||
@@ -299,12 +299,26 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
|
||||
))
|
||||
.await?;
|
||||
|
||||
let mut tool_event = None;
|
||||
let mut item_started = None;
|
||||
let mut item_completed = None;
|
||||
let mut legacy_event = None;
|
||||
wait_for_event_with_timeout(
|
||||
codex,
|
||||
|event| match event {
|
||||
EventMsg::ViewImageToolCall(_) => {
|
||||
tool_event = Some(event.clone());
|
||||
EventMsg::ItemStarted(event) => {
|
||||
if matches!(&event.item, codex_protocol::items::TurnItem::ImageView(_)) {
|
||||
item_started = Some(event.item.clone());
|
||||
}
|
||||
false
|
||||
}
|
||||
EventMsg::ItemCompleted(event) => {
|
||||
if matches!(&event.item, codex_protocol::items::TurnItem::ImageView(_)) {
|
||||
item_completed = Some(event.item.clone());
|
||||
}
|
||||
false
|
||||
}
|
||||
EventMsg::ViewImageToolCall(event) => {
|
||||
legacy_event = Some(event.clone());
|
||||
false
|
||||
}
|
||||
EventMsg::TurnComplete(_) => true,
|
||||
@@ -316,12 +330,23 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
|
||||
)
|
||||
.await;
|
||||
|
||||
let tool_event = match tool_event.expect("view image tool event emitted") {
|
||||
EventMsg::ViewImageToolCall(event) => event,
|
||||
_ => unreachable!("stored event must be ViewImageToolCall"),
|
||||
};
|
||||
assert_eq!(tool_event.call_id, call_id);
|
||||
assert_eq!(tool_event.path, abs_path);
|
||||
match item_started.expect("view image item started event emitted") {
|
||||
codex_protocol::items::TurnItem::ImageView(item) => {
|
||||
assert_eq!(item.id, call_id);
|
||||
assert_eq!(item.path, abs_path);
|
||||
}
|
||||
other => panic!("expected ImageView item, got {other:?}"),
|
||||
}
|
||||
match item_completed.expect("view image item completed event emitted") {
|
||||
codex_protocol::items::TurnItem::ImageView(item) => {
|
||||
assert_eq!(item.id, call_id);
|
||||
assert_eq!(item.path, abs_path);
|
||||
}
|
||||
other => panic!("expected ImageView item, got {other:?}"),
|
||||
}
|
||||
let legacy_event = legacy_event.expect("legacy view image event emitted");
|
||||
assert_eq!(legacy_event.call_id, call_id);
|
||||
assert_eq!(legacy_event.path, abs_path);
|
||||
|
||||
let req = mock.single_request();
|
||||
let body = req.body_json();
|
||||
|
||||
@@ -356,9 +356,9 @@ async fn run_codex_tool_session_inner(
|
||||
| EventMsg::TurnAborted(_)
|
||||
| EventMsg::UserMessage(_)
|
||||
| EventMsg::ShutdownComplete
|
||||
| EventMsg::ViewImageToolCall(_)
|
||||
| EventMsg::ImageGenerationBegin(_)
|
||||
| EventMsg::ImageGenerationEnd(_)
|
||||
| EventMsg::ViewImageToolCall(_)
|
||||
| EventMsg::RawResponseItem(_)
|
||||
| EventMsg::EnteredReviewMode(_)
|
||||
| EventMsg::ItemStarted(_)
|
||||
|
||||
@@ -14,6 +14,7 @@ use crate::protocol::PatchApplyBeginEvent;
|
||||
use crate::protocol::PatchApplyEndEvent;
|
||||
use crate::protocol::PatchApplyStatus;
|
||||
use crate::protocol::UserMessageEvent;
|
||||
use crate::protocol::ViewImageToolCallEvent;
|
||||
use crate::protocol::WebSearchEndEvent;
|
||||
use crate::user_input::ByteRange;
|
||||
use crate::user_input::TextElement;
|
||||
@@ -38,6 +39,7 @@ pub enum TurnItem {
|
||||
Plan(PlanItem),
|
||||
Reasoning(ReasoningItem),
|
||||
WebSearch(WebSearchItem),
|
||||
ImageView(ImageViewItem),
|
||||
ImageGeneration(ImageGenerationItem),
|
||||
FileChange(FileChangeItem),
|
||||
ContextCompaction(ContextCompactionItem),
|
||||
@@ -121,6 +123,12 @@ pub struct WebSearchItem {
|
||||
pub action: WebSearchAction,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema, PartialEq)]
|
||||
pub struct ImageViewItem {
|
||||
pub id: String,
|
||||
pub path: AbsolutePathBuf,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema, PartialEq)]
|
||||
pub struct ImageGenerationItem {
|
||||
pub id: String,
|
||||
@@ -439,6 +447,7 @@ impl TurnItem {
|
||||
TurnItem::Plan(item) => item.id.clone(),
|
||||
TurnItem::Reasoning(item) => item.id.clone(),
|
||||
TurnItem::WebSearch(item) => item.id.clone(),
|
||||
TurnItem::ImageView(item) => item.id.clone(),
|
||||
TurnItem::ImageGeneration(item) => item.id.clone(),
|
||||
TurnItem::FileChange(item) => item.id.clone(),
|
||||
TurnItem::ContextCompaction(item) => item.id.clone(),
|
||||
@@ -452,6 +461,12 @@ impl TurnItem {
|
||||
TurnItem::AgentMessage(item) => item.as_legacy_events(),
|
||||
TurnItem::Plan(_) => Vec::new(),
|
||||
TurnItem::WebSearch(item) => vec![item.as_legacy_event()],
|
||||
TurnItem::ImageView(item) => {
|
||||
vec![EventMsg::ViewImageToolCall(ViewImageToolCallEvent {
|
||||
call_id: item.id.clone(),
|
||||
path: item.path.clone(),
|
||||
})]
|
||||
}
|
||||
TurnItem::ImageGeneration(item) => vec![item.as_legacy_event()],
|
||||
TurnItem::FileChange(item) => item
|
||||
.as_legacy_end_event(String::new())
|
||||
|
||||
@@ -1836,6 +1836,7 @@ impl HasLegacyEvent for ItemStartedEvent {
|
||||
TurnItem::WebSearch(item) => vec![EventMsg::WebSearchBegin(WebSearchBeginEvent {
|
||||
call_id: item.id.clone(),
|
||||
})],
|
||||
TurnItem::ImageView(_) => Vec::new(),
|
||||
TurnItem::ImageGeneration(item) => {
|
||||
vec![EventMsg::ImageGenerationBegin(ImageGenerationBeginEvent {
|
||||
call_id: item.id.clone(),
|
||||
|
||||
@@ -243,11 +243,11 @@ pub(crate) fn tool_runtime_trace_event(event: &EventMsg) -> Option<ToolRuntimeTr
|
||||
| EventMsg::WebSearchEnd(_)
|
||||
| EventMsg::ImageGenerationBegin(_)
|
||||
| EventMsg::ImageGenerationEnd(_)
|
||||
| EventMsg::ViewImageToolCall(_)
|
||||
| EventMsg::ExecCommandBegin(_)
|
||||
| EventMsg::ExecCommandOutputDelta(_)
|
||||
| EventMsg::TerminalInteraction(_)
|
||||
| EventMsg::ExecCommandEnd(_)
|
||||
| EventMsg::ViewImageToolCall(_)
|
||||
| EventMsg::ExecApprovalRequest(_)
|
||||
| EventMsg::RequestPermissions(_)
|
||||
| EventMsg::RequestUserInput(_)
|
||||
@@ -318,11 +318,11 @@ pub(crate) fn wrapped_protocol_event_type(event: &EventMsg) -> Option<&'static s
|
||||
| EventMsg::WebSearchEnd(_)
|
||||
| EventMsg::ImageGenerationBegin(_)
|
||||
| EventMsg::ImageGenerationEnd(_)
|
||||
| EventMsg::ViewImageToolCall(_)
|
||||
| EventMsg::ExecCommandBegin(_)
|
||||
| EventMsg::ExecCommandOutputDelta(_)
|
||||
| EventMsg::TerminalInteraction(_)
|
||||
| EventMsg::ExecCommandEnd(_)
|
||||
| EventMsg::ViewImageToolCall(_)
|
||||
| EventMsg::ExecApprovalRequest(_)
|
||||
| EventMsg::RequestPermissions(_)
|
||||
| EventMsg::RequestUserInput(_)
|
||||
|
||||
Reference in New Issue
Block a user