From d2885dc3cdbaf98a60e7256ec3e7dfdf2381041d Mon Sep 17 00:00:00 2001 From: Michael Bolin Date: Sat, 27 Jun 2026 10:47:54 -0700 Subject: [PATCH] core: stabilize synthesized call output IDs (#30327) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Why Response item IDs represent stable conversation identity. `ContextManager::for_prompt` repairs an unmatched call by synthesizing an `"aborted"` output in the disposable prompt projection, but that output previously had no ID. Assigning a fresh ID on every prompt build would make retries and resumes change otherwise identical model context and reduce prompt-cache reuse. The concrete bug is that these normalization-created outputs bypass the regular item-ID allocation path. Even with item IDs enabled, a prompt could therefore contain an identified call paired with a synthetic output whose `id` was missing. This change closes that gap by deriving the output ID from the source call's item ID. For legacy calls that have no item ID, the output remains ID-less because there is no stable source identity to derive from. The originating call already has a stable item ID under the item-ID model introduced in #28814. A prompt-only output can therefore derive stable identity from that call without mutating canonical history or persisted rollouts. This addresses the failure exposed by #30311 while keeping normalization read-only outside its detached prompt snapshot. UUIDv5 is intentional here because it is the standard namespaced, deterministic UUID construction. Using the output kind and source call ID as the name produces the same UUID on every projection while keeping output kinds in separate name domains. UUIDv7 would introduce randomness and time, so keeping it stable would require persisting the synthetic repair. UUIDv5 uses SHA-1 internally, but this is only an identity mapping—not an authenticity or security boundary. 
## What changed - Derive a deterministic UUIDv5 ID for each synthesized call output from the source call item ID. - Use the Responses API prefix appropriate for function, custom-tool, tool-search, and local-shell outputs. - Preserve the existing insertion position immediately after the unmatched call. - Keep synthesized outputs prompt-only; no rollout, task-lifecycle, compaction, or raw-response behavior changes. ## Testing - `just test -p codex-core for_prompt_assigns_stable_id_to_synthetic_output_without_reordering_history` - `just test -p codex-core synthetic_call_output_id_is_stable_across_resumes` - `just test -p codex-core normalize_adds_missing_output` - `just test -p codex-core response_item_ids` --- .../core/src/context_manager/history_tests.rs | 43 ++++++++ .../core/src/context_manager/normalize.rs | 32 ++++-- codex-rs/core/tests/suite/client.rs | 101 ++++++++++++++++++ 3 files changed, 170 insertions(+), 6 deletions(-) diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index ad46f7be7..cbe0209e2 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -1629,6 +1629,49 @@ fn normalize_adds_missing_output_for_function_call_inserts_output() { ); } +#[test] +fn for_prompt_assigns_stable_id_to_synthetic_output_without_reordering_history() { + let items = vec![ + ResponseItem::FunctionCall { + id: Some("fc_existing".to_string()), + name: "do_it".to_string(), + namespace: None, + arguments: "{}".to_string(), + call_id: "call-x".to_string(), + internal_chat_message_metadata_passthrough: None, + }, + ResponseItem::Message { + id: Some("msg_later".to_string()), + role: "user".to_string(), + content: vec![ContentItem::InputText { + text: "later turn".to_string(), + }], + phase: None, + internal_chat_message_metadata_passthrough: None, + }, + ]; + + let first = 
create_history_with_items(items.clone()).for_prompt(&default_input_modalities()); + let second = create_history_with_items(items).for_prompt(&default_input_modalities()); + + assert_eq!( + first, second, + "repeated prompt projections should assign the same ID to the synthetic output" + ); + let [ + ResponseItem::FunctionCall { .. }, + ResponseItem::FunctionCallOutput { id: Some(id), .. }, + ResponseItem::Message { .. }, + ] = first.as_slice() + else { + panic!("expected the synthetic output between its call and the later message"); + }; + assert!( + id.starts_with("fco_"), + "the synthetic function call output should use the Responses API output ID prefix" + ); +} + #[test] fn normalize_adds_missing_output_for_tool_search_call() { let items = vec![ResponseItem::ToolSearchCall { diff --git a/codex-rs/core/src/context_manager/normalize.rs b/codex-rs/core/src/context_manager/normalize.rs index d11cffe4e..3fd1a59a8 100644 --- a/codex-rs/core/src/context_manager/normalize.rs +++ b/codex-rs/core/src/context_manager/normalize.rs @@ -4,12 +4,15 @@ use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseItem; use codex_protocol::openai_models::InputModality; use std::collections::HashSet; +use uuid::Uuid; use crate::util::error_or_panic; use tracing::info; const IMAGE_CONTENT_OMITTED_PLACEHOLDER: &str = "image content omitted because you do not support image input"; +// Changing this value would change model-visible IDs and invalidate prompt caches. +const SYNTHETIC_OUTPUT_ID_NAMESPACE: Uuid = Uuid::from_u128(0x90d38d3e_6a5b_4d52_bfe2_2f1e634bfac4); pub(crate) fn ensure_call_outputs_present(items: &mut Vec<ResponseItem>) { let mut function_output_ids = HashSet::new(); @@ -40,14 +43,14 @@ pub(crate) fn ensure_call_outputs_present(items: &mut Vec<ResponseItem>) { for (idx, item) in items.iter().enumerate() { match item { - ResponseItem::FunctionCall { call_id, .. } + ResponseItem::FunctionCall { id, call_id, .. 
} if !function_output_ids.contains(call_id.as_str()) => { info!("Function call output is missing for call id: {call_id}"); missing_outputs_to_insert.push(( idx, ResponseItem::FunctionCallOutput { - id: None, + id: synthetic_output_id("fco", id.as_deref()), call_id: call_id.clone(), output: FunctionCallOutputPayload::from_text("aborted".to_string()), internal_chat_message_metadata_passthrough: None, @@ -55,6 +58,7 @@ pub(crate) fn ensure_call_outputs_present(items: &mut Vec<ResponseItem>) { )); } ResponseItem::ToolSearchCall { + id, call_id: Some(call_id), .. } if !tool_search_output_ids.contains(call_id.as_str()) => { @@ -62,7 +66,7 @@ pub(crate) fn ensure_call_outputs_present(items: &mut Vec<ResponseItem>) { missing_outputs_to_insert.push(( idx, ResponseItem::ToolSearchOutput { - id: None, + id: synthetic_output_id("tso", id.as_deref()), call_id: Some(call_id.clone()), status: "completed".to_string(), execution: "client".to_string(), @@ -71,7 +75,7 @@ pub(crate) fn ensure_call_outputs_present(items: &mut Vec<ResponseItem>) { }, )); } - ResponseItem::CustomToolCall { call_id, .. } + ResponseItem::CustomToolCall { id, call_id, .. } if !custom_tool_output_ids.contains(call_id.as_str()) => { error_or_panic(format!( @@ -80,7 +84,7 @@ pub(crate) fn ensure_call_outputs_present(items: &mut Vec<ResponseItem>) { missing_outputs_to_insert.push(( idx, ResponseItem::CustomToolCallOutput { - id: None, + id: synthetic_output_id("ctco", id.as_deref()), call_id: call_id.clone(), name: None, output: FunctionCallOutputPayload::from_text("aborted".to_string()), @@ -90,6 +94,7 @@ pub(crate) fn ensure_call_outputs_present(items: &mut Vec<ResponseItem>) { } // LocalShellCall is represented in upstream streams by a FunctionCallOutput ResponseItem::LocalShellCall { + id, call_id: Some(call_id), .. 
} if !function_output_ids.contains(call_id.as_str()) => { @@ -99,7 +104,7 @@ pub(crate) fn ensure_call_outputs_present(items: &mut Vec<ResponseItem>) { missing_outputs_to_insert.push(( idx, ResponseItem::FunctionCallOutput { - id: None, + id: synthetic_output_id("fco", id.as_deref()), call_id: call_id.clone(), output: FunctionCallOutputPayload::from_text("aborted".to_string()), internal_chat_message_metadata_passthrough: None, @@ -121,6 +126,21 @@ pub(crate) fn ensure_call_outputs_present(items: &mut Vec<ResponseItem>) { } } +/// Derives a stable ID for a prompt-only output from its source call's item ID. +/// +/// Prompt normalization can run repeatedly without persisting its synthetic +/// outputs, so the namespace and name format must remain stable across retries +/// and resumes to preserve prompt-cache reuse. Returning `None` when the source +/// call has no ID preserves the legacy behavior for older history items. +fn synthetic_output_id(prefix: &str, item_id: Option<&str>) -> Option<String> { + let source_id = item_id.filter(|id| !id.is_empty())?; + let name = format!("{prefix}:{source_id}"); + Some(format!( + "{prefix}_{}", + Uuid::new_v5(&SYNTHETIC_OUTPUT_ID_NAMESPACE, name.as_bytes()) + )) +} + pub(crate) fn remove_orphan_outputs(items: &mut Vec<ResponseItem>) { let function_call_ids: HashSet<String> = items .iter() diff --git a/codex-rs/core/tests/suite/client.rs b/codex-rs/core/tests/suite/client.rs index 7acf67da5..e109e51e6 100644 --- a/codex-rs/core/tests/suite/client.rs +++ b/codex-rs/core/tests/suite/client.rs @@ -363,6 +363,107 @@ async fn response_item_ids_persist_across_resume_and_preserve_server_ids() -> an Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn synthetic_call_output_id_is_stable_across_resumes() -> anyhow::Result<()> { + let function_call_id = "missing-output-call"; + let thread_id = ThreadId::default(); + let rollout = vec![ + RolloutLine { + timestamp: "2024-01-01T00:00:00.000Z".to_string(), + item: RolloutItem::SessionMeta(SessionMetaLine { + meta: 
SessionMeta { + session_id: thread_id.into(), + id: thread_id, + parent_thread_id: None, + timestamp: "2024-01-01T00:00:00Z".to_string(), + cwd: ".".into(), + originator: "test_originator".to_string(), + cli_version: "test_version".to_string(), + model_provider: Some("test-provider".to_string()), + ..Default::default() + }, + git: None, + }), + }, + RolloutLine { + timestamp: "2024-01-01T00:00:01.000Z".to_string(), + item: RolloutItem::ResponseItem(ResponseItem::FunctionCall { + id: Some("fc_existing".to_string()), + name: "do_it".to_string(), + namespace: None, + arguments: "{}".to_string(), + call_id: function_call_id.to_string(), + internal_chat_message_metadata_passthrough: None, + }), + }, + ]; + let tmpdir = TempDir::new()?; + let session_path = tmpdir.path().join("normalized-call-output-item-id.jsonl"); + let mut file = std::fs::File::create(&session_path)?; + for line in rollout { + writeln!(file, "{}", serde_json::to_string(&line)?)?; + } + + let server = MockServer::start().await; + let response_mock = mount_sse_sequence( + &server, + vec![ + sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]), + sse(vec![ev_response_created("resp-2"), ev_completed("resp-2")]), + ], + ) + .await; + let codex_home = Arc::new(TempDir::new()?); + let mut builder = test_codex().with_config(|config| { + let _ = config.features.enable(Feature::ItemIds); + }); + let first = builder + .resume(&server, Arc::clone(&codex_home), session_path.clone()) + .await?; + + first.submit_turn("first resume").await?; + first.codex.submit(Op::Shutdown).await?; + wait_for_event(&first.codex, |event| { + matches!(event, EventMsg::ShutdownComplete) + }) + .await; + assert!( + !std::fs::read_to_string(&session_path)?.contains("\"type\":\"function_call_output\""), + "prompt-only repair should not be persisted to the rollout" + ); + + builder = builder.with_config(|config| { + let _ = config.features.enable(Feature::ItemIds); + }); + let second = builder.resume(&server, codex_home, 
session_path).await?; + second.submit_turn("second resume").await?; + + let requests = response_mock.requests(); + assert_eq!(requests.len(), 2); + let first_output = requests[0].function_call_output(function_call_id); + let first_output_id = first_output + .get("id") + .and_then(serde_json::Value::as_str) + .expect("reconstructed output should have an item ID") + .to_string(); + let first_output_uuid = first_output_id + .strip_prefix("fco_") + .expect("synthetic output should use the Responses API prefix"); + assert_eq!( + Uuid::parse_str(first_output_uuid)?.get_version(), + Some(uuid::Version::Sha1) + ); + assert_eq!( + requests[1] + .function_call_output(function_call_id) + .get("id") + .and_then(serde_json::Value::as_str), + Some(first_output_id.as_str()) + ); + + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn response_item_ids_are_sent_for_all_remote_v2_compaction_requests() -> anyhow::Result<()> { let server = MockServer::start().await;