diff --git a/codex-rs/app-server-protocol/src/protocol/common.rs b/codex-rs/app-server-protocol/src/protocol/common.rs index f22cab25f..edeabfc7c 100644 --- a/codex-rs/app-server-protocol/src/protocol/common.rs +++ b/codex-rs/app-server-protocol/src/protocol/common.rs @@ -3082,8 +3082,10 @@ mod tests { request_id: RequestId::Integer(9), params: v2::ThreadRealtimeStartParams { architecture: Some(RealtimeConversationArchitecture::Avas), + client_managed_handoffs: Some(true), codex_responses_as_items: None, codex_response_item_prefix: None, + codex_response_handoff_prefix: Some("silent context".to_string()), thread_id: "thr_123".to_string(), model: Some("realtime-treatment-model".to_string()), output_modality: RealtimeOutputModality::Audio, @@ -3102,8 +3104,10 @@ mod tests { "params": { "architecture": "avas", "threadId": "thr_123", + "clientManagedHandoffs": true, "codexResponsesAsItems": null, "codexResponseItemPrefix": null, + "codexResponseHandoffPrefix": "silent context", "model": "realtime-treatment-model", "outputModality": "audio", "includeStartupContext": false, @@ -3125,8 +3129,10 @@ mod tests { request_id: RequestId::Integer(9), params: v2::ThreadRealtimeStartParams { architecture: None, + client_managed_handoffs: None, codex_responses_as_items: None, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, thread_id: "thr_123".to_string(), model: None, output_modality: RealtimeOutputModality::Audio, @@ -3145,8 +3151,10 @@ mod tests { "params": { "architecture": null, "threadId": "thr_123", + "clientManagedHandoffs": null, "codexResponsesAsItems": null, "codexResponseItemPrefix": null, + "codexResponseHandoffPrefix": null, "model": null, "outputModality": "audio", "includeStartupContext": null, @@ -3163,8 +3171,10 @@ mod tests { request_id: RequestId::Integer(9), params: v2::ThreadRealtimeStartParams { architecture: None, + client_managed_handoffs: None, codex_responses_as_items: None, codex_response_item_prefix: None, + 
codex_response_handoff_prefix: None, thread_id: "thr_123".to_string(), model: None, output_modality: RealtimeOutputModality::Audio, @@ -3183,8 +3193,10 @@ mod tests { "params": { "architecture": null, "threadId": "thr_123", + "clientManagedHandoffs": null, "codexResponsesAsItems": null, "codexResponseItemPrefix": null, + "codexResponseHandoffPrefix": null, "model": null, "outputModality": "audio", "includeStartupContext": null, @@ -3367,8 +3379,10 @@ mod tests { request_id: RequestId::Integer(1), params: v2::ThreadRealtimeStartParams { architecture: None, + client_managed_handoffs: None, codex_responses_as_items: None, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, thread_id: "thr_123".to_string(), model: None, output_modality: RealtimeOutputModality::Audio, diff --git a/codex-rs/app-server-protocol/src/protocol/v2/realtime.rs b/codex-rs/app-server-protocol/src/protocol/v2/realtime.rs index 793b316f5..b30ecd7b6 100644 --- a/codex-rs/app-server-protocol/src/protocol/v2/realtime.rs +++ b/codex-rs/app-server-protocol/src/protocol/v2/realtime.rs @@ -70,12 +70,21 @@ pub struct ThreadRealtimeStartParams { /// Overrides the configured realtime architecture for this session only. #[ts(optional = nullable)] pub architecture: Option, + /// Leaves Codex response handoffs to the client's explicit append calls instead of forwarding + /// them automatically. Defaults to false. + #[ts(optional = nullable)] + pub client_managed_handoffs: Option, /// Sends automatic Codex responses as realtime conversation items instead of handoff appends. #[ts(optional = nullable)] pub codex_responses_as_items: Option, /// Optional prefix added to automatic Codex response items when `codexResponsesAsItems` is true. #[ts(optional = nullable)] pub codex_response_item_prefix: Option, + /// Optional prefix added to automatic V1 Codex commentary sent with + /// `conversation.handoff.append` when `codexResponsesAsItems` is not true. 
Final answers are + /// sent without the prefix. + #[ts(optional = nullable)] + pub codex_response_handoff_prefix: Option, /// Overrides the configured realtime model for this session only. #[ts(optional = nullable)] pub model: Option, diff --git a/codex-rs/app-server/README.md b/codex-rs/app-server/README.md index d34231391..7dde50d19 100644 --- a/codex-rs/app-server/README.md +++ b/codex-rs/app-server/README.md @@ -165,7 +165,7 @@ Example with notification opt-out: - `thread/inject_items` — append raw Responses API items to a loaded thread’s model-visible history without starting a user turn; returns `{}` on success. - `turn/steer` — add user input to an already in-flight regular turn without starting a new turn; returns the active `turnId` that accepted the input. `clientUserMessageId` is optional; when supplied, the corresponding `userMessage` item echoes it as `clientId`. Review and manual compaction turns reject `turn/steer`. - `turn/interrupt` — request cancellation of an in-flight turn by `(thread_id, turn_id)`; success is an empty `{}` response and the turn finishes with `status: "interrupted"`. -- `thread/realtime/start` — start a thread-scoped realtime session (experimental); pass `outputModality: "text"` or `outputModality: "audio"` to choose model output, optionally pass `model` and `version` to override configured realtime selection for this session only, and pass `includeStartupContext: false` to omit Codex's generated startup context. By default, automatic Codex text follows the protocol's speakable output path. Pass `codexResponsesAsItems: true` to send automatic Codex responses as realtime conversation items instead, and optionally pass `codexResponseItemPrefix` to prepend experiment instructions to those items. Returns `{}` and streams `thread/realtime/*` notifications. Omit `transport` for the websocket transport, or pass `{ "type": "webrtc", "sdp": "..." 
}` to create a WebRTC session from a browser-generated SDP offer; the remote answer SDP is emitted as `thread/realtime/sdp`. +- `thread/realtime/start` — start a thread-scoped realtime session (experimental); pass `outputModality: "text"` or `outputModality: "audio"` to choose model output, optionally pass `model` and `version` to override configured realtime selection for this session only, and pass `includeStartupContext: false` to omit Codex's generated startup context. By default, automatic Codex text follows the protocol's speakable output path. Pass `clientManagedHandoffs: true` to disable automatic Codex response delivery so only the client's explicit append calls produce handoffs. Pass `codexResponsesAsItems: true` to send automatic Codex responses as realtime conversation items instead, and optionally pass `codexResponseItemPrefix` to prepend experiment instructions to those items. For V1 sessions, pass `codexResponseHandoffPrefix` while item mode is disabled to route automatic Codex commentary through `conversation.handoff.append` with that prefix; final answers remain unprefixed. Returns `{}` and streams `thread/realtime/*` notifications. Omit `transport` for the websocket transport, or pass `{ "type": "webrtc", "sdp": "..." }` to create a WebRTC session from a browser-generated SDP offer; the remote answer SDP is emitted as `thread/realtime/sdp`. - `thread/realtime/appendAudio` — append an input audio chunk to the active realtime session (experimental); returns `{}`. - `thread/realtime/appendText` — append text input to the active realtime session with a required `role` of `user` or `developer` (experimental); returns `{}`. Older clients that omit `role` default to `user`. - `thread/realtime/appendSpeech` — append text that the realtime model should speak to the user (experimental); returns `{}`. 
@@ -882,12 +882,19 @@ Clients may also pass `model` and `version` on `thread/realtime/start` to select different realtime session configuration without changing thread or user config. Pass `includeStartupContext: false` to skip Codex's startup context for this session while still using the selected backend prompt. +Pass `clientManagedHandoffs: true` to suppress automatic Codex response handoffs +and items. The client can then choose which updates to deliver with +`thread/realtime/appendText` or `thread/realtime/appendSpeech`. Pass `codexResponsesAsItems: true` to inject automatic Codex responses with `conversation.item.create` instead of the protocol's default speakable output path. When using that mode, `codexResponseItemPrefix` can prepend short experiment instructions to each automatic Codex response item. Omit `codexResponsesAsItems`, or pass `false`, to preserve the default speakable -behavior. Call +behavior. For V1 sessions, `codexResponseHandoffPrefix` instead routes automatic +Codex commentary through `conversation.handoff.append` and prepends the provided +text. Final answers remain unprefixed. Item mode takes precedence when +`codexResponsesAsItems` is true. +Call `thread/realtime/appendText` to append app-provided realtime text items, or `thread/realtime/appendSpeech` when the app decides a realtime update should be spoken. 
diff --git a/codex-rs/app-server/src/request_processors/turn_processor.rs b/codex-rs/app-server/src/request_processors/turn_processor.rs index d48e8c04b..fbfcb797b 100644 --- a/codex-rs/app-server/src/request_processors/turn_processor.rs +++ b/codex-rs/app-server/src/request_processors/turn_processor.rs @@ -931,8 +931,10 @@ impl TurnRequestProcessor { thread.as_ref(), Op::RealtimeConversationStart(ConversationStartParams { architecture: params.architecture, + client_managed_handoffs: params.client_managed_handoffs.unwrap_or(false), codex_responses_as_items: params.codex_responses_as_items.unwrap_or(false), codex_response_item_prefix: params.codex_response_item_prefix, + codex_response_handoff_prefix: params.codex_response_handoff_prefix, model: params.model, output_modality: params.output_modality, include_startup_context: params.include_startup_context.unwrap_or(true), diff --git a/codex-rs/app-server/tests/suite/v2/experimental_api.rs b/codex-rs/app-server/tests/suite/v2/experimental_api.rs index b12bd43b4..3b4f6e949 100644 --- a/codex-rs/app-server/tests/suite/v2/experimental_api.rs +++ b/codex-rs/app-server/tests/suite/v2/experimental_api.rs @@ -80,8 +80,10 @@ async fn realtime_conversation_start_requires_experimental_api_capability() -> R let request_id = mcp .send_thread_realtime_start_request(ThreadRealtimeStartParams { architecture: None, + client_managed_handoffs: None, codex_responses_as_items: None, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, thread_id: "thr_123".to_string(), model: None, output_modality: RealtimeOutputModality::Audio, @@ -192,8 +194,10 @@ async fn realtime_webrtc_start_requires_experimental_api_capability() -> Result< let request_id = mcp .send_thread_realtime_start_request(ThreadRealtimeStartParams { architecture: None, + client_managed_handoffs: None, codex_responses_as_items: None, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, thread_id: "thr_123".to_string(), model: None, 
output_modality: RealtimeOutputModality::Audio, diff --git a/codex-rs/app-server/tests/suite/v2/realtime_conversation.rs b/codex-rs/app-server/tests/suite/v2/realtime_conversation.rs index f88ed799a..274ee9b90 100644 --- a/codex-rs/app-server/tests/suite/v2/realtime_conversation.rs +++ b/codex-rs/app-server/tests/suite/v2/realtime_conversation.rs @@ -86,6 +86,8 @@ const V2_HANDOFF_COMPLETE_ACKNOWLEDGEMENT: &str = "Background agent finished. Use the preceding [BACKEND] messages as the result."; const RESPONSE_ITEM_PREFIX: &str = "Use the following context to inform future responses, but do not speak it to the user."; +const RESPONSE_HANDOFF_PREFIX: &str = + "Silent Codex context. Do not speak, acknowledge, or summarize this item."; #[derive(Debug, Clone, Copy)] enum StartupContextConfig<'a> { @@ -313,8 +315,9 @@ impl RealtimeE2eHarness { } async fn start_webrtc_realtime(&mut self, offer_sdp: &str) -> Result { - self.start_webrtc_realtime_with_codex_responses_as_items( - offer_sdp, /*codex_responses_as_items*/ None, + self.start_webrtc_realtime_with_codex_response_routing( + offer_sdp, /*client_managed_handoffs*/ None, + /*codex_responses_as_items*/ None, /*codex_response_handoff_prefix*/ None, ) .await } @@ -323,17 +326,21 @@ impl RealtimeE2eHarness { &mut self, offer_sdp: &str, ) -> Result { - self.start_webrtc_realtime_with_codex_responses_as_items( + self.start_webrtc_realtime_with_codex_response_routing( offer_sdp, + /*client_managed_handoffs*/ None, /*codex_responses_as_items*/ Some(true), + /*codex_response_handoff_prefix*/ None, ) .await } - async fn start_webrtc_realtime_with_codex_responses_as_items( + async fn start_webrtc_realtime_with_codex_response_routing( &mut self, offer_sdp: &str, + client_managed_handoffs: Option, codex_responses_as_items: Option, + codex_response_handoff_prefix: Option<&str>, ) -> Result { // Starts realtime through the public JSON-RPC method, then waits for the same client-visible // notifications a desktop app needs: started 
first, SDP answer second. @@ -341,10 +348,12 @@ impl RealtimeE2eHarness { .mcp .send_thread_realtime_start_request(ThreadRealtimeStartParams { architecture: None, + client_managed_handoffs, thread_id: self.thread_id.clone(), codex_response_item_prefix: codex_responses_as_items .unwrap_or(false) .then(|| RESPONSE_ITEM_PREFIX.to_string()), + codex_response_handoff_prefix: codex_response_handoff_prefix.map(str::to_string), codex_responses_as_items, model: None, output_modality: RealtimeOutputModality::Audio, @@ -611,8 +620,10 @@ async fn realtime_conversation_streams_v2_notifications() -> Result<()> { let start_request_id = mcp .send_thread_realtime_start_request(ThreadRealtimeStartParams { architecture: None, + client_managed_handoffs: None, codex_responses_as_items: None, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, thread_id: thread_start.thread.id.clone(), model: Some("realtime-treatment-model".to_string()), output_modality: RealtimeOutputModality::Audio, @@ -867,8 +878,10 @@ async fn realtime_start_can_skip_startup_context() -> Result<()> { let start_request_id = mcp .send_thread_realtime_start_request(ThreadRealtimeStartParams { architecture: None, + client_managed_handoffs: None, codex_responses_as_items: None, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, thread_id: thread_start.thread.id.clone(), model: None, output_modality: RealtimeOutputModality::Audio, @@ -964,8 +977,10 @@ async fn realtime_text_output_modality_requests_text_output_and_final_transcript let start_request_id = mcp .send_thread_realtime_start_request(ThreadRealtimeStartParams { architecture: None, + client_managed_handoffs: None, codex_responses_as_items: None, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, thread_id: thread_start.thread.id.clone(), model: None, output_modality: RealtimeOutputModality::Text, @@ -1144,8 +1159,10 @@ async fn realtime_conversation_stop_emits_closed_notification() -> Result<()> { 
let start_request_id = mcp .send_thread_realtime_start_request(ThreadRealtimeStartParams { architecture: None, + client_managed_handoffs: None, codex_responses_as_items: None, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, thread_id: thread_start.thread.id.clone(), model: None, output_modality: RealtimeOutputModality::Audio, @@ -1247,8 +1264,10 @@ async fn realtime_webrtc_start_emits_sdp_notification() -> Result<()> { let start_request_id = mcp .send_thread_realtime_start_request(ThreadRealtimeStartParams { architecture: None, + client_managed_handoffs: None, codex_responses_as_items: None, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, thread_id: thread_id.clone(), model: None, output_modality: RealtimeOutputModality::Audio, @@ -1480,6 +1499,135 @@ async fn webrtc_v1_default_automatic_output_uses_handoff_append() -> Result<()> Ok(()) } +#[tokio::test] +async fn webrtc_v1_client_managed_handoffs_disable_automatic_output() -> Result<()> { + skip_if_no_network!(Ok(())); + + let mut harness = RealtimeE2eHarness::new( + RealtimeTestVersion::V1, + main_loop_responses(vec![create_final_assistant_message_sse_response( + "client-managed output", + )?]), + realtime_sideband(vec![realtime_sideband_connection(vec![ + vec![session_updated("sess_v1_client_managed_handoffs")], + vec![], + ])]), + ) + .await?; + + let started = harness + .start_webrtc_realtime_with_codex_response_routing( + "v=offer\r\n", + /*client_managed_handoffs*/ Some(true), + /*codex_responses_as_items*/ None, + /*codex_response_handoff_prefix*/ None, + ) + .await?; + assert_eq!(started.started.version, RealtimeConversationVersion::V1); + assert_v1_session_update(&harness.sideband_outbound_request(/*request_index*/ 0).await)?; + + let turn_request_id = harness + .mcp + .send_turn_start_request(TurnStartParams { + thread_id: harness.thread_id.clone(), + input: vec![V2UserInput::Text { + text: "leave realtime delivery to the client".to_string(), + 
text_elements: Vec::new(), + }], + ..Default::default() + }) + .await?; + let turn_response: JSONRPCResponse = timeout( + DEFAULT_TIMEOUT, + harness + .mcp + .read_stream_until_response_message(RequestId::Integer(turn_request_id)), + ) + .await??; + let _: TurnStartResponse = to_response(turn_response)?; + let _ = harness + .read_notification::("turn/completed") + .await?; + + let automatic_handoff = timeout( + Duration::from_millis(200), + harness + .realtime_server + .wait_for_request(/*connection_index*/ 0, /*request_index*/ 1), + ) + .await; + assert!( + automatic_handoff.is_err(), + "automatic Codex output should not reach realtime in client-managed handoff mode" + ); + + harness + .append_speech(harness.thread_id.clone(), "client-selected speech") + .await?; + assert_eq!( + harness.sideband_outbound_request(/*request_index*/ 1).await, + json!({ + "type": "conversation.handoff.append", + "handoff_id": "codex", + "output_text": "client-selected speech", + }) + ); + + harness.shutdown().await; + Ok(()) +} + +#[tokio::test] +async fn webrtc_v1_final_automatic_handoff_omits_silent_prefix() -> Result<()> { + skip_if_no_network!(Ok(())); + + let mut harness = RealtimeE2eHarness::new( + RealtimeTestVersion::V1, + main_loop_responses(vec![create_final_assistant_message_sse_response( + "background progress", + )?]), + realtime_sideband(vec![realtime_sideband_connection(vec![ + vec![ + session_updated("sess_v1_prefixed_handoff"), + json!({ + "type": "conversation.handoff.requested", + "handoff_id": "handoff_prefixed", + "item_id": "item_prefixed", + "input_transcript": "run the background task" + }), + ], + vec![], + vec![], + ])]), + ) + .await?; + + let started = harness + .start_webrtc_realtime_with_codex_response_routing( + "v=offer\r\n", + /*client_managed_handoffs*/ None, + /*codex_responses_as_items*/ None, + Some(RESPONSE_HANDOFF_PREFIX), + ) + .await?; + assert_eq!(started.started.version, RealtimeConversationVersion::V1); + let _ = harness + 
.read_notification::("turn/completed") + .await?; + + assert_eq!( + harness.sideband_outbound_request(/*request_index*/ 1).await, + json!({ + "type": "conversation.handoff.append", + "handoff_id": "handoff_prefixed", + "output_text": "background progress", + }) + ); + + harness.shutdown().await; + Ok(()) +} + #[tokio::test] async fn webrtc_v1_handoff_request_delegates_context_and_manual_append_speaks() -> Result<()> { skip_if_no_network!(Ok(())); @@ -2468,8 +2616,10 @@ async fn realtime_webrtc_start_surfaces_backend_error() -> Result<()> { let start_request_id = mcp .send_thread_realtime_start_request(ThreadRealtimeStartParams { architecture: None, + client_managed_handoffs: None, codex_responses_as_items: None, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, thread_id: thread_start.thread.id, model: None, output_modality: RealtimeOutputModality::Audio, @@ -2533,8 +2683,10 @@ async fn realtime_conversation_requires_feature_flag() -> Result<()> { let start_request_id = mcp .send_thread_realtime_start_request(ThreadRealtimeStartParams { architecture: None, + client_managed_handoffs: None, codex_responses_as_items: None, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, thread_id: thread_start.thread.id.clone(), model: None, output_modality: RealtimeOutputModality::Audio, diff --git a/codex-rs/core/src/realtime_conversation.rs b/codex-rs/core/src/realtime_conversation.rs index 2b5eb91a2..b68422846 100644 --- a/codex-rs/core/src/realtime_conversation.rs +++ b/codex-rs/core/src/realtime_conversation.rs @@ -30,6 +30,7 @@ use codex_login::read_openai_api_key_from_env; use codex_model_provider_info::ModelProviderInfo; use codex_protocol::error::CodexErr; use codex_protocol::error::Result as CodexResult; +use codex_protocol::models::MessagePhase; use codex_protocol::protocol::CodexErrorInfo; use codex_protocol::protocol::ConversationAudioParams; use codex_protocol::protocol::ConversationSpeechParams; @@ -107,8 +108,10 @@ 
struct RealtimeHandoffState { output_tx: Sender, active_handoff: Arc>>, last_output_text: Arc>>, + client_managed_handoffs: bool, codex_responses_as_items: bool, codex_response_item_prefix: Option, + codex_response_handoff_prefix: Option, session_kind: RealtimeSessionKind, } @@ -116,6 +119,7 @@ struct RealtimeHandoffState { enum RealtimeOutbound { StandaloneHandoff { text: String }, HandoffUpdate { handoff_id: String, text: String }, + HandoffAppend { handoff_id: String, text: String }, CompletedHandoff { handoff_id: String, text: String }, ConversationItem { text: String }, HandoffCompleteAck { handoff_id: String }, @@ -210,16 +214,20 @@ struct RealtimeInputChannels { impl RealtimeHandoffState { fn new( output_tx: Sender, + client_managed_handoffs: bool, codex_responses_as_items: bool, codex_response_item_prefix: Option, + codex_response_handoff_prefix: Option, session_kind: RealtimeSessionKind, ) -> Self { Self { output_tx, active_handoff: Arc::new(Mutex::new(None)), last_output_text: Arc::new(Mutex::new(None)), + client_managed_handoffs, codex_responses_as_items, codex_response_item_prefix, + codex_response_handoff_prefix, session_kind, } } @@ -240,8 +248,10 @@ struct RealtimeStart { api_provider: ApiProvider, architecture: RealtimeConversationArchitecture, extra_headers: Option, + client_managed_handoffs: bool, codex_responses_as_items: bool, codex_response_item_prefix: Option, + codex_response_handoff_prefix: Option, realtime_call_api_provider: Option, session_config: RealtimeSessionConfig, model_client: ModelClient, @@ -296,8 +306,10 @@ impl RealtimeConversationManager { api_provider, architecture, extra_headers, + client_managed_handoffs, codex_responses_as_items, codex_response_item_prefix, + codex_response_handoff_prefix, realtime_call_api_provider, session_config, model_client, @@ -321,8 +333,10 @@ impl RealtimeConversationManager { let realtime_active = Arc::new(AtomicBool::new(true)); let handoff = RealtimeHandoffState::new( handoff_output_tx, + 
client_managed_handoffs, codex_responses_as_items, codex_response_item_prefix, + codex_response_handoff_prefix, session_kind, ); let input_channels = RealtimeInputChannels { @@ -479,7 +493,11 @@ impl RealtimeConversationManager { Ok(()) } - pub(crate) async fn handoff_out(&self, output_text: String) -> CodexResult<()> { + pub(crate) async fn handoff_out( + &self, + output_text: String, + phase: Option, + ) -> CodexResult<()> { let handoff = { let guard = self.state.lock().await; let Some(state) = guard.as_ref() else { @@ -490,6 +508,13 @@ impl RealtimeConversationManager { state.handoff.clone() }; + if handoff.client_managed_handoffs { + return Ok(()); + } + let response_handoff_prefix = match phase { + Some(MessagePhase::Commentary) => handoff.codex_response_handoff_prefix.clone(), + Some(MessagePhase::FinalAnswer) | None => None, + }; let active_handoff = handoff.active_handoff.lock().await.clone(); let output = match active_handoff { Some(handoff_id) => { @@ -502,6 +527,16 @@ impl RealtimeConversationManager { handoff.codex_response_item_prefix.as_deref(), ), } + } else if handoff.session_kind == RealtimeSessionKind::V1 + && handoff.codex_response_handoff_prefix.is_some() + { + RealtimeOutbound::HandoffAppend { + handoff_id, + text: realtime_backend_item( + output_text, + response_handoff_prefix.as_deref(), + ), + } } else { RealtimeOutbound::HandoffUpdate { handoff_id, @@ -520,7 +555,13 @@ impl RealtimeConversationManager { ), } } else { - RealtimeOutbound::StandaloneHandoff { text: output_text } + RealtimeOutbound::StandaloneHandoff { + text: if handoff.session_kind == RealtimeSessionKind::V1 { + realtime_backend_item(output_text, response_handoff_prefix.as_deref()) + } else { + output_text + }, + } } } }; @@ -565,6 +606,9 @@ impl RealtimeConversationManager { let Some(handoff) = handoff else { return Ok(()); }; + if handoff.client_managed_handoffs { + return Ok(()); + } match handoff.session_kind { RealtimeSessionKind::V1 => return Ok(()), 
RealtimeSessionKind::V2 => {} @@ -675,8 +719,10 @@ struct PreparedRealtimeConversationStart { api_provider: ApiProvider, architecture: RealtimeConversationArchitecture, extra_headers: Option, + client_managed_handoffs: bool, codex_responses_as_items: bool, codex_response_item_prefix: Option, + codex_response_handoff_prefix: Option, realtime_call_api_provider: Option, requested_realtime_session_id: Option, version: RealtimeWsVersion, @@ -744,8 +790,10 @@ async fn prepare_realtime_start( api_provider, architecture, extra_headers, + client_managed_handoffs: params.client_managed_handoffs, codex_responses_as_items: params.codex_responses_as_items, codex_response_item_prefix: params.codex_response_item_prefix, + codex_response_handoff_prefix: params.codex_response_handoff_prefix, realtime_call_api_provider, requested_realtime_session_id, version, @@ -914,8 +962,10 @@ async fn handle_start_inner( api_provider, architecture, extra_headers, + client_managed_handoffs, codex_responses_as_items, codex_response_item_prefix, + codex_response_handoff_prefix, realtime_call_api_provider, requested_realtime_session_id, version, @@ -931,8 +981,10 @@ async fn handle_start_inner( api_provider, architecture, extra_headers, + client_managed_handoffs, codex_responses_as_items, codex_response_item_prefix, + codex_response_handoff_prefix, realtime_call_api_provider, session_config, model_client: sess.services.model_client.clone(), @@ -1368,6 +1420,11 @@ async fn handle_handoff_output( .send_conversation_function_call_output(handoff_id, text) .await } + RealtimeOutbound::HandoffAppend { handoff_id, text } => { + writer + .send_conversation_handoff_append(handoff_id, text) + .await + } RealtimeOutbound::ConversationItem { text } => { writer .send_conversation_item_create(text, ConversationTextRole::Developer) @@ -1388,7 +1445,8 @@ async fn handle_handoff_output( .await; } } - RealtimeOutbound::HandoffUpdate { handoff_id, text } => { + RealtimeOutbound::HandoffUpdate { handoff_id, text } + | 
RealtimeOutbound::HandoffAppend { handoff_id, text } => { let active_handoff = handoff_state.active_handoff.lock().await.clone(); match active_handoff { Some(active_handoff) if active_handoff == handoff_id => {} diff --git a/codex-rs/core/src/realtime_conversation_tests.rs b/codex-rs/core/src/realtime_conversation_tests.rs index 147e0cc12..55525774b 100644 --- a/codex-rs/core/src/realtime_conversation_tests.rs +++ b/codex-rs/core/src/realtime_conversation_tests.rs @@ -130,8 +130,10 @@ async fn clears_active_handoff_explicitly() { let (tx, _rx) = bounded(1); let state = RealtimeHandoffState::new( tx, + /*client_managed_handoffs*/ false, /*codex_responses_as_items*/ false, /*codex_response_item_prefix*/ None, + /*codex_response_handoff_prefix*/ None, RealtimeSessionKind::V1, ); diff --git a/codex-rs/core/src/session/mod.rs b/codex-rs/core/src/session/mod.rs index e21b5394d..0ecc70681 100644 --- a/codex-rs/core/src/session/mod.rs +++ b/codex-rs/core/src/session/mod.rs @@ -1809,13 +1809,13 @@ impl Session { } async fn maybe_mirror_event_text_to_realtime(&self, msg: &EventMsg) { - let Some(text) = realtime_text_for_event(msg) else { + let Some((text, phase)) = realtime_text_for_event(msg) else { return; }; if self.conversation.running_state().await.is_none() { return; } - if let Err(err) = self.conversation.handoff_out(text).await { + if let Err(err) = self.conversation.handoff_out(text, phase).await { debug!("failed to mirror event text to realtime conversation: {err}"); } } diff --git a/codex-rs/core/src/session/turn.rs b/codex-rs/core/src/session/turn.rs index 4beef792b..257e74120 100644 --- a/codex-rs/core/src/session/turn.rs +++ b/codex-rs/core/src/session/turn.rs @@ -1460,11 +1460,11 @@ fn agent_message_text(item: &codex_protocol::items::AgentMessageItem) -> String .collect() } -pub(super) fn realtime_text_for_event(msg: &EventMsg) -> Option { +pub(super) fn realtime_text_for_event(msg: &EventMsg) -> Option<(String, Option)> { match msg { - 
EventMsg::AgentMessage(event) => Some(event.message.clone()), + EventMsg::AgentMessage(event) => Some((event.message.clone(), event.phase.clone())), EventMsg::ItemCompleted(event) => match &event.item { - TurnItem::AgentMessage(item) => Some(agent_message_text(item)), + TurnItem::AgentMessage(item) => Some((agent_message_text(item), item.phase.clone())), _ => None, }, EventMsg::Error(_) diff --git a/codex-rs/core/tests/suite/compact_remote.rs b/codex-rs/core/tests/suite/compact_remote.rs index 365cf35b0..40c01fa1b 100644 --- a/codex-rs/core/tests/suite/compact_remote.rs +++ b/codex-rs/core/tests/suite/compact_remote.rs @@ -204,8 +204,10 @@ async fn start_realtime_conversation(codex: &codex_core::CodexThread) -> Result< codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, diff --git a/codex-rs/core/tests/suite/realtime_conversation.rs b/codex-rs/core/tests/suite/realtime_conversation.rs index a2aa6d61d..dccf460f0 100644 --- a/codex-rs/core/tests/suite/realtime_conversation.rs +++ b/codex-rs/core/tests/suite/realtime_conversation.rs @@ -67,6 +67,7 @@ const MEMORY_PROMPT_PHRASE: &str = "You have access to a memory folder with guidance from prior runs."; const REALTIME_CONVERSATION_TEST_SUBPROCESS_ENV_VAR: &str = "CODEX_REALTIME_CONVERSATION_TEST_SUBPROCESS"; +const SILENT_CONTEXT_PREFIX: &str = "[BACKEND] Silent Codex context. Do not speak, acknowledge, or summarize this item. 
Wait for an explicit speakable handoff or direct user request."; #[derive(Debug, Clone)] struct RealtimeCallRequestCapture { @@ -285,8 +286,10 @@ async fn conversation_start_audio_text_close_round_trip() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -430,8 +433,10 @@ async fn conversation_start_defaults_to_v2_and_gpt_realtime_1_5() -> Result<()> test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -524,8 +529,10 @@ async fn conversation_webrtc_start_posts_generated_session() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: Some("session-override-model".to_string()), output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -707,8 +714,10 @@ async fn conversation_webrtc_start_uses_avas_architecture_query() -> Result<()> test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: Some(RealtimeConversationArchitecture::Avas), + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -808,8 +817,10 @@ async fn 
conversation_webrtc_start_uses_configured_call_base_url_for_avas() -> R test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: Some(RealtimeConversationArchitecture::Avas), + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -901,8 +912,10 @@ async fn conversation_webrtc_close_while_sideband_connecting_drops_pending_join( test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -991,8 +1004,10 @@ async fn conversation_webrtc_sideband_connect_failure_closes_with_error() -> Res test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -1083,8 +1098,10 @@ async fn conversation_start_uses_openai_env_key_fallback_with_chatgpt_auth() -> test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -1155,8 +1172,10 @@ async fn conversation_transport_close_emits_closed_event() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, 
codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -1251,8 +1270,10 @@ async fn conversation_start_preflight_failure_emits_realtime_error_only() -> Res test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -1301,8 +1322,10 @@ async fn conversation_start_connect_failure_emits_realtime_error_only() -> Resul test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -1399,8 +1422,10 @@ async fn conversation_second_start_replaces_runtime() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -1428,8 +1453,10 @@ async fn conversation_second_start_replaces_runtime() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -1528,8 +1555,10 @@ async fn conversation_uses_experimental_realtime_ws_base_url_override() -> Resul test.codex 
.submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -1596,8 +1625,10 @@ async fn conversation_uses_default_realtime_backend_prompt() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -1672,8 +1703,10 @@ async fn conversation_uses_empty_instructions_for_null_or_empty_prompt() -> Resu test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -1741,8 +1774,10 @@ async fn conversation_uses_explicit_start_voice() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -1802,8 +1837,10 @@ async fn conversation_uses_configured_realtime_voice() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, 
include_startup_context: true, @@ -1851,8 +1888,10 @@ async fn conversation_rejects_voice_for_wrong_realtime_version() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -1901,8 +1940,10 @@ async fn conversation_uses_experimental_realtime_ws_backend_prompt_override() -> test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -1977,8 +2018,10 @@ async fn conversation_uses_experimental_realtime_ws_startup_context_override() - test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -2047,8 +2090,10 @@ async fn conversation_disables_realtime_startup_context_with_empty_override() -> test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -2110,8 +2155,10 @@ async fn conversation_start_injects_startup_context_from_thread_history() -> Res test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, 
codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -2227,8 +2274,10 @@ async fn conversation_startup_context_current_thread_selects_many_turns_by_budge codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -2338,8 +2387,10 @@ async fn conversation_startup_context_falls_back_to_workspace_map() -> Result<() test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -2401,8 +2452,10 @@ async fn conversation_startup_context_is_truncated_and_sent_once_per_start() -> test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -2485,8 +2538,10 @@ async fn conversation_user_text_turn_is_not_sent_to_realtime() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -2585,8 +2640,10 @@ async fn 
realtime_v2_noop_tool_call_returns_empty_function_output_without_respon test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -2687,8 +2744,10 @@ async fn conversation_mirrors_assistant_message_text_to_realtime_handoff() -> Re test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -2764,6 +2823,10 @@ async fn conversation_handoff_persists_across_item_done_until_turn_complete() -> skip_if_no_network!(Ok(())); let (gate_second_message_tx, gate_second_message_rx) = oneshot::channel(); + let mut commentary_message = responses::ev_assistant_message("msg-1", "assistant message 1"); + commentary_message["item"]["phase"] = json!("commentary"); + let mut final_message = responses::ev_assistant_message("msg-2", "assistant message 2"); + final_message["item"]["phase"] = json!("final_answer"); let first_chunks = vec![ StreamingSseChunk { gate: None, @@ -2771,17 +2834,11 @@ async fn conversation_handoff_persists_across_item_done_until_turn_complete() -> }, StreamingSseChunk { gate: None, - body: sse_event(responses::ev_assistant_message( - "msg-1", - "assistant message 1", - )), + body: sse_event(commentary_message), }, StreamingSseChunk { gate: Some(gate_second_message_rx), - body: sse_event(responses::ev_assistant_message( - "msg-2", - "assistant message 2", - )), + body: sse_event(final_message), }, StreamingSseChunk { gate: None, @@ -2827,8 +2884,10 @@ async fn conversation_handoff_persists_across_item_done_until_turn_complete() -> 
test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: Some(SILENT_CONTEXT_PREFIX.to_string()), model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -2873,7 +2932,7 @@ async fn conversation_handoff_persists_across_item_done_until_turn_complete() -> ); assert_eq!( first_append.body_json()["output_text"].as_str(), - Some("\"Agent Final Message\":\n\nassistant message 1") + Some(format!("{SILENT_CONTEXT_PREFIX}\n\nassistant message 1").as_str()) ); let _ = wait_for_event_match(&test.codex, |msg| match msg { @@ -2899,7 +2958,7 @@ async fn conversation_handoff_persists_across_item_done_until_turn_complete() -> ); assert_eq!( second_append.body_json()["output_text"].as_str(), - Some("\"Agent Final Message\":\n\nassistant message 2") + Some("assistant message 2") ); let completion = completions @@ -2982,8 +3041,10 @@ async fn inbound_handoff_request_starts_turn() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -3095,8 +3156,10 @@ async fn inbound_handoff_request_uses_active_transcript() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -3201,8 +3264,10 @@ async fn inbound_handoff_request_sends_transcript_delta_after_each_handoff() -> test.codex 
.submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -3305,8 +3370,10 @@ async fn inbound_conversation_item_does_not_start_turn_and_still_forwards_audio( test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -3431,8 +3498,10 @@ async fn delegated_turn_user_role_echo_does_not_redelegate_and_still_forwards_au test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -3587,8 +3656,10 @@ async fn inbound_handoff_request_does_not_block_realtime_event_forwarding() -> R test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, @@ -3732,8 +3803,10 @@ async fn inbound_handoff_request_steers_active_turn() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, 
include_startup_context: true, @@ -3888,8 +3961,10 @@ async fn inbound_handoff_request_starts_turn_and_does_not_block_realtime_audio() test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { architecture: None, + client_managed_handoffs: false, codex_responses_as_items: false, codex_response_item_prefix: None, + codex_response_handoff_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, include_startup_context: true, diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index a0e5b24d4..f87d71b21 100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -182,10 +182,16 @@ pub struct McpServerRefreshConfig { pub struct ConversationStartParams { /// Overrides the configured realtime architecture for this session only. pub architecture: Option<RealtimeConversationArchitecture>, + /// Whether Codex response handoffs are managed through explicit client append calls. + pub client_managed_handoffs: bool, /// Sends automatic Codex responses as realtime conversation items instead of handoff appends. pub codex_responses_as_items: bool, /// Optional prefix added to automatic Codex response items when `codex_responses_as_items` is set. pub codex_response_item_prefix: Option<String>, + /// Optional prefix added to automatic V1 Codex commentary sent with + /// `conversation.handoff.append` when `codex_responses_as_items` is not set. Final answers are + /// sent without the prefix. + pub codex_response_handoff_prefix: Option<String>, /// Overrides the configured realtime model for this session only. pub model: Option<String>, /// Selects whether the realtime session should produce text or audio output.