From d5b4b9837017c0fde8c4738968ae2471723989f6 Mon Sep 17 00:00:00 2001 From: guinness-oai Date: Mon, 15 Jun 2026 17:14:22 -0700 Subject: [PATCH] Add a toggle for realtime startup context (#28405) ## Summary - Add `includeStartupContext` to realtime start requests so callers can explicitly skip Codex startup context while keeping the backend prompt - Thread the new flag through protocol types, request processing, and realtime session config - Update app-server docs and coverage for the new default and opt-out behavior ## Testing - Added protocol serialization coverage for `includeStartupContext` - Added realtime integration coverage for starting a session with startup context disabled --- .../src/protocol/common.rs | 7 ++ .../src/protocol/v2/realtime.rs | 3 + codex-rs/app-server/README.md | 4 +- .../src/request_processors/turn_processor.rs | 1 + .../tests/suite/v2/experimental_api.rs | 2 + .../tests/suite/v2/realtime_conversation.rs | 81 +++++++++++++++++++ codex-rs/core/src/realtime_conversation.rs | 56 ++++++------- codex-rs/core/tests/suite/compact_remote.rs | 1 + .../core/tests/suite/realtime_conversation.rs | 38 +++++++++ codex-rs/protocol/src/protocol.rs | 2 + 10 files changed, 167 insertions(+), 28 deletions(-) diff --git a/codex-rs/app-server-protocol/src/protocol/common.rs b/codex-rs/app-server-protocol/src/protocol/common.rs index 5b1d9034f..64ed11c64 100644 --- a/codex-rs/app-server-protocol/src/protocol/common.rs +++ b/codex-rs/app-server-protocol/src/protocol/common.rs @@ -3043,6 +3043,7 @@ mod tests { thread_id: "thr_123".to_string(), model: Some("realtime-treatment-model".to_string()), output_modality: RealtimeOutputModality::Audio, + include_startup_context: Some(false), prompt: Some(Some("You are on a call".to_string())), realtime_session_id: Some("sess_456".to_string()), transport: None, @@ -3061,6 +3062,7 @@ mod tests { "codexResponseItemPrefix": null, "model": "realtime-treatment-model", "outputModality": "audio", + "includeStartupContext": 
false, "prompt": "You are on a call", "realtimeSessionId": "sess_456", "transport": null, @@ -3084,6 +3086,7 @@ mod tests { thread_id: "thr_123".to_string(), model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: None, prompt: None, realtime_session_id: None, transport: None, @@ -3102,6 +3105,7 @@ mod tests { "codexResponseItemPrefix": null, "model": null, "outputModality": "audio", + "includeStartupContext": null, "realtimeSessionId": null, "transport": null, "version": null, @@ -3120,6 +3124,7 @@ mod tests { thread_id: "thr_123".to_string(), model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: None, prompt: Some(None), realtime_session_id: None, transport: None, @@ -3138,6 +3143,7 @@ mod tests { "codexResponseItemPrefix": null, "model": null, "outputModality": "audio", + "includeStartupContext": null, "prompt": null, "realtimeSessionId": null, "transport": null, @@ -3322,6 +3328,7 @@ mod tests { thread_id: "thr_123".to_string(), model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: None, prompt: Some(Some("You are on a call".to_string())), realtime_session_id: None, transport: None, diff --git a/codex-rs/app-server-protocol/src/protocol/v2/realtime.rs b/codex-rs/app-server-protocol/src/protocol/v2/realtime.rs index 5bf69285f..793b316f5 100644 --- a/codex-rs/app-server-protocol/src/protocol/v2/realtime.rs +++ b/codex-rs/app-server-protocol/src/protocol/v2/realtime.rs @@ -82,6 +82,9 @@ pub struct ThreadRealtimeStartParams { /// Selects text or audio output for the realtime session. Transport and voice stay /// independent so clients can choose how they connect separately from what the model emits. pub output_modality: RealtimeOutputModality, + /// Set to false to start without Codex's startup context. Omitted or null includes it. 
+ #[ts(optional = nullable)] + pub include_startup_context: Option<bool>, #[serde( default, deserialize_with = "crate::protocol::serde_helpers::deserialize_double_option", diff --git a/codex-rs/app-server/README.md b/codex-rs/app-server/README.md index b9c70860e..b6b1048d2 100644 --- a/codex-rs/app-server/README.md +++ b/codex-rs/app-server/README.md @@ -165,7 +165,7 @@ Example with notification opt-out: - `thread/inject_items` — append raw Responses API items to a loaded thread’s model-visible history without starting a user turn; returns `{}` on success. - `turn/steer` — add user input to an already in-flight regular turn without starting a new turn; returns the active `turnId` that accepted the input. `clientUserMessageId` is optional; when supplied, the corresponding `userMessage` item echoes it as `clientId`. Review and manual compaction turns reject `turn/steer`. - `turn/interrupt` — request cancellation of an in-flight turn by `(thread_id, turn_id)`; success is an empty `{}` response and the turn finishes with `status: "interrupted"`. -- `thread/realtime/start` — start a thread-scoped realtime session (experimental); pass `outputModality: "text"` or `outputModality: "audio"` to choose model output, and optionally pass `model` and `version` to override configured realtime selection for this session only. By default, automatic Codex text follows the protocol's speakable output path. Pass `codexResponsesAsItems: true` to send automatic Codex responses as realtime conversation items instead, and optionally pass `codexResponseItemPrefix` to prepend experiment instructions to those items. Returns `{}` and streams `thread/realtime/*` notifications. Omit `transport` for the websocket transport, or pass `{ "type": "webrtc", "sdp": "..." }` to create a WebRTC session from a browser-generated SDP offer; the remote answer SDP is emitted as `thread/realtime/sdp`. 
+- `thread/realtime/start` — start a thread-scoped realtime session (experimental); pass `outputModality: "text"` or `outputModality: "audio"` to choose model output, optionally pass `model` and `version` to override configured realtime selection for this session only, and pass `includeStartupContext: false` to omit Codex's generated startup context. By default, automatic Codex text follows the protocol's speakable output path. Pass `codexResponsesAsItems: true` to send automatic Codex responses as realtime conversation items instead, and optionally pass `codexResponseItemPrefix` to prepend experiment instructions to those items. Returns `{}` and streams `thread/realtime/*` notifications. Omit `transport` for the websocket transport, or pass `{ "type": "webrtc", "sdp": "..." }` to create a WebRTC session from a browser-generated SDP offer; the remote answer SDP is emitted as `thread/realtime/sdp`. - `thread/realtime/appendAudio` — append an input audio chunk to the active realtime session (experimental); returns `{}`. - `thread/realtime/appendText` — append text input to the active realtime session with a required `role` of `user` or `developer` (experimental); returns `{}`. Older clients that omit `role` default to `user`. - `thread/realtime/appendSpeech` — append text that the realtime model should speak to the user (experimental); returns `{}`. @@ -879,6 +879,8 @@ Omit `prompt` to use Codex's default realtime backend prompt. Send `prompt: null `prompt: ""` when the session should start without that default backend prompt. Clients may also pass `model` and `version` on `thread/realtime/start` to select a different realtime session configuration without changing thread or user config. +Pass `includeStartupContext: false` to skip Codex's startup context for this +session while still using the selected backend prompt. 
Pass `codexResponsesAsItems: true` to inject automatic Codex responses with `conversation.item.create` instead of the protocol's default speakable output path. When using that mode, `codexResponseItemPrefix` can prepend short diff --git a/codex-rs/app-server/src/request_processors/turn_processor.rs b/codex-rs/app-server/src/request_processors/turn_processor.rs index e6b626aef..8d1810d87 100644 --- a/codex-rs/app-server/src/request_processors/turn_processor.rs +++ b/codex-rs/app-server/src/request_processors/turn_processor.rs @@ -956,6 +956,7 @@ impl TurnRequestProcessor { codex_response_item_prefix: params.codex_response_item_prefix, model: params.model, output_modality: params.output_modality, + include_startup_context: params.include_startup_context.unwrap_or(true), prompt: params.prompt, realtime_session_id: params.realtime_session_id, transport: params.transport.map(|transport| match transport { diff --git a/codex-rs/app-server/tests/suite/v2/experimental_api.rs b/codex-rs/app-server/tests/suite/v2/experimental_api.rs index 417fa5a39..b12bd43b4 100644 --- a/codex-rs/app-server/tests/suite/v2/experimental_api.rs +++ b/codex-rs/app-server/tests/suite/v2/experimental_api.rs @@ -85,6 +85,7 @@ async fn realtime_conversation_start_requires_experimental_api_capability() -> R thread_id: "thr_123".to_string(), model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: None, prompt: Some(Some("hello".to_string())), realtime_session_id: None, transport: None, @@ -196,6 +197,7 @@ async fn realtime_webrtc_start_requires_experimental_api_capability() -> Result< thread_id: "thr_123".to_string(), model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: None, prompt: Some(Some("hello".to_string())), realtime_session_id: None, transport: Some(ThreadRealtimeStartTransport::Webrtc { diff --git a/codex-rs/app-server/tests/suite/v2/realtime_conversation.rs b/codex-rs/app-server/tests/suite/v2/realtime_conversation.rs index 
6f9ed7f67..df49f2fad 100644 --- a/codex-rs/app-server/tests/suite/v2/realtime_conversation.rs +++ b/codex-rs/app-server/tests/suite/v2/realtime_conversation.rs @@ -348,6 +348,7 @@ impl RealtimeE2eHarness { codex_responses_as_items, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: None, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: Some(ThreadRealtimeStartTransport::Webrtc { @@ -617,6 +618,7 @@ async fn realtime_conversation_streams_v2_notifications() -> Result<()> { thread_id: thread_start.thread.id.clone(), model: Some("realtime-treatment-model".to_string()), output_modality: RealtimeOutputModality::Audio, + include_startup_context: None, prompt: None, realtime_session_id: None, transport: None, @@ -830,6 +832,80 @@ async fn realtime_conversation_streams_v2_notifications() -> Result<()> { Ok(()) } +#[tokio::test] +async fn realtime_start_can_skip_startup_context() -> Result<()> { + skip_if_no_network!(Ok(())); + + let responses_server = create_mock_responses_server_sequence_unchecked(Vec::new()).await; + let realtime_server = start_websocket_server(vec![vec![vec![json!({ + "type": "session.updated", + "session": { "id": "sess_backend", "instructions": "backend prompt" } + })]]]) + .await; + + let codex_home = TempDir::new()?; + create_config_toml( + codex_home.path(), + &responses_server.uri(), + realtime_server.uri(), + /*realtime_enabled*/ true, + StartupContextConfig::Generated, + )?; + + let mut mcp = TestAppServer::new(codex_home.path()).await?; + timeout(DEFAULT_TIMEOUT, mcp.initialize()).await??; + login_with_api_key(&mut mcp, "sk-test-key").await?; + + let thread_start_request_id = mcp + .send_thread_start_request(ThreadStartParams::default()) + .await?; + let thread_start_response: JSONRPCResponse = timeout( + DEFAULT_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(thread_start_request_id)), + ) + .await??; + let thread_start: ThreadStartResponse = 
to_response(thread_start_response)?; + + let start_request_id = mcp + .send_thread_realtime_start_request(ThreadRealtimeStartParams { + architecture: None, + codex_responses_as_items: None, + codex_response_item_prefix: None, + thread_id: thread_start.thread.id.clone(), + model: None, + output_modality: RealtimeOutputModality::Audio, + include_startup_context: Some(false), + prompt: None, + realtime_session_id: None, + transport: None, + version: None, + voice: None, + }) + .await?; + let start_response: JSONRPCResponse = timeout( + DEFAULT_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(start_request_id)), + ) + .await??; + let _: ThreadRealtimeStartResponse = to_response(start_response)?; + + read_notification::(&mut mcp, "thread/realtime/started") + .await?; + + let startup_context_request = realtime_server + .wait_for_request(/*connection_index*/ 0, /*request_index*/ 0) + .await; + let startup_context_body = startup_context_request.body_json(); + let instructions = startup_context_body["session"]["instructions"] + .as_str() + .context("expected realtime instructions")?; + assert_eq!(instructions, "backend prompt"); + assert!(!instructions.contains(STARTUP_CONTEXT_HEADER)); + + realtime_server.shutdown().await; + Ok(()) +} + #[tokio::test] async fn realtime_text_output_modality_requests_text_output_and_final_transcript() -> Result<()> { skip_if_no_network!(Ok(())); @@ -895,6 +971,7 @@ async fn realtime_text_output_modality_requests_text_output_and_final_transcript thread_id: thread_start.thread.id.clone(), model: None, output_modality: RealtimeOutputModality::Text, + include_startup_context: None, prompt: None, realtime_session_id: None, transport: None, @@ -1074,6 +1151,7 @@ async fn realtime_conversation_stop_emits_closed_notification() -> Result<()> { thread_id: thread_start.thread.id.clone(), model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: None, prompt: Some(Some("backend prompt".to_string())), 
realtime_session_id: None, transport: None, @@ -1176,6 +1254,7 @@ async fn realtime_webrtc_start_emits_sdp_notification() -> Result<()> { thread_id: thread_id.clone(), model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: None, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: Some(ThreadRealtimeStartTransport::Webrtc { @@ -2396,6 +2475,7 @@ async fn realtime_webrtc_start_surfaces_backend_error() -> Result<()> { thread_id: thread_start.thread.id, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: None, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: Some(ThreadRealtimeStartTransport::Webrtc { @@ -2460,6 +2540,7 @@ async fn realtime_conversation_requires_feature_flag() -> Result<()> { thread_id: thread_start.thread.id.clone(), model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: None, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, diff --git a/codex-rs/core/src/realtime_conversation.rs b/codex-rs/core/src/realtime_conversation.rs index 76f7e3437..2b5eb91a2 100644 --- a/codex-rs/core/src/realtime_conversation.rs +++ b/codex-rs/core/src/realtime_conversation.rs @@ -698,6 +698,7 @@ async fn prepare_realtime_start( let config = sess.get_config().await; let transport = params .transport + .clone() .unwrap_or(ConversationStartTransport::Websocket); let mut api_provider = provider.to_api_provider(Some(AuthMode::ApiKey))?; if let Some(realtime_ws_base_url) = &config.experimental_realtime_ws_base_url { @@ -720,16 +721,7 @@ async fn prepare_realtime_start( &transport, config.realtime.session_type, )?; - let session_config = build_realtime_session_config( - sess, - params.model, - params.prompt, - params.realtime_session_id, - params.output_modality, - version, - params.voice, - ) - .await?; + let session_config = build_realtime_session_config(sess, 
&params, version).await?; let requested_realtime_session_id = session_config.session_id.clone(); let extra_headers = match transport { ConversationStartTransport::Websocket => { @@ -791,25 +783,25 @@ fn validate_realtime_architecture( pub(crate) async fn build_realtime_session_config( sess: &Arc, - model: Option<String>, - prompt: Option<Option<String>>, - realtime_session_id: Option<String>, - output_modality: RealtimeOutputModality, + params: &ConversationStartParams, version: RealtimeWsVersion, - voice: Option, ) -> CodexResult<RealtimeSessionConfig> { let config = sess.get_config().await; let prompt = prepare_realtime_backend_prompt( - prompt, + params.prompt.clone(), config.experimental_realtime_ws_backend_prompt.clone(), ); - let startup_context = match config.experimental_realtime_ws_startup_context.clone() { - Some(startup_context) => startup_context, - None => { - build_realtime_startup_context(sess.as_ref(), REALTIME_STARTUP_CONTEXT_TOKEN_BUDGET) - .await - .unwrap_or_default() + let startup_context = if params.include_startup_context { + match config.experimental_realtime_ws_startup_context.clone() { + Some(startup_context) => startup_context, + None => { + build_realtime_startup_context(sess.as_ref(), REALTIME_STARTUP_CONTEXT_TOKEN_BUDGET) + .await + .unwrap_or_default() + } } + } else { + String::new() }; let prompt = match (prompt.is_empty(), startup_context.is_empty()) { (true, true) => String::new(), @@ -818,7 +810,9 @@ pub(crate) async fn build_realtime_session_config( (false, false) => format!("{prompt}\n\n{startup_context}"), }; let model = Some( - model + params + .model + .clone() .or_else(|| config.experimental_realtime_ws_model.clone()) .unwrap_or_else(|| DEFAULT_REALTIME_MODEL.to_string()), ); @@ -826,7 +820,9 @@ pub(crate) async fn build_realtime_session_config( RealtimeWsVersion::V1 => RealtimeEventParser::V1, RealtimeWsVersion::V2 => RealtimeEventParser::RealtimeV2, }; - if version == RealtimeWsVersion::V1 && matches!(output_modality, RealtimeOutputModality::Text) { + if version == 
RealtimeWsVersion::V1 + && matches!(params.output_modality, RealtimeOutputModality::Text) + { return Err(CodexErr::InvalidRequest( "text realtime output modality requires realtime v2".to_string(), )); @@ -835,17 +831,23 @@ pub(crate) async fn build_realtime_session_config( RealtimeWsMode::Conversational => RealtimeSessionMode::Conversational, RealtimeWsMode::Transcription => RealtimeSessionMode::Transcription, }; - let voice = voice + let voice = params + .voice .or(config.realtime.voice) .unwrap_or_else(|| default_realtime_voice(version)); validate_realtime_voice(version, voice)?; Ok(RealtimeSessionConfig { instructions: prompt, model, - session_id: Some(realtime_session_id.unwrap_or_else(|| sess.thread_id.to_string())), + session_id: Some( + params + .realtime_session_id + .clone() + .unwrap_or_else(|| sess.thread_id.to_string()), + ), event_parser, session_mode, - output_modality, + output_modality: params.output_modality, voice, }) } diff --git a/codex-rs/core/tests/suite/compact_remote.rs b/codex-rs/core/tests/suite/compact_remote.rs index adc12ec48..3aa4b21a3 100644 --- a/codex-rs/core/tests/suite/compact_remote.rs +++ b/codex-rs/core/tests/suite/compact_remote.rs @@ -210,6 +210,7 @@ async fn start_realtime_conversation(codex: &codex_core::CodexThread) -> Result< codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, diff --git a/codex-rs/core/tests/suite/realtime_conversation.rs b/codex-rs/core/tests/suite/realtime_conversation.rs index ae9a1f25c..1a5d4d7fa 100644 --- a/codex-rs/core/tests/suite/realtime_conversation.rs +++ b/codex-rs/core/tests/suite/realtime_conversation.rs @@ -289,6 +289,7 @@ async fn conversation_start_audio_text_close_round_trip() -> Result<()> { codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, 
prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -433,6 +434,7 @@ async fn conversation_start_defaults_to_v2_and_gpt_realtime_1_5() -> Result<()> codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -526,6 +528,7 @@ async fn conversation_webrtc_start_posts_generated_session() -> Result<()> { codex_response_item_prefix: None, model: Some("session-override-model".to_string()), output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: Some(ConversationStartTransport::Webrtc { @@ -708,6 +711,7 @@ async fn conversation_webrtc_start_uses_avas_architecture_query() -> Result<()> codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: Some(ConversationStartTransport::Webrtc { @@ -808,6 +812,7 @@ async fn conversation_webrtc_start_uses_configured_call_base_url_for_avas() -> R codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: Some(ConversationStartTransport::Webrtc { @@ -900,6 +905,7 @@ async fn conversation_webrtc_close_while_sideband_connecting_drops_pending_join( codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: Some(ConversationStartTransport::Webrtc { @@ -989,6 +995,7 @@ async fn conversation_webrtc_sideband_connect_failure_closes_with_error() -> Res 
codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: Some(ConversationStartTransport::Webrtc { @@ -1080,6 +1087,7 @@ async fn conversation_start_uses_openai_env_key_fallback_with_chatgpt_auth() -> codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -1151,6 +1159,7 @@ async fn conversation_transport_close_emits_closed_event() -> Result<()> { codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -1246,6 +1255,7 @@ async fn conversation_start_preflight_failure_emits_realtime_error_only() -> Res codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -1295,6 +1305,7 @@ async fn conversation_start_connect_failure_emits_realtime_error_only() -> Resul codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -1392,6 +1403,7 @@ async fn conversation_second_start_replaces_runtime() -> Result<()> { codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("old".to_string())), realtime_session_id: Some("conv_old".to_string()), transport: None, @@ -1420,6 +1432,7 @@ async fn conversation_second_start_replaces_runtime() -> Result<()> { codex_response_item_prefix: 
None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("new".to_string())), realtime_session_id: Some("conv_new".to_string()), transport: None, @@ -1519,6 +1532,7 @@ async fn conversation_uses_experimental_realtime_ws_base_url_override() -> Resul codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -1586,6 +1600,7 @@ async fn conversation_uses_default_realtime_backend_prompt() -> Result<()> { codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: None, realtime_session_id: None, transport: None, @@ -1661,6 +1676,7 @@ async fn conversation_uses_empty_instructions_for_null_or_empty_prompt() -> Resu codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt, realtime_session_id: None, transport: None, @@ -1729,6 +1745,7 @@ async fn conversation_uses_explicit_start_voice() -> Result<()> { codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -1789,6 +1806,7 @@ async fn conversation_uses_configured_realtime_voice() -> Result<()> { codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -1837,6 +1855,7 @@ async fn conversation_rejects_voice_for_wrong_realtime_version() -> Result<()> { codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend 
prompt".to_string())), realtime_session_id: None, transport: None, @@ -1886,6 +1905,7 @@ async fn conversation_uses_experimental_realtime_ws_backend_prompt_override() -> codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("prompt from op".to_string())), realtime_session_id: None, transport: None, @@ -1961,6 +1981,7 @@ async fn conversation_uses_experimental_realtime_ws_startup_context_override() - codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("prompt from op".to_string())), realtime_session_id: None, transport: None, @@ -2030,6 +2051,7 @@ async fn conversation_disables_realtime_startup_context_with_empty_override() -> codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("prompt from op".to_string())), realtime_session_id: None, transport: None, @@ -2092,6 +2114,7 @@ async fn conversation_start_injects_startup_context_from_thread_history() -> Res codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -2208,6 +2231,7 @@ async fn conversation_startup_context_current_thread_selects_many_turns_by_budge codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -2318,6 +2342,7 @@ async fn conversation_startup_context_falls_back_to_workspace_map() -> Result<() codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, 
transport: None, @@ -2380,6 +2405,7 @@ async fn conversation_startup_context_is_truncated_and_sent_once_per_start() -> codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -2463,6 +2489,7 @@ async fn conversation_user_text_turn_is_not_sent_to_realtime() -> Result<()> { codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -2562,6 +2589,7 @@ async fn realtime_v2_noop_tool_call_returns_empty_function_output_without_respon codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -2663,6 +2691,7 @@ async fn conversation_mirrors_assistant_message_text_to_realtime_handoff() -> Re codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -2802,6 +2831,7 @@ async fn conversation_handoff_persists_across_item_done_until_turn_complete() -> codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -2956,6 +2986,7 @@ async fn inbound_handoff_request_starts_turn() -> Result<()> { codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -3060,6 +3091,7 @@ async fn 
inbound_handoff_request_uses_active_transcript() -> Result<()> { codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -3165,6 +3197,7 @@ async fn inbound_handoff_request_sends_transcript_delta_after_each_handoff() -> codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -3268,6 +3301,7 @@ async fn inbound_conversation_item_does_not_start_turn_and_still_forwards_audio( codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -3393,6 +3427,7 @@ async fn delegated_turn_user_role_echo_does_not_redelegate_and_still_forwards_au codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -3548,6 +3583,7 @@ async fn inbound_handoff_request_does_not_block_realtime_event_forwarding() -> R codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -3692,6 +3728,7 @@ async fn inbound_handoff_request_steers_active_turn() -> Result<()> { codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, @@ -3847,6 +3884,7 @@ async fn 
inbound_handoff_request_starts_turn_and_does_not_block_realtime_audio() codex_response_item_prefix: None, model: None, output_modality: RealtimeOutputModality::Audio, + include_startup_context: true, prompt: Some(Some("backend prompt".to_string())), realtime_session_id: None, transport: None, diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index 8f4cb60c4..b4b1659ee 100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -190,6 +190,8 @@ pub struct ConversationStartParams { pub model: Option<String>, /// Selects whether the realtime session should produce text or audio output. pub output_modality: RealtimeOutputModality, + /// Whether to append Codex's startup context to the realtime backend prompt. + pub include_startup_context: bool, pub prompt: Option<Option<String>>, pub realtime_session_id: Option<String>, pub transport: Option<ConversationStartTransport>,