mirror of
https://github.com/pchuan98/codex.git
synced 2026-07-01 00:31:56 +08:00
[codex] control automatic realtime handoff delivery (#27986)
## What Built on the realtime speech-control plumbing merged in #27917. - Add optional `codexResponseHandoffPrefix` to `thread/realtime/start`. - Apply that prefix only to automatic V1 commentary sent through `conversation.handoff.append`; final answers remain unprefixed. - Add opt-in `clientManagedHandoffs`. When true, core suppresses automatic response handoffs and completion output so delivery is controlled by explicit client append APIs. - Preserve existing automatic behavior by default. `codexResponsesAsItems: true` continues to select item routing when client-managed mode is disabled. ## Why Voice clients need two delivery policies: automatic background context with silent commentary instructions and fully client-owned handoffs. Phase-aware prefixing keeps routine commentary silent without suppressing the final answer, while client-managed mode lets an app decide exactly which updates to append. ## Validation - `just fmt` - `cargo test -p codex-app-server-protocol serialize_thread_realtime_start` - `RUST_MIN_STACK=16777216 cargo test -p codex-core --test all conversation_handoff_persists_across_item_done_until_turn_complete` - `RUST_MIN_STACK=16777216 cargo test -p codex-app-server --test all webrtc_v1_client_managed_handoffs_disable_automatic_output` - `RUST_MIN_STACK=16777216 cargo test -p codex-app-server --test all webrtc_v1_final_automatic_handoff_omits_silent_prefix` - `cargo build -p codex-cli --bin codex` - Local Codex Apps compatibility check: 43 focused webview tests passed, and a live voice session routed through the source-built app-server. The explicit `RUST_MIN_STACK` avoids a macOS Tokio test-worker stack overflow seen with the default test environment.
This commit is contained in:
committed by
GitHub
Unverified
parent
a306ac4ee3
commit
683bd170dc
@@ -3082,8 +3082,10 @@ mod tests {
|
||||
request_id: RequestId::Integer(9),
|
||||
params: v2::ThreadRealtimeStartParams {
|
||||
architecture: Some(RealtimeConversationArchitecture::Avas),
|
||||
client_managed_handoffs: Some(true),
|
||||
codex_responses_as_items: None,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: Some("silent context".to_string()),
|
||||
thread_id: "thr_123".to_string(),
|
||||
model: Some("realtime-treatment-model".to_string()),
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
@@ -3102,8 +3104,10 @@ mod tests {
|
||||
"params": {
|
||||
"architecture": "avas",
|
||||
"threadId": "thr_123",
|
||||
"clientManagedHandoffs": true,
|
||||
"codexResponsesAsItems": null,
|
||||
"codexResponseItemPrefix": null,
|
||||
"codexResponseHandoffPrefix": "silent context",
|
||||
"model": "realtime-treatment-model",
|
||||
"outputModality": "audio",
|
||||
"includeStartupContext": false,
|
||||
@@ -3125,8 +3129,10 @@ mod tests {
|
||||
request_id: RequestId::Integer(9),
|
||||
params: v2::ThreadRealtimeStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: None,
|
||||
codex_responses_as_items: None,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
thread_id: "thr_123".to_string(),
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
@@ -3145,8 +3151,10 @@ mod tests {
|
||||
"params": {
|
||||
"architecture": null,
|
||||
"threadId": "thr_123",
|
||||
"clientManagedHandoffs": null,
|
||||
"codexResponsesAsItems": null,
|
||||
"codexResponseItemPrefix": null,
|
||||
"codexResponseHandoffPrefix": null,
|
||||
"model": null,
|
||||
"outputModality": "audio",
|
||||
"includeStartupContext": null,
|
||||
@@ -3163,8 +3171,10 @@ mod tests {
|
||||
request_id: RequestId::Integer(9),
|
||||
params: v2::ThreadRealtimeStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: None,
|
||||
codex_responses_as_items: None,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
thread_id: "thr_123".to_string(),
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
@@ -3183,8 +3193,10 @@ mod tests {
|
||||
"params": {
|
||||
"architecture": null,
|
||||
"threadId": "thr_123",
|
||||
"clientManagedHandoffs": null,
|
||||
"codexResponsesAsItems": null,
|
||||
"codexResponseItemPrefix": null,
|
||||
"codexResponseHandoffPrefix": null,
|
||||
"model": null,
|
||||
"outputModality": "audio",
|
||||
"includeStartupContext": null,
|
||||
@@ -3367,8 +3379,10 @@ mod tests {
|
||||
request_id: RequestId::Integer(1),
|
||||
params: v2::ThreadRealtimeStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: None,
|
||||
codex_responses_as_items: None,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
thread_id: "thr_123".to_string(),
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
|
||||
@@ -70,12 +70,21 @@ pub struct ThreadRealtimeStartParams {
|
||||
/// Overrides the configured realtime architecture for this session only.
|
||||
#[ts(optional = nullable)]
|
||||
pub architecture: Option<RealtimeConversationArchitecture>,
|
||||
/// Leaves Codex response handoffs to the client's explicit append calls instead of forwarding
|
||||
/// them automatically. Defaults to false.
|
||||
#[ts(optional = nullable)]
|
||||
pub client_managed_handoffs: Option<bool>,
|
||||
/// Sends automatic Codex responses as realtime conversation items instead of handoff appends.
|
||||
#[ts(optional = nullable)]
|
||||
pub codex_responses_as_items: Option<bool>,
|
||||
/// Optional prefix added to automatic Codex response items when `codexResponsesAsItems` is true.
|
||||
#[ts(optional = nullable)]
|
||||
pub codex_response_item_prefix: Option<String>,
|
||||
/// Optional prefix added to automatic V1 Codex commentary sent with
|
||||
/// `conversation.handoff.append` when `codexResponsesAsItems` is not true. Final answers are
|
||||
/// sent without the prefix.
|
||||
#[ts(optional = nullable)]
|
||||
pub codex_response_handoff_prefix: Option<String>,
|
||||
/// Overrides the configured realtime model for this session only.
|
||||
#[ts(optional = nullable)]
|
||||
pub model: Option<String>,
|
||||
|
||||
@@ -165,7 +165,7 @@ Example with notification opt-out:
|
||||
- `thread/inject_items` — append raw Responses API items to a loaded thread’s model-visible history without starting a user turn; returns `{}` on success.
|
||||
- `turn/steer` — add user input to an already in-flight regular turn without starting a new turn; returns the active `turnId` that accepted the input. `clientUserMessageId` is optional; when supplied, the corresponding `userMessage` item echoes it as `clientId`. Review and manual compaction turns reject `turn/steer`.
|
||||
- `turn/interrupt` — request cancellation of an in-flight turn by `(thread_id, turn_id)`; success is an empty `{}` response and the turn finishes with `status: "interrupted"`.
|
||||
- `thread/realtime/start` — start a thread-scoped realtime session (experimental); pass `outputModality: "text"` or `outputModality: "audio"` to choose model output, optionally pass `model` and `version` to override configured realtime selection for this session only, and pass `includeStartupContext: false` to omit Codex's generated startup context. By default, automatic Codex text follows the protocol's speakable output path. Pass `codexResponsesAsItems: true` to send automatic Codex responses as realtime conversation items instead, and optionally pass `codexResponseItemPrefix` to prepend experiment instructions to those items. Returns `{}` and streams `thread/realtime/*` notifications. Omit `transport` for the websocket transport, or pass `{ "type": "webrtc", "sdp": "..." }` to create a WebRTC session from a browser-generated SDP offer; the remote answer SDP is emitted as `thread/realtime/sdp`.
|
||||
- `thread/realtime/start` — start a thread-scoped realtime session (experimental); pass `outputModality: "text"` or `outputModality: "audio"` to choose model output, optionally pass `model` and `version` to override configured realtime selection for this session only, and pass `includeStartupContext: false` to omit Codex's generated startup context. By default, automatic Codex text follows the protocol's speakable output path. Pass `clientManagedHandoffs: true` to disable automatic Codex response delivery so only the client's explicit append calls produce handoffs. Pass `codexResponsesAsItems: true` to send automatic Codex responses as realtime conversation items instead, and optionally pass `codexResponseItemPrefix` to prepend experiment instructions to those items. For V1 sessions, pass `codexResponseHandoffPrefix` while item mode is disabled to route automatic Codex commentary through `conversation.handoff.append` with that prefix; final answers remain unprefixed. Returns `{}` and streams `thread/realtime/*` notifications. Omit `transport` for the websocket transport, or pass `{ "type": "webrtc", "sdp": "..." }` to create a WebRTC session from a browser-generated SDP offer; the remote answer SDP is emitted as `thread/realtime/sdp`.
|
||||
- `thread/realtime/appendAudio` — append an input audio chunk to the active realtime session (experimental); returns `{}`.
|
||||
- `thread/realtime/appendText` — append text input to the active realtime session with a required `role` of `user` or `developer` (experimental); returns `{}`. Older clients that omit `role` default to `user`.
|
||||
- `thread/realtime/appendSpeech` — append text that the realtime model should speak to the user (experimental); returns `{}`.
|
||||
@@ -882,12 +882,19 @@ Clients may also pass `model` and `version` on `thread/realtime/start` to select
|
||||
different realtime session configuration without changing thread or user config.
|
||||
Pass `includeStartupContext: false` to skip Codex's startup context for this
|
||||
session while still using the selected backend prompt.
|
||||
Pass `clientManagedHandoffs: true` to suppress automatic Codex response handoffs
|
||||
and items. The client can then choose which updates to deliver with
|
||||
`thread/realtime/appendText` or `thread/realtime/appendSpeech`.
|
||||
Pass `codexResponsesAsItems: true` to inject automatic Codex responses with
|
||||
`conversation.item.create` instead of the protocol's default speakable output
|
||||
path. When using that mode, `codexResponseItemPrefix` can prepend short
|
||||
experiment instructions to each automatic Codex response item. Omit
|
||||
`codexResponsesAsItems`, or pass `false`, to preserve the default speakable
|
||||
behavior. Call
|
||||
behavior. For V1 sessions, `codexResponseHandoffPrefix` instead routes automatic
|
||||
Codex commentary through `conversation.handoff.append` and prepends the provided
|
||||
text. Final answers remain unprefixed. Item mode takes precedence when
|
||||
`codexResponsesAsItems` is true.
|
||||
Call
|
||||
`thread/realtime/appendText` to append app-provided realtime text items, or
|
||||
`thread/realtime/appendSpeech` when the app decides a realtime update should be
|
||||
spoken.
|
||||
|
||||
@@ -931,8 +931,10 @@ impl TurnRequestProcessor {
|
||||
thread.as_ref(),
|
||||
Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: params.architecture,
|
||||
client_managed_handoffs: params.client_managed_handoffs.unwrap_or(false),
|
||||
codex_responses_as_items: params.codex_responses_as_items.unwrap_or(false),
|
||||
codex_response_item_prefix: params.codex_response_item_prefix,
|
||||
codex_response_handoff_prefix: params.codex_response_handoff_prefix,
|
||||
model: params.model,
|
||||
output_modality: params.output_modality,
|
||||
include_startup_context: params.include_startup_context.unwrap_or(true),
|
||||
|
||||
@@ -80,8 +80,10 @@ async fn realtime_conversation_start_requires_experimental_api_capability() -> R
|
||||
let request_id = mcp
|
||||
.send_thread_realtime_start_request(ThreadRealtimeStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: None,
|
||||
codex_responses_as_items: None,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
thread_id: "thr_123".to_string(),
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
@@ -192,8 +194,10 @@ async fn realtime_webrtc_start_requires_experimental_api_capability() -> Result<
|
||||
let request_id = mcp
|
||||
.send_thread_realtime_start_request(ThreadRealtimeStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: None,
|
||||
codex_responses_as_items: None,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
thread_id: "thr_123".to_string(),
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
|
||||
@@ -86,6 +86,8 @@ const V2_HANDOFF_COMPLETE_ACKNOWLEDGEMENT: &str =
|
||||
"Background agent finished. Use the preceding [BACKEND] messages as the result.";
|
||||
const RESPONSE_ITEM_PREFIX: &str =
|
||||
"Use the following context to inform future responses, but do not speak it to the user.";
|
||||
const RESPONSE_HANDOFF_PREFIX: &str =
|
||||
"Silent Codex context. Do not speak, acknowledge, or summarize this item.";
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
enum StartupContextConfig<'a> {
|
||||
@@ -313,8 +315,9 @@ impl RealtimeE2eHarness {
|
||||
}
|
||||
|
||||
/// Starts a WebRTC realtime session from `offer_sdp` using default response routing.
///
/// Every optional routing argument is forwarded as `None`, so the session runs with whatever
/// the server treats as the default for those options.
async fn start_webrtc_realtime(&mut self, offer_sdp: &str) -> Result<StartedWebrtcRealtime> {
|
||||
self.start_webrtc_realtime_with_codex_responses_as_items(
|
||||
offer_sdp, /*codex_responses_as_items*/ None,
|
||||
self.start_webrtc_realtime_with_codex_response_routing(
|
||||
offer_sdp, /*client_managed_handoffs*/ None,
|
||||
/*codex_responses_as_items*/ None, /*codex_response_handoff_prefix*/ None,
|
||||
)
|
||||
.await
|
||||
}
|
||||
@@ -323,17 +326,21 @@ impl RealtimeE2eHarness {
|
||||
&mut self,
|
||||
offer_sdp: &str,
|
||||
) -> Result<StartedWebrtcRealtime> {
|
||||
self.start_webrtc_realtime_with_codex_responses_as_items(
|
||||
self.start_webrtc_realtime_with_codex_response_routing(
|
||||
offer_sdp,
|
||||
/*client_managed_handoffs*/ None,
|
||||
/*codex_responses_as_items*/ Some(true),
|
||||
/*codex_response_handoff_prefix*/ None,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn start_webrtc_realtime_with_codex_responses_as_items(
|
||||
async fn start_webrtc_realtime_with_codex_response_routing(
|
||||
&mut self,
|
||||
offer_sdp: &str,
|
||||
client_managed_handoffs: Option<bool>,
|
||||
codex_responses_as_items: Option<bool>,
|
||||
codex_response_handoff_prefix: Option<&str>,
|
||||
) -> Result<StartedWebrtcRealtime> {
|
||||
// Starts realtime through the public JSON-RPC method, then waits for the same client-visible
|
||||
// notifications a desktop app needs: started first, SDP answer second.
|
||||
@@ -341,10 +348,12 @@ impl RealtimeE2eHarness {
|
||||
.mcp
|
||||
.send_thread_realtime_start_request(ThreadRealtimeStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs,
|
||||
thread_id: self.thread_id.clone(),
|
||||
codex_response_item_prefix: codex_responses_as_items
|
||||
.unwrap_or(false)
|
||||
.then(|| RESPONSE_ITEM_PREFIX.to_string()),
|
||||
codex_response_handoff_prefix: codex_response_handoff_prefix.map(str::to_string),
|
||||
codex_responses_as_items,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
@@ -611,8 +620,10 @@ async fn realtime_conversation_streams_v2_notifications() -> Result<()> {
|
||||
let start_request_id = mcp
|
||||
.send_thread_realtime_start_request(ThreadRealtimeStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: None,
|
||||
codex_responses_as_items: None,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
thread_id: thread_start.thread.id.clone(),
|
||||
model: Some("realtime-treatment-model".to_string()),
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
@@ -867,8 +878,10 @@ async fn realtime_start_can_skip_startup_context() -> Result<()> {
|
||||
let start_request_id = mcp
|
||||
.send_thread_realtime_start_request(ThreadRealtimeStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: None,
|
||||
codex_responses_as_items: None,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
thread_id: thread_start.thread.id.clone(),
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
@@ -964,8 +977,10 @@ async fn realtime_text_output_modality_requests_text_output_and_final_transcript
|
||||
let start_request_id = mcp
|
||||
.send_thread_realtime_start_request(ThreadRealtimeStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: None,
|
||||
codex_responses_as_items: None,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
thread_id: thread_start.thread.id.clone(),
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Text,
|
||||
@@ -1144,8 +1159,10 @@ async fn realtime_conversation_stop_emits_closed_notification() -> Result<()> {
|
||||
let start_request_id = mcp
|
||||
.send_thread_realtime_start_request(ThreadRealtimeStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: None,
|
||||
codex_responses_as_items: None,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
thread_id: thread_start.thread.id.clone(),
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
@@ -1247,8 +1264,10 @@ async fn realtime_webrtc_start_emits_sdp_notification() -> Result<()> {
|
||||
let start_request_id = mcp
|
||||
.send_thread_realtime_start_request(ThreadRealtimeStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: None,
|
||||
codex_responses_as_items: None,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
thread_id: thread_id.clone(),
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
@@ -1480,6 +1499,135 @@ async fn webrtc_v1_default_automatic_output_uses_handoff_append() -> Result<()>
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// With `client_managed_handoffs: Some(true)` on a V1 session, a completed turn must not
/// produce any automatic realtime request, while an explicit `append_speech` call from the
/// client is still delivered as a `conversation.handoff.append`.
#[tokio::test]
|
||||
async fn webrtc_v1_client_managed_handoffs_disable_automatic_output() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
|
||||
let mut harness = RealtimeE2eHarness::new(
|
||||
RealtimeTestVersion::V1,
|
||||
main_loop_responses(vec![create_final_assistant_message_sse_response(
|
||||
"client-managed output",
|
||||
)?]),
|
||||
realtime_sideband(vec![realtime_sideband_connection(vec![
|
||||
vec![session_updated("sess_v1_client_managed_handoffs")],
|
||||
vec![],
|
||||
])]),
|
||||
)
|
||||
.await?;
|
||||
|
|
||||
let started = harness
|
||||
.start_webrtc_realtime_with_codex_response_routing(
|
||||
"v=offer\r\n",
|
||||
/*client_managed_handoffs*/ Some(true),
|
||||
/*codex_responses_as_items*/ None,
|
||||
/*codex_response_handoff_prefix*/ None,
|
||||
)
|
||||
.await?;
|
||||
assert_eq!(started.started.version, RealtimeConversationVersion::V1);
|
||||
// Outbound request 0 is the session update; anything Codex sends later would be request 1.
assert_v1_session_update(&harness.sideband_outbound_request(/*request_index*/ 0).await)?;
|
||||
|
|
||||
// Run one ordinary turn to completion so that automatic output, if any, would be emitted.
let turn_request_id = harness
|
||||
.mcp
|
||||
.send_turn_start_request(TurnStartParams {
|
||||
thread_id: harness.thread_id.clone(),
|
||||
input: vec![V2UserInput::Text {
|
||||
text: "leave realtime delivery to the client".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
let turn_response: JSONRPCResponse = timeout(
|
||||
DEFAULT_TIMEOUT,
|
||||
harness
|
||||
.mcp
|
||||
.read_stream_until_response_message(RequestId::Integer(turn_request_id)),
|
||||
)
|
||||
.await??;
|
||||
let _: TurnStartResponse = to_response(turn_response)?;
|
||||
let _ = harness
|
||||
.read_notification::<TurnCompletedNotification>("turn/completed")
|
||||
.await?;
|
||||
|
|
||||
// Negative check: the wait for request 1 is expected to time out, i.e. nothing was sent
// within 200 ms of the turn completing.
let automatic_handoff = timeout(
|
||||
Duration::from_millis(200),
|
||||
harness
|
||||
.realtime_server
|
||||
.wait_for_request(/*connection_index*/ 0, /*request_index*/ 1),
|
||||
)
|
||||
.await;
|
||||
assert!(
|
||||
automatic_handoff.is_err(),
|
||||
"automatic Codex output should not reach realtime in client-managed handoff mode"
|
||||
);
|
||||
|
|
||||
// The explicit client append is the first request after the session update, so it must
// show up at index 1 with exactly the client-provided text.
harness
|
||||
.append_speech(harness.thread_id.clone(), "client-selected speech")
|
||||
.await?;
|
||||
assert_eq!(
|
||||
harness.sideband_outbound_request(/*request_index*/ 1).await,
|
||||
json!({
|
||||
"type": "conversation.handoff.append",
|
||||
"handoff_id": "codex",
|
||||
"output_text": "client-selected speech",
|
||||
})
|
||||
);
|
||||
|
|
||||
harness.shutdown().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// With a handoff prefix configured on a V1 session, the automatic append for the final
/// assistant message must carry the message text only, without `RESPONSE_HANDOFF_PREFIX`.
#[tokio::test]
|
||||
async fn webrtc_v1_final_automatic_handoff_omits_silent_prefix() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
|
||||
// NOTE(review): presumably `create_final_assistant_message_sse_response` yields a message in
// the final-answer phase; confirm against the helper, since the assertion below relies on it.
let mut harness = RealtimeE2eHarness::new(
|
||||
RealtimeTestVersion::V1,
|
||||
main_loop_responses(vec![create_final_assistant_message_sse_response(
|
||||
"background progress",
|
||||
)?]),
|
||||
realtime_sideband(vec![realtime_sideband_connection(vec![
|
||||
vec![
|
||||
session_updated("sess_v1_prefixed_handoff"),
|
||||
json!({
|
||||
"type": "conversation.handoff.requested",
|
||||
"handoff_id": "handoff_prefixed",
|
||||
"item_id": "item_prefixed",
|
||||
"input_transcript": "run the background task"
|
||||
}),
|
||||
],
|
||||
vec![],
|
||||
vec![],
|
||||
])]),
|
||||
)
|
||||
.await?;
|
||||
|
|
||||
let started = harness
|
||||
.start_webrtc_realtime_with_codex_response_routing(
|
||||
"v=offer\r\n",
|
||||
/*client_managed_handoffs*/ None,
|
||||
/*codex_responses_as_items*/ None,
|
||||
Some(RESPONSE_HANDOFF_PREFIX),
|
||||
)
|
||||
.await?;
|
||||
assert_eq!(started.started.version, RealtimeConversationVersion::V1);
|
||||
// No turn is started by the test itself; it only waits for a turn to complete.
// NOTE(review): the turn is presumably triggered by the scripted handoff request above.
let _ = harness
|
||||
.read_notification::<TurnCompletedNotification>("turn/completed")
|
||||
.await?;
|
||||
|
|
||||
// The append targets the requested handoff id and contains the bare message text.
assert_eq!(
|
||||
harness.sideband_outbound_request(/*request_index*/ 1).await,
|
||||
json!({
|
||||
"type": "conversation.handoff.append",
|
||||
"handoff_id": "handoff_prefixed",
|
||||
"output_text": "background progress",
|
||||
})
|
||||
);
|
||||
|
|
||||
harness.shutdown().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn webrtc_v1_handoff_request_delegates_context_and_manual_append_speaks() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
@@ -2468,8 +2616,10 @@ async fn realtime_webrtc_start_surfaces_backend_error() -> Result<()> {
|
||||
let start_request_id = mcp
|
||||
.send_thread_realtime_start_request(ThreadRealtimeStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: None,
|
||||
codex_responses_as_items: None,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
thread_id: thread_start.thread.id,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
@@ -2533,8 +2683,10 @@ async fn realtime_conversation_requires_feature_flag() -> Result<()> {
|
||||
let start_request_id = mcp
|
||||
.send_thread_realtime_start_request(ThreadRealtimeStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: None,
|
||||
codex_responses_as_items: None,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
thread_id: thread_start.thread.id.clone(),
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
|
||||
@@ -30,6 +30,7 @@ use codex_login::read_openai_api_key_from_env;
|
||||
use codex_model_provider_info::ModelProviderInfo;
|
||||
use codex_protocol::error::CodexErr;
|
||||
use codex_protocol::error::Result as CodexResult;
|
||||
use codex_protocol::models::MessagePhase;
|
||||
use codex_protocol::protocol::CodexErrorInfo;
|
||||
use codex_protocol::protocol::ConversationAudioParams;
|
||||
use codex_protocol::protocol::ConversationSpeechParams;
|
||||
@@ -107,8 +108,10 @@ struct RealtimeHandoffState {
|
||||
output_tx: Sender<RealtimeOutbound>,
|
||||
active_handoff: Arc<Mutex<Option<String>>>,
|
||||
last_output_text: Arc<Mutex<Option<String>>>,
|
||||
client_managed_handoffs: bool,
|
||||
codex_responses_as_items: bool,
|
||||
codex_response_item_prefix: Option<String>,
|
||||
codex_response_handoff_prefix: Option<String>,
|
||||
session_kind: RealtimeSessionKind,
|
||||
}
|
||||
|
||||
@@ -116,6 +119,7 @@ struct RealtimeHandoffState {
|
||||
enum RealtimeOutbound {
|
||||
StandaloneHandoff { text: String },
|
||||
HandoffUpdate { handoff_id: String, text: String },
|
||||
HandoffAppend { handoff_id: String, text: String },
|
||||
CompletedHandoff { handoff_id: String, text: String },
|
||||
ConversationItem { text: String },
|
||||
HandoffCompleteAck { handoff_id: String },
|
||||
@@ -210,16 +214,20 @@ struct RealtimeInputChannels {
|
||||
impl RealtimeHandoffState {
|
||||
/// Builds the handoff state for a realtime session.
///
/// The routing options are stored as given. `active_handoff` and `last_output_text` both
/// start out as `None`, each behind its own `Arc<Mutex<_>>`.
fn new(
|
||||
output_tx: Sender<RealtimeOutbound>,
|
||||
client_managed_handoffs: bool,
|
||||
codex_responses_as_items: bool,
|
||||
codex_response_item_prefix: Option<String>,
|
||||
codex_response_handoff_prefix: Option<String>,
|
||||
session_kind: RealtimeSessionKind,
|
||||
) -> Self {
|
||||
Self {
|
||||
output_tx,
|
||||
active_handoff: Arc::new(Mutex::new(None)),
|
||||
last_output_text: Arc::new(Mutex::new(None)),
|
||||
client_managed_handoffs,
|
||||
codex_responses_as_items,
|
||||
codex_response_item_prefix,
|
||||
codex_response_handoff_prefix,
|
||||
session_kind,
|
||||
}
|
||||
}
|
||||
@@ -240,8 +248,10 @@ struct RealtimeStart {
|
||||
api_provider: ApiProvider,
|
||||
architecture: RealtimeConversationArchitecture,
|
||||
extra_headers: Option<HeaderMap>,
|
||||
client_managed_handoffs: bool,
|
||||
codex_responses_as_items: bool,
|
||||
codex_response_item_prefix: Option<String>,
|
||||
codex_response_handoff_prefix: Option<String>,
|
||||
realtime_call_api_provider: Option<ApiProvider>,
|
||||
session_config: RealtimeSessionConfig,
|
||||
model_client: ModelClient,
|
||||
@@ -296,8 +306,10 @@ impl RealtimeConversationManager {
|
||||
api_provider,
|
||||
architecture,
|
||||
extra_headers,
|
||||
client_managed_handoffs,
|
||||
codex_responses_as_items,
|
||||
codex_response_item_prefix,
|
||||
codex_response_handoff_prefix,
|
||||
realtime_call_api_provider,
|
||||
session_config,
|
||||
model_client,
|
||||
@@ -321,8 +333,10 @@ impl RealtimeConversationManager {
|
||||
let realtime_active = Arc::new(AtomicBool::new(true));
|
||||
let handoff = RealtimeHandoffState::new(
|
||||
handoff_output_tx,
|
||||
client_managed_handoffs,
|
||||
codex_responses_as_items,
|
||||
codex_response_item_prefix,
|
||||
codex_response_handoff_prefix,
|
||||
session_kind,
|
||||
);
|
||||
let input_channels = RealtimeInputChannels {
|
||||
@@ -479,7 +493,11 @@ impl RealtimeConversationManager {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn handoff_out(&self, output_text: String) -> CodexResult<()> {
|
||||
pub(crate) async fn handoff_out(
|
||||
&self,
|
||||
output_text: String,
|
||||
phase: Option<MessagePhase>,
|
||||
) -> CodexResult<()> {
|
||||
let handoff = {
|
||||
let guard = self.state.lock().await;
|
||||
let Some(state) = guard.as_ref() else {
|
||||
@@ -490,6 +508,13 @@ impl RealtimeConversationManager {
|
||||
state.handoff.clone()
|
||||
};
|
||||
|
||||
if handoff.client_managed_handoffs {
|
||||
return Ok(());
|
||||
}
|
||||
let response_handoff_prefix = match phase {
|
||||
Some(MessagePhase::Commentary) => handoff.codex_response_handoff_prefix.clone(),
|
||||
Some(MessagePhase::FinalAnswer) | None => None,
|
||||
};
|
||||
let active_handoff = handoff.active_handoff.lock().await.clone();
|
||||
let output = match active_handoff {
|
||||
Some(handoff_id) => {
|
||||
@@ -502,6 +527,16 @@ impl RealtimeConversationManager {
|
||||
handoff.codex_response_item_prefix.as_deref(),
|
||||
),
|
||||
}
|
||||
} else if handoff.session_kind == RealtimeSessionKind::V1
|
||||
&& handoff.codex_response_handoff_prefix.is_some()
|
||||
{
|
||||
RealtimeOutbound::HandoffAppend {
|
||||
handoff_id,
|
||||
text: realtime_backend_item(
|
||||
output_text,
|
||||
response_handoff_prefix.as_deref(),
|
||||
),
|
||||
}
|
||||
} else {
|
||||
RealtimeOutbound::HandoffUpdate {
|
||||
handoff_id,
|
||||
@@ -520,7 +555,13 @@ impl RealtimeConversationManager {
|
||||
),
|
||||
}
|
||||
} else {
|
||||
RealtimeOutbound::StandaloneHandoff { text: output_text }
|
||||
RealtimeOutbound::StandaloneHandoff {
|
||||
text: if handoff.session_kind == RealtimeSessionKind::V1 {
|
||||
realtime_backend_item(output_text, response_handoff_prefix.as_deref())
|
||||
} else {
|
||||
output_text
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -565,6 +606,9 @@ impl RealtimeConversationManager {
|
||||
let Some(handoff) = handoff else {
|
||||
return Ok(());
|
||||
};
|
||||
if handoff.client_managed_handoffs {
|
||||
return Ok(());
|
||||
}
|
||||
match handoff.session_kind {
|
||||
RealtimeSessionKind::V1 => return Ok(()),
|
||||
RealtimeSessionKind::V2 => {}
|
||||
@@ -675,8 +719,10 @@ struct PreparedRealtimeConversationStart {
|
||||
api_provider: ApiProvider,
|
||||
architecture: RealtimeConversationArchitecture,
|
||||
extra_headers: Option<HeaderMap>,
|
||||
client_managed_handoffs: bool,
|
||||
codex_responses_as_items: bool,
|
||||
codex_response_item_prefix: Option<String>,
|
||||
codex_response_handoff_prefix: Option<String>,
|
||||
realtime_call_api_provider: Option<ApiProvider>,
|
||||
requested_realtime_session_id: Option<String>,
|
||||
version: RealtimeWsVersion,
|
||||
@@ -744,8 +790,10 @@ async fn prepare_realtime_start(
|
||||
api_provider,
|
||||
architecture,
|
||||
extra_headers,
|
||||
client_managed_handoffs: params.client_managed_handoffs,
|
||||
codex_responses_as_items: params.codex_responses_as_items,
|
||||
codex_response_item_prefix: params.codex_response_item_prefix,
|
||||
codex_response_handoff_prefix: params.codex_response_handoff_prefix,
|
||||
realtime_call_api_provider,
|
||||
requested_realtime_session_id,
|
||||
version,
|
||||
@@ -914,8 +962,10 @@ async fn handle_start_inner(
|
||||
api_provider,
|
||||
architecture,
|
||||
extra_headers,
|
||||
client_managed_handoffs,
|
||||
codex_responses_as_items,
|
||||
codex_response_item_prefix,
|
||||
codex_response_handoff_prefix,
|
||||
realtime_call_api_provider,
|
||||
requested_realtime_session_id,
|
||||
version,
|
||||
@@ -931,8 +981,10 @@ async fn handle_start_inner(
|
||||
api_provider,
|
||||
architecture,
|
||||
extra_headers,
|
||||
client_managed_handoffs,
|
||||
codex_responses_as_items,
|
||||
codex_response_item_prefix,
|
||||
codex_response_handoff_prefix,
|
||||
realtime_call_api_provider,
|
||||
session_config,
|
||||
model_client: sess.services.model_client.clone(),
|
||||
@@ -1368,6 +1420,11 @@ async fn handle_handoff_output(
|
||||
.send_conversation_function_call_output(handoff_id, text)
|
||||
.await
|
||||
}
|
||||
RealtimeOutbound::HandoffAppend { handoff_id, text } => {
|
||||
writer
|
||||
.send_conversation_handoff_append(handoff_id, text)
|
||||
.await
|
||||
}
|
||||
RealtimeOutbound::ConversationItem { text } => {
|
||||
writer
|
||||
.send_conversation_item_create(text, ConversationTextRole::Developer)
|
||||
@@ -1388,7 +1445,8 @@ async fn handle_handoff_output(
|
||||
.await;
|
||||
}
|
||||
}
|
||||
RealtimeOutbound::HandoffUpdate { handoff_id, text } => {
|
||||
RealtimeOutbound::HandoffUpdate { handoff_id, text }
|
||||
| RealtimeOutbound::HandoffAppend { handoff_id, text } => {
|
||||
let active_handoff = handoff_state.active_handoff.lock().await.clone();
|
||||
match active_handoff {
|
||||
Some(active_handoff) if active_handoff == handoff_id => {}
|
||||
|
||||
@@ -130,8 +130,10 @@ async fn clears_active_handoff_explicitly() {
|
||||
let (tx, _rx) = bounded(1);
|
||||
let state = RealtimeHandoffState::new(
|
||||
tx,
|
||||
/*client_managed_handoffs*/ false,
|
||||
/*codex_responses_as_items*/ false,
|
||||
/*codex_response_item_prefix*/ None,
|
||||
/*codex_response_handoff_prefix*/ None,
|
||||
RealtimeSessionKind::V1,
|
||||
);
|
||||
|
||||
|
||||
@@ -1809,13 +1809,13 @@ impl Session {
|
||||
}
|
||||
|
||||
async fn maybe_mirror_event_text_to_realtime(&self, msg: &EventMsg) {
|
||||
let Some(text) = realtime_text_for_event(msg) else {
|
||||
let Some((text, phase)) = realtime_text_for_event(msg) else {
|
||||
return;
|
||||
};
|
||||
if self.conversation.running_state().await.is_none() {
|
||||
return;
|
||||
}
|
||||
if let Err(err) = self.conversation.handoff_out(text).await {
|
||||
if let Err(err) = self.conversation.handoff_out(text, phase).await {
|
||||
debug!("failed to mirror event text to realtime conversation: {err}");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1460,11 +1460,11 @@ fn agent_message_text(item: &codex_protocol::items::AgentMessageItem) -> String
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub(super) fn realtime_text_for_event(msg: &EventMsg) -> Option<String> {
|
||||
pub(super) fn realtime_text_for_event(msg: &EventMsg) -> Option<(String, Option<MessagePhase>)> {
|
||||
match msg {
|
||||
EventMsg::AgentMessage(event) => Some(event.message.clone()),
|
||||
EventMsg::AgentMessage(event) => Some((event.message.clone(), event.phase.clone())),
|
||||
EventMsg::ItemCompleted(event) => match &event.item {
|
||||
TurnItem::AgentMessage(item) => Some(agent_message_text(item)),
|
||||
TurnItem::AgentMessage(item) => Some((agent_message_text(item), item.phase.clone())),
|
||||
_ => None,
|
||||
},
|
||||
EventMsg::Error(_)
|
||||
|
||||
@@ -204,8 +204,10 @@ async fn start_realtime_conversation(codex: &codex_core::CodexThread) -> Result<
|
||||
codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
|
||||
@@ -67,6 +67,7 @@ const MEMORY_PROMPT_PHRASE: &str =
|
||||
"You have access to a memory folder with guidance from prior runs.";
|
||||
const REALTIME_CONVERSATION_TEST_SUBPROCESS_ENV_VAR: &str =
|
||||
"CODEX_REALTIME_CONVERSATION_TEST_SUBPROCESS";
|
||||
const SILENT_CONTEXT_PREFIX: &str = "[BACKEND] Silent Codex context. Do not speak, acknowledge, or summarize this item. Wait for an explicit speakable handoff or direct user request.";
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct RealtimeCallRequestCapture {
|
||||
@@ -285,8 +286,10 @@ async fn conversation_start_audio_text_close_round_trip() -> Result<()> {
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -430,8 +433,10 @@ async fn conversation_start_defaults_to_v2_and_gpt_realtime_1_5() -> Result<()>
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -524,8 +529,10 @@ async fn conversation_webrtc_start_posts_generated_session() -> Result<()> {
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: Some("session-override-model".to_string()),
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -707,8 +714,10 @@ async fn conversation_webrtc_start_uses_avas_architecture_query() -> Result<()>
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: Some(RealtimeConversationArchitecture::Avas),
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -808,8 +817,10 @@ async fn conversation_webrtc_start_uses_configured_call_base_url_for_avas() -> R
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: Some(RealtimeConversationArchitecture::Avas),
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -901,8 +912,10 @@ async fn conversation_webrtc_close_while_sideband_connecting_drops_pending_join(
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -991,8 +1004,10 @@ async fn conversation_webrtc_sideband_connect_failure_closes_with_error() -> Res
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -1083,8 +1098,10 @@ async fn conversation_start_uses_openai_env_key_fallback_with_chatgpt_auth() ->
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -1155,8 +1172,10 @@ async fn conversation_transport_close_emits_closed_event() -> Result<()> {
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -1251,8 +1270,10 @@ async fn conversation_start_preflight_failure_emits_realtime_error_only() -> Res
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -1301,8 +1322,10 @@ async fn conversation_start_connect_failure_emits_realtime_error_only() -> Resul
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -1399,8 +1422,10 @@ async fn conversation_second_start_replaces_runtime() -> Result<()> {
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -1428,8 +1453,10 @@ async fn conversation_second_start_replaces_runtime() -> Result<()> {
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -1528,8 +1555,10 @@ async fn conversation_uses_experimental_realtime_ws_base_url_override() -> Resul
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -1596,8 +1625,10 @@ async fn conversation_uses_default_realtime_backend_prompt() -> Result<()> {
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -1672,8 +1703,10 @@ async fn conversation_uses_empty_instructions_for_null_or_empty_prompt() -> Resu
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -1741,8 +1774,10 @@ async fn conversation_uses_explicit_start_voice() -> Result<()> {
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -1802,8 +1837,10 @@ async fn conversation_uses_configured_realtime_voice() -> Result<()> {
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -1851,8 +1888,10 @@ async fn conversation_rejects_voice_for_wrong_realtime_version() -> Result<()> {
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -1901,8 +1940,10 @@ async fn conversation_uses_experimental_realtime_ws_backend_prompt_override() ->
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -1977,8 +2018,10 @@ async fn conversation_uses_experimental_realtime_ws_startup_context_override() -
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -2047,8 +2090,10 @@ async fn conversation_disables_realtime_startup_context_with_empty_override() ->
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -2110,8 +2155,10 @@ async fn conversation_start_injects_startup_context_from_thread_history() -> Res
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -2227,8 +2274,10 @@ async fn conversation_startup_context_current_thread_selects_many_turns_by_budge
|
||||
codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -2338,8 +2387,10 @@ async fn conversation_startup_context_falls_back_to_workspace_map() -> Result<()
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -2401,8 +2452,10 @@ async fn conversation_startup_context_is_truncated_and_sent_once_per_start() ->
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -2485,8 +2538,10 @@ async fn conversation_user_text_turn_is_not_sent_to_realtime() -> Result<()> {
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -2585,8 +2640,10 @@ async fn realtime_v2_noop_tool_call_returns_empty_function_output_without_respon
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -2687,8 +2744,10 @@ async fn conversation_mirrors_assistant_message_text_to_realtime_handoff() -> Re
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -2764,6 +2823,10 @@ async fn conversation_handoff_persists_across_item_done_until_turn_complete() ->
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let (gate_second_message_tx, gate_second_message_rx) = oneshot::channel();
|
||||
let mut commentary_message = responses::ev_assistant_message("msg-1", "assistant message 1");
|
||||
commentary_message["item"]["phase"] = json!("commentary");
|
||||
let mut final_message = responses::ev_assistant_message("msg-2", "assistant message 2");
|
||||
final_message["item"]["phase"] = json!("final_answer");
|
||||
let first_chunks = vec![
|
||||
StreamingSseChunk {
|
||||
gate: None,
|
||||
@@ -2771,17 +2834,11 @@ async fn conversation_handoff_persists_across_item_done_until_turn_complete() ->
|
||||
},
|
||||
StreamingSseChunk {
|
||||
gate: None,
|
||||
body: sse_event(responses::ev_assistant_message(
|
||||
"msg-1",
|
||||
"assistant message 1",
|
||||
)),
|
||||
body: sse_event(commentary_message),
|
||||
},
|
||||
StreamingSseChunk {
|
||||
gate: Some(gate_second_message_rx),
|
||||
body: sse_event(responses::ev_assistant_message(
|
||||
"msg-2",
|
||||
"assistant message 2",
|
||||
)),
|
||||
body: sse_event(final_message),
|
||||
},
|
||||
StreamingSseChunk {
|
||||
gate: None,
|
||||
@@ -2827,8 +2884,10 @@ async fn conversation_handoff_persists_across_item_done_until_turn_complete() ->
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: Some(SILENT_CONTEXT_PREFIX.to_string()),
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -2873,7 +2932,7 @@ async fn conversation_handoff_persists_across_item_done_until_turn_complete() ->
|
||||
);
|
||||
assert_eq!(
|
||||
first_append.body_json()["output_text"].as_str(),
|
||||
Some("\"Agent Final Message\":\n\nassistant message 1")
|
||||
Some(format!("{SILENT_CONTEXT_PREFIX}\n\nassistant message 1").as_str())
|
||||
);
|
||||
|
||||
let _ = wait_for_event_match(&test.codex, |msg| match msg {
|
||||
@@ -2899,7 +2958,7 @@ async fn conversation_handoff_persists_across_item_done_until_turn_complete() ->
|
||||
);
|
||||
assert_eq!(
|
||||
second_append.body_json()["output_text"].as_str(),
|
||||
Some("\"Agent Final Message\":\n\nassistant message 2")
|
||||
Some("assistant message 2")
|
||||
);
|
||||
|
||||
let completion = completions
|
||||
@@ -2982,8 +3041,10 @@ async fn inbound_handoff_request_starts_turn() -> Result<()> {
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -3095,8 +3156,10 @@ async fn inbound_handoff_request_uses_active_transcript() -> Result<()> {
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -3201,8 +3264,10 @@ async fn inbound_handoff_request_sends_transcript_delta_after_each_handoff() ->
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -3305,8 +3370,10 @@ async fn inbound_conversation_item_does_not_start_turn_and_still_forwards_audio(
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -3431,8 +3498,10 @@ async fn delegated_turn_user_role_echo_does_not_redelegate_and_still_forwards_au
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -3587,8 +3656,10 @@ async fn inbound_handoff_request_does_not_block_realtime_event_forwarding() -> R
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -3732,8 +3803,10 @@ async fn inbound_handoff_request_steers_active_turn() -> Result<()> {
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
@@ -3888,8 +3961,10 @@ async fn inbound_handoff_request_starts_turn_and_does_not_block_realtime_audio()
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
architecture: None,
|
||||
client_managed_handoffs: false,
|
||||
codex_responses_as_items: false,
|
||||
codex_response_item_prefix: None,
|
||||
codex_response_handoff_prefix: None,
|
||||
model: None,
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
include_startup_context: true,
|
||||
|
||||
@@ -182,10 +182,16 @@ pub struct McpServerRefreshConfig {
|
||||
pub struct ConversationStartParams {
|
||||
/// Overrides the configured realtime architecture for this session only.
|
||||
pub architecture: Option<RealtimeConversationArchitecture>,
|
||||
/// Whether Codex response handoffs are managed through explicit client append calls.
|
||||
pub client_managed_handoffs: bool,
|
||||
/// Sends automatic Codex responses as realtime conversation items instead of handoff appends.
|
||||
pub codex_responses_as_items: bool,
|
||||
/// Optional prefix added to automatic Codex response items when `codex_responses_as_items` is set.
|
||||
pub codex_response_item_prefix: Option<String>,
|
||||
/// Optional prefix added to automatic V1 Codex commentary sent with
|
||||
/// `conversation.handoff.append` when `codex_responses_as_items` is not set. Final answers are
|
||||
/// sent without the prefix.
|
||||
pub codex_response_handoff_prefix: Option<String>,
|
||||
/// Overrides the configured realtime model for this session only.
|
||||
pub model: Option<String>,
|
||||
/// Selects whether the realtime session should produce text or audio output.
|
||||
|
||||
Reference in New Issue
Block a user