Wrap delegated input text (#17868)

## Summary
- wrap routed delegation text in a small XML envelope before submitting
it as a user turn
- escape XML text content so the envelope stays well formed
- update focused coverage for the wrapper and the affected routed-turn
expectations
This commit is contained in:
guinness-oai
2026-04-14 19:58:58 -07:00
committed by GitHub
Unverified
parent 130b047beb
commit 6f5ddd408b
3 changed files with 50 additions and 29 deletions
+17 -1
View File
@@ -805,7 +805,9 @@ async fn handle_start_inner(
if let Some(text) = maybe_routed_text {
debug!(text = %text, "[realtime-text] realtime conversation text output");
let sess_for_routed_text = Arc::clone(&sess_clone);
sess_for_routed_text.route_realtime_text_input(text).await;
sess_for_routed_text
.route_realtime_text_input(wrap_realtime_delegation_input(&text))
.await;
}
if !fanout_realtime_active.load(Ordering::Relaxed) {
break;
@@ -867,6 +869,20 @@ fn realtime_text_from_handoff_request(handoff: &RealtimeHandoffRequested) -> Opt
.or((!handoff.input_transcript.is_empty()).then_some(handoff.input_transcript.clone()))
}
/// Builds the envelope submitted for routed realtime delegation text.
///
/// `input` is first passed through `escape_xml_text`, then placed inside an
/// `<input>` element nested in a `<realtime_delegation>` element.
fn wrap_realtime_delegation_input(input: &str) -> String {
    let escaped = escape_xml_text(input);
    format!("<realtime_delegation>\n <input>{escaped}</input>\n</realtime_delegation>")
}
/// Escapes `&`, `<` and `>` so `input` can be embedded as XML text content.
///
/// Each `&` becomes `&amp;`, each `<` becomes `&lt;` and each `>` becomes
/// `&gt;`; every other character is copied through unchanged.
fn escape_xml_text(input: &str) -> String {
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
fn realtime_api_key(auth: Option<&CodexAuth>, provider: &ModelProviderInfo) -> CodexResult<String> {
if let Some(api_key) = provider.api_key()? {
return Ok(api_key);
@@ -1,6 +1,7 @@
use super::RealtimeHandoffState;
use super::RealtimeSessionKind;
use super::realtime_text_from_handoff_request;
use super::wrap_realtime_delegation_input;
use async_channel::bounded;
use codex_protocol::protocol::RealtimeHandoffRequested;
use codex_protocol::protocol::RealtimeTranscriptEntry;
@@ -54,6 +55,22 @@ fn ignores_empty_handoff_request_input_transcript() {
assert_eq!(realtime_text_from_handoff_request(&handoff), None);
}
/// Plain text with no markup characters is placed in the envelope verbatim.
#[test]
fn wraps_realtime_delegation_input() {
    let wrapped = wrap_realtime_delegation_input("hello");
    let expected = "<realtime_delegation>\n <input>hello</input>\n</realtime_delegation>";
    assert_eq!(wrapped, expected);
}
/// `<`, `>` and `&` in the delegated text appear as entities inside the envelope.
#[test]
fn wraps_realtime_delegation_input_with_xml_escaping() {
    let wrapped = wrap_realtime_delegation_input("use a < b && c > d");
    let expected = "<realtime_delegation>\n <input>use a &lt; b &amp;&amp; c &gt; d</input>\n</realtime_delegation>";
    assert_eq!(wrapped, expected);
}
#[tokio::test]
async fn clears_active_handoff_explicitly() {
let (tx, _rx) = bounded(1);
@@ -2417,11 +2417,8 @@ async fn inbound_handoff_request_starts_turn() -> Result<()> {
let request = response_mock.single_request();
let user_texts = request.message_input_texts("user");
assert!(
user_texts
.iter()
.any(|text| text == "user: text from realtime")
);
assert!(user_texts.iter().any(|text| text
== "<realtime_delegation>\n <input>user: text from realtime</input>\n</realtime_delegation>"));
realtime_server.shutdown().await;
Ok(())
@@ -2503,7 +2500,7 @@ async fn inbound_handoff_request_uses_active_transcript() -> Result<()> {
let request = response_mock.single_request();
let user_texts = request.message_input_texts("user");
assert!(user_texts.iter().any(|text| text
== "assistant: assistant context\nuser: delegated query\nassistant: assist confirm"));
== "<realtime_delegation>\n <input>assistant: assistant context\nuser: delegated query\nassistant: assist confirm</input>\n</realtime_delegation>"));
realtime_server.shutdown().await;
Ok(())
@@ -2617,23 +2614,14 @@ async fn inbound_handoff_request_clears_active_transcript_after_each_handoff() -
assert_eq!(requests.len(), 2);
let first_user_texts = requests[0].message_input_texts("user");
assert!(
first_user_texts
.iter()
.any(|text| text == "user: first question")
);
assert!(first_user_texts.iter().any(|text| text
== "<realtime_delegation>\n <input>user: first question</input>\n</realtime_delegation>"));
let second_user_texts = requests[1].message_input_texts("user");
assert!(
second_user_texts
.iter()
.any(|text| text == "user: second question")
);
assert!(
!second_user_texts
.iter()
.any(|text| text == "user: first question\nuser: second question")
);
assert!(second_user_texts.iter().any(|text| text
== "<realtime_delegation>\n <input>user: second question</input>\n</realtime_delegation>"));
assert!(!second_user_texts.iter().any(|text| text
== "<realtime_delegation>\n <input>user: first question\nuser: second question</input>\n</realtime_delegation>"));
realtime_server.shutdown().await;
Ok(())
@@ -3156,14 +3144,12 @@ async fn inbound_handoff_request_steers_active_turn() -> Result<()> {
assert!(
!first_texts
.iter()
.any(|text| text == "user: steer via realtime")
.any(|text| text
== "<realtime_delegation>\n <input>user: steer via realtime</input>\n</realtime_delegation>")
);
assert!(second_texts.iter().any(|text| text == "first prompt"));
assert!(
second_texts
.iter()
.any(|text| text == "user: steer via realtime")
);
assert!(second_texts.iter().any(|text| text
== "<realtime_delegation>\n <input>user: steer via realtime</input>\n</realtime_delegation>"));
realtime_server.shutdown().await;
api_server.shutdown().await;
@@ -3278,7 +3264,9 @@ async fn inbound_handoff_request_starts_turn_and_does_not_block_realtime_audio()
assert_eq!(requests.len(), 1);
let first_body: Value = serde_json::from_slice(&requests[0]).expect("parse first request");
let first_texts = message_input_texts(&first_body, "user");
let expected_text = format!("user: {delegated_text}");
let expected_text = format!(
"<realtime_delegation>\n <input>user: {delegated_text}</input>\n</realtime_delegation>"
);
assert!(first_texts.iter().any(|text| text == &expected_text));
realtime_server.shutdown().await;