diff --git a/codex-rs/core/src/realtime_conversation.rs b/codex-rs/core/src/realtime_conversation.rs
index e74973671..f81d6c094 100644
--- a/codex-rs/core/src/realtime_conversation.rs
+++ b/codex-rs/core/src/realtime_conversation.rs
@@ -805,7 +805,9 @@ async fn handle_start_inner(
if let Some(text) = maybe_routed_text {
debug!(text = %text, "[realtime-text] realtime conversation text output");
let sess_for_routed_text = Arc::clone(&sess_clone);
- sess_for_routed_text.route_realtime_text_input(text).await;
+ sess_for_routed_text
+ .route_realtime_text_input(wrap_realtime_delegation_input(&text))
+ .await;
}
if !fanout_realtime_active.load(Ordering::Relaxed) {
break;
@@ -867,6 +869,20 @@ fn realtime_text_from_handoff_request(handoff: &RealtimeHandoffRequested) -> Opt
.or((!handoff.input_transcript.is_empty()).then_some(handoff.input_transcript.clone()))
}
+fn wrap_realtime_delegation_input(input: &str) -> String {
+ format!(
+ "\n {}\n",
+ escape_xml_text(input)
+ )
+}
+
+fn escape_xml_text(input: &str) -> String {
+ input
+ .replace('&', "&")
+ .replace('<', "<")
+ .replace('>', ">")
+}
+
fn realtime_api_key(auth: Option<&CodexAuth>, provider: &ModelProviderInfo) -> CodexResult {
if let Some(api_key) = provider.api_key()? {
return Ok(api_key);
diff --git a/codex-rs/core/src/realtime_conversation_tests.rs b/codex-rs/core/src/realtime_conversation_tests.rs
index 0a32d063c..cda130e18 100644
--- a/codex-rs/core/src/realtime_conversation_tests.rs
+++ b/codex-rs/core/src/realtime_conversation_tests.rs
@@ -1,6 +1,7 @@
use super::RealtimeHandoffState;
use super::RealtimeSessionKind;
use super::realtime_text_from_handoff_request;
+use super::wrap_realtime_delegation_input;
use async_channel::bounded;
use codex_protocol::protocol::RealtimeHandoffRequested;
use codex_protocol::protocol::RealtimeTranscriptEntry;
@@ -54,6 +55,22 @@ fn ignores_empty_handoff_request_input_transcript() {
assert_eq!(realtime_text_from_handoff_request(&handoff), None);
}
+#[test]
+fn wraps_realtime_delegation_input() {
+ assert_eq!(
+ wrap_realtime_delegation_input("hello"),
+ "\n hello\n"
+ );
+}
+
+#[test]
+fn wraps_realtime_delegation_input_with_xml_escaping() {
+ assert_eq!(
+ wrap_realtime_delegation_input("use a < b && c > d"),
+ "\n use a < b && c > d\n"
+ );
+}
+
#[tokio::test]
async fn clears_active_handoff_explicitly() {
let (tx, _rx) = bounded(1);
diff --git a/codex-rs/core/tests/suite/realtime_conversation.rs b/codex-rs/core/tests/suite/realtime_conversation.rs
index 4759f5daf..add4065f7 100644
--- a/codex-rs/core/tests/suite/realtime_conversation.rs
+++ b/codex-rs/core/tests/suite/realtime_conversation.rs
@@ -2417,11 +2417,8 @@ async fn inbound_handoff_request_starts_turn() -> Result<()> {
let request = response_mock.single_request();
let user_texts = request.message_input_texts("user");
- assert!(
- user_texts
- .iter()
- .any(|text| text == "user: text from realtime")
- );
+ assert!(user_texts.iter().any(|text| text
+ == "\n user: text from realtime\n"));
realtime_server.shutdown().await;
Ok(())
@@ -2503,7 +2500,7 @@ async fn inbound_handoff_request_uses_active_transcript() -> Result<()> {
let request = response_mock.single_request();
let user_texts = request.message_input_texts("user");
assert!(user_texts.iter().any(|text| text
- == "assistant: assistant context\nuser: delegated query\nassistant: assist confirm"));
+ == "\n assistant: assistant context\nuser: delegated query\nassistant: assist confirm\n"));
realtime_server.shutdown().await;
Ok(())
@@ -2617,23 +2614,14 @@ async fn inbound_handoff_request_clears_active_transcript_after_each_handoff() -
assert_eq!(requests.len(), 2);
let first_user_texts = requests[0].message_input_texts("user");
- assert!(
- first_user_texts
- .iter()
- .any(|text| text == "user: first question")
- );
+ assert!(first_user_texts.iter().any(|text| text
+ == "\n user: first question\n"));
let second_user_texts = requests[1].message_input_texts("user");
- assert!(
- second_user_texts
- .iter()
- .any(|text| text == "user: second question")
- );
- assert!(
- !second_user_texts
- .iter()
- .any(|text| text == "user: first question\nuser: second question")
- );
+ assert!(second_user_texts.iter().any(|text| text
+ == "\n user: second question\n"));
+ assert!(!second_user_texts.iter().any(|text| text
+ == "\n user: first question\nuser: second question\n"));
realtime_server.shutdown().await;
Ok(())
@@ -3156,14 +3144,12 @@ async fn inbound_handoff_request_steers_active_turn() -> Result<()> {
assert!(
!first_texts
.iter()
- .any(|text| text == "user: steer via realtime")
+ .any(|text| text
+ == "\n user: steer via realtime\n")
);
assert!(second_texts.iter().any(|text| text == "first prompt"));
- assert!(
- second_texts
- .iter()
- .any(|text| text == "user: steer via realtime")
- );
+ assert!(second_texts.iter().any(|text| text
+ == "\n user: steer via realtime\n"));
realtime_server.shutdown().await;
api_server.shutdown().await;
@@ -3278,7 +3264,9 @@ async fn inbound_handoff_request_starts_turn_and_does_not_block_realtime_audio()
assert_eq!(requests.len(), 1);
let first_body: Value = serde_json::from_slice(&requests[0]).expect("parse first request");
let first_texts = message_input_texts(&first_body, "user");
- let expected_text = format!("user: {delegated_text}");
+ let expected_text = format!(
+ "\n user: {delegated_text}\n"
+ );
assert!(first_texts.iter().any(|text| text == &expected_text));
realtime_server.shutdown().await;