mirror of
https://github.com/pchuan98/codex.git
synced 2026-07-01 00:31:56 +08:00
[codex] add roles to realtime append text (#27936)
## Summary

Add an explicit `user` or `developer` role to `thread/realtime/appendText` and propagate it through the realtime input queue into `conversation.item.create`. Older JSON clients that omit the field continue to default to `user`.

This lets app-provided context such as memory retain developer authority without bypassing app-server through a renderer-owned data channel.

The app-server schemas, API documentation, and focused protocol and websocket coverage are updated with the new contract. The Codex Apps consumer is tracked in [openai/openai#1025261](https://github.com/openai/openai/pull/1025261).
This commit is contained in:
committed by
GitHub
Unverified
parent
9915d34684
commit
216dee1189
@@ -17,6 +17,7 @@ use crate::error::ApiError;
|
||||
use crate::provider::Provider;
|
||||
use codex_client::backoff;
|
||||
use codex_client::maybe_build_rustls_client_config_with_custom_ca;
|
||||
use codex_protocol::protocol::ConversationTextRole;
|
||||
use codex_protocol::protocol::RealtimeTranscriptDelta;
|
||||
use codex_utils_rustls_provider::ensure_rustls_crypto_provider;
|
||||
use futures::SinkExt;
|
||||
@@ -227,8 +228,12 @@ impl RealtimeWebsocketConnection {
|
||||
self.writer.send_audio_frame(frame).await
|
||||
}
|
||||
|
||||
pub async fn send_conversation_item_create(&self, text: String) -> Result<(), ApiError> {
|
||||
self.writer.send_conversation_item_create(text).await
|
||||
pub async fn send_conversation_item_create(
|
||||
&self,
|
||||
text: String,
|
||||
role: ConversationTextRole,
|
||||
) -> Result<(), ApiError> {
|
||||
self.writer.send_conversation_item_create(text, role).await
|
||||
}
|
||||
|
||||
pub async fn send_conversation_function_call_output(
|
||||
@@ -286,9 +291,17 @@ impl RealtimeWebsocketWriter {
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn send_conversation_item_create(&self, text: String) -> Result<(), ApiError> {
|
||||
self.send_json(&conversation_item_create_message(self.event_parser, text))
|
||||
.await
|
||||
pub async fn send_conversation_item_create(
|
||||
&self,
|
||||
text: String,
|
||||
role: ConversationTextRole,
|
||||
) -> Result<(), ApiError> {
|
||||
self.send_json(&conversation_item_create_message(
|
||||
self.event_parser,
|
||||
text,
|
||||
role,
|
||||
))
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn send_conversation_handoff_append(
|
||||
@@ -1609,6 +1622,7 @@ mod tests {
|
||||
.expect("text");
|
||||
let third_json: Value = serde_json::from_str(&third).expect("json");
|
||||
assert_eq!(third_json["type"], "conversation.item.create");
|
||||
assert_eq!(third_json["item"]["role"], "developer");
|
||||
assert_eq!(
|
||||
third_json["item"]["content"][0]["type"],
|
||||
Value::String("input_text".to_string())
|
||||
@@ -1746,7 +1760,10 @@ mod tests {
|
||||
.await
|
||||
.expect("send audio");
|
||||
connection
|
||||
.send_conversation_item_create("hello agent".to_string())
|
||||
.send_conversation_item_create(
|
||||
"hello agent".to_string(),
|
||||
ConversationTextRole::Developer,
|
||||
)
|
||||
.await
|
||||
.expect("send item");
|
||||
connection
|
||||
@@ -1948,6 +1965,7 @@ mod tests {
|
||||
.expect("text");
|
||||
let second_json: Value = serde_json::from_str(&second).expect("json");
|
||||
assert_eq!(second_json["type"], "conversation.item.create");
|
||||
assert_eq!(second_json["item"]["role"], "developer");
|
||||
assert_eq!(
|
||||
second_json["item"]["type"],
|
||||
Value::String("message".to_string())
|
||||
@@ -2030,7 +2048,10 @@ mod tests {
|
||||
);
|
||||
|
||||
connection
|
||||
.send_conversation_item_create("delegate this".to_string())
|
||||
.send_conversation_item_create(
|
||||
"delegate this".to_string(),
|
||||
ConversationTextRole::Developer,
|
||||
)
|
||||
.await
|
||||
.expect("send text item");
|
||||
connection
|
||||
|
||||
@@ -13,6 +13,7 @@ use crate::endpoint::realtime_websocket::protocol::RealtimeSessionConfig;
|
||||
use crate::endpoint::realtime_websocket::protocol::RealtimeSessionMode;
|
||||
use crate::endpoint::realtime_websocket::protocol::RealtimeVoice;
|
||||
use crate::endpoint::realtime_websocket::protocol::SessionUpdateSession;
|
||||
use codex_protocol::protocol::ConversationTextRole;
|
||||
use serde_json::Result as JsonResult;
|
||||
use serde_json::Value;
|
||||
use serde_json::to_value;
|
||||
@@ -33,10 +34,11 @@ pub(super) fn normalized_session_mode(
|
||||
pub(super) fn conversation_item_create_message(
|
||||
event_parser: RealtimeEventParser,
|
||||
text: String,
|
||||
role: ConversationTextRole,
|
||||
) -> RealtimeOutboundMessage {
|
||||
match event_parser {
|
||||
RealtimeEventParser::V1 => v1_conversation_item_create_message(text),
|
||||
RealtimeEventParser::RealtimeV2 => v2_conversation_item_create_message(text),
|
||||
RealtimeEventParser::V1 => v1_conversation_item_create_message(text, role),
|
||||
RealtimeEventParser::RealtimeV2 => v2_conversation_item_create_message(text, role),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -5,7 +5,6 @@ use crate::endpoint::realtime_websocket::protocol::ConversationItemContent;
|
||||
use crate::endpoint::realtime_websocket::protocol::ConversationItemPayload;
|
||||
use crate::endpoint::realtime_websocket::protocol::ConversationItemType;
|
||||
use crate::endpoint::realtime_websocket::protocol::ConversationMessageItem;
|
||||
use crate::endpoint::realtime_websocket::protocol::ConversationRole;
|
||||
use crate::endpoint::realtime_websocket::protocol::RealtimeOutboundMessage;
|
||||
use crate::endpoint::realtime_websocket::protocol::RealtimeVoice;
|
||||
use crate::endpoint::realtime_websocket::protocol::SessionAudio;
|
||||
@@ -14,12 +13,16 @@ use crate::endpoint::realtime_websocket::protocol::SessionAudioInput;
|
||||
use crate::endpoint::realtime_websocket::protocol::SessionAudioOutput;
|
||||
use crate::endpoint::realtime_websocket::protocol::SessionType;
|
||||
use crate::endpoint::realtime_websocket::protocol::SessionUpdateSession;
|
||||
use codex_protocol::protocol::ConversationTextRole;
|
||||
|
||||
pub(super) fn conversation_item_create_message(text: String) -> RealtimeOutboundMessage {
|
||||
pub(super) fn conversation_item_create_message(
|
||||
text: String,
|
||||
role: ConversationTextRole,
|
||||
) -> RealtimeOutboundMessage {
|
||||
RealtimeOutboundMessage::ConversationItemCreate {
|
||||
item: ConversationItemPayload::Message(ConversationMessageItem {
|
||||
r#type: ConversationItemType::Message,
|
||||
role: ConversationRole::User,
|
||||
role,
|
||||
content: vec![ConversationItemContent {
|
||||
r#type: ConversationContentType::InputText,
|
||||
text,
|
||||
|
||||
@@ -6,7 +6,6 @@ use crate::endpoint::realtime_websocket::protocol::ConversationItemContent;
|
||||
use crate::endpoint::realtime_websocket::protocol::ConversationItemPayload;
|
||||
use crate::endpoint::realtime_websocket::protocol::ConversationItemType;
|
||||
use crate::endpoint::realtime_websocket::protocol::ConversationMessageItem;
|
||||
use crate::endpoint::realtime_websocket::protocol::ConversationRole;
|
||||
use crate::endpoint::realtime_websocket::protocol::NoiseReductionType;
|
||||
use crate::endpoint::realtime_websocket::protocol::RealtimeOutboundMessage;
|
||||
use crate::endpoint::realtime_websocket::protocol::RealtimeOutputModality;
|
||||
@@ -25,6 +24,7 @@ use crate::endpoint::realtime_websocket::protocol::SessionTurnDetection;
|
||||
use crate::endpoint::realtime_websocket::protocol::SessionType;
|
||||
use crate::endpoint::realtime_websocket::protocol::SessionUpdateSession;
|
||||
use crate::endpoint::realtime_websocket::protocol::TurnDetectionType;
|
||||
use codex_protocol::protocol::ConversationTextRole;
|
||||
use serde_json::json;
|
||||
|
||||
const REALTIME_V2_OUTPUT_MODALITY_AUDIO: &str = "audio";
|
||||
@@ -36,11 +36,14 @@ const REALTIME_V2_SILENCE_TOOL_NAME: &str = "remain_silent";
|
||||
const REALTIME_V2_SILENCE_TOOL_DESCRIPTION: &str = "Call this when the best response is to say nothing. Use it instead of speaking after hidden system/control messages, after background agent updates in silent modes, or whenever acknowledging aloud would be distracting. This tool has no user-visible effect.";
|
||||
const REALTIME_V2_INPUT_TRANSCRIPTION_MODEL: &str = "gpt-4o-mini-transcribe";
|
||||
|
||||
pub(super) fn conversation_item_create_message(text: String) -> RealtimeOutboundMessage {
|
||||
pub(super) fn conversation_item_create_message(
|
||||
text: String,
|
||||
role: ConversationTextRole,
|
||||
) -> RealtimeOutboundMessage {
|
||||
RealtimeOutboundMessage::ConversationItemCreate {
|
||||
item: ConversationItemPayload::Message(ConversationMessageItem {
|
||||
r#type: ConversationItemType::Message,
|
||||
role: ConversationRole::User,
|
||||
role,
|
||||
content: vec![ConversationItemContent {
|
||||
r#type: ConversationContentType::InputText,
|
||||
text,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use crate::endpoint::realtime_websocket::protocol_v1::parse_realtime_event_v1;
|
||||
use crate::endpoint::realtime_websocket::protocol_v2::parse_realtime_event_v2;
|
||||
use codex_protocol::protocol::ConversationTextRole;
|
||||
pub use codex_protocol::protocol::RealtimeAudioFrame;
|
||||
pub use codex_protocol::protocol::RealtimeEvent;
|
||||
pub use codex_protocol::protocol::RealtimeOutputModality;
|
||||
@@ -157,7 +158,7 @@ pub(super) struct SessionAudioOutputFormat {
|
||||
pub(super) struct ConversationMessageItem {
|
||||
#[serde(rename = "type")]
|
||||
pub(super) r#type: ConversationItemType,
|
||||
pub(super) role: ConversationRole,
|
||||
pub(super) role: ConversationTextRole,
|
||||
pub(super) content: Vec<ConversationItemContent>,
|
||||
}
|
||||
|
||||
@@ -168,12 +169,6 @@ pub(super) enum ConversationItemType {
|
||||
FunctionCallOutput,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub(super) enum ConversationRole {
|
||||
User,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(untagged)]
|
||||
pub(super) enum ConversationItemPayload {
|
||||
|
||||
Reference in New Issue
Block a user