- Optimize Codex Chat cache stability

- Stop deriving Codex session/cache identity from `previous_response_id`.
  - Canonicalize parseable JSON string payloads in Chat and Responses tool conversions.
  - Add regression coverage for cache-sensitive conversion behavior.
This commit is contained in:
Jason
2026-05-19 20:30:39 +08:00
Unverified
parent 74acf1e387
commit 22fbe6f11a
4 changed files with 148 additions and 26 deletions
+27
View File
@@ -48,6 +48,17 @@ pub(crate) fn canonical_json_string(value: &Value) -> String {
}
}
/// Returns the canonical serialization of `value` when it holds a JSON object
/// or array encoded as a string; otherwise returns `value` unchanged.
///
/// The canonical form is whatever `canonical_json_string` produces for the
/// parsed document, so two payloads that differ only in key order or
/// insignificant whitespace serialize to the same bytes.
///
/// Only structured payloads (text whose first non-whitespace character is `{`
/// or `[`) are rewritten. Bare scalars such as `1.50`, `true`, or a very long
/// integer are valid JSON too, but re-serializing them may change their text
/// (number formatting, stripped surrounding whitespace) without any benefit,
/// so they are passed through byte-for-byte like any other plain text.
///
/// Input that looks structured but fails to parse is also returned unchanged.
pub(crate) fn canonicalize_json_string_if_parseable(value: &str) -> String {
    let trimmed = value.trim();

    // Empty input, plain text, and JSON scalars all take this early exit.
    if !trimmed.starts_with(['{', '[']) {
        return value.to_string();
    }

    match serde_json::from_str::<Value>(trimmed) {
        Ok(parsed) => canonical_json_string(&parsed),
        // Not valid JSON after all (e.g. truncated): keep the caller's bytes.
        Err(_) => value.to_string(),
    }
}
pub(crate) fn short_value_hash(value: Option<&Value>) -> String {
let Some(value) = value else {
return "absent".to_string();
@@ -99,4 +110,20 @@ mod tests {
assert_eq!(serde_json::to_string(&value).unwrap(), r#"{"a":1,"b":2}"#);
}
#[test]
fn canonicalize_json_string_if_parseable_sorts_keys_and_removes_whitespace() {
    // Keys arrive out of order and padded with spaces; the result must be the
    // compact, key-sorted rendering.
    let canonical = canonicalize_json_string_if_parseable(r#"{ "b": 2, "a": 1 }"#);

    assert_eq!(canonical, r#"{"a":1,"b":2}"#);
}
#[test]
fn canonicalize_json_string_if_parseable_preserves_plain_text() {
    // Text that is not JSON must come back exactly as it went in.
    let untouched = canonicalize_json_string_if_parseable("plain text");

    assert_eq!(untouched, "plain text");
}
}
@@ -9,6 +9,7 @@ use super::{
chat_usage_to_responses_usage, response_id_from_chat_id, response_status_from_finish_reason,
},
};
use crate::proxy::json_canonical::canonicalize_json_string_if_parseable;
use crate::proxy::sse::{strip_sse_field, take_sse_block};
use bytes::Bytes;
use futures::stream::{Stream, StreamExt};
@@ -688,12 +689,13 @@ impl ChatToResponsesState {
let state = self.tools.get_mut(&key).expect("tool state exists");
let output_index = state.output_index.unwrap_or(0);
let arguments = canonicalize_json_string_if_parseable(&state.arguments);
let item = response_function_call_item(
&state.item_id,
"completed",
&state.call_id,
&state.name,
&state.arguments,
&arguments,
Some(&state.reasoning_content),
);
state.done = true;
@@ -705,7 +707,7 @@ impl ChatToResponsesState {
"type": "response.function_call_arguments.done",
"item_id": state.item_id,
"output_index": output_index,
"arguments": state.arguments
"arguments": arguments
}),
));
events.push(sse_event(
@@ -1003,6 +1005,19 @@ mod tests {
assert!(output.contains("\"call_id\":\"call_1\""));
}
#[tokio::test]
async fn canonicalizes_streamed_tool_call_arguments_on_done_events() {
    // The tool-call arguments are split across two deltas and, once joined,
    // form an object with unsorted keys and extra whitespace.
    let chunks = vec![
        "data: {\"id\":\"chatcmpl_args\",\"model\":\"gpt-5.4\",\"choices\":[{\"delta\":{\"tool_calls\":[{\"index\":0,\"id\":\"call_1\",\"type\":\"function\",\"function\":{\"name\":\"lookup\"}}]}}]}\n\n",
        "data: {\"id\":\"chatcmpl_args\",\"model\":\"gpt-5.4\",\"choices\":[{\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"{ \\\"b\\\": 2,\"}}]}}]}\n\n",
        "data: {\"id\":\"chatcmpl_args\",\"model\":\"gpt-5.4\",\"choices\":[{\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\" \\\"a\\\": 1 }\"}}]},\"finish_reason\":\"tool_calls\"}]}\n\n",
        "data: [DONE]\n\n",
    ];
    let output = collect(chunks).await;

    // The emitted stream must carry the canonical (sorted, compact) arguments.
    let expected_fragment = r#""arguments":"{\"a\":1,\"b\":2}""#;
    assert!(output.contains(expected_fragment));
}
#[tokio::test]
async fn preserves_reasoning_content_on_streamed_tool_call_items() {
let output = collect(vec![
@@ -8,7 +8,10 @@ use super::codex_chat_common::{
append_reasoning_content, extract_reasoning_field_text, extract_reasoning_summary_text,
response_function_call_item, split_leading_think_block,
};
use crate::proxy::{error::ProxyError, json_canonical::canonical_json_string};
use crate::proxy::{
error::ProxyError,
json_canonical::{canonical_json_string, canonicalize_json_string_if_parseable},
};
use serde_json::{json, Value};
const EXTRA_CHAT_PASSTHROUGH_FIELDS: &[&str] = &[
@@ -187,7 +190,7 @@ fn append_responses_item_as_chat_message(
);
let call_id = item.get("call_id").and_then(|v| v.as_str()).unwrap_or("");
let output = match item.get("output") {
Some(Value::String(s)) => s.clone(),
Some(Value::String(s)) => canonicalize_json_string_if_parseable(s),
Some(v) => canonical_json_string(v),
None => String::new(),
};
@@ -484,7 +487,7 @@ fn responses_function_call_to_chat_tool_call(item: &Value) -> Value {
.unwrap_or("");
let name = item.get("name").and_then(|v| v.as_str()).unwrap_or("");
let arguments = match item.get("arguments") {
Some(Value::String(s)) => s.clone(),
Some(Value::String(s)) => canonicalize_json_string_if_parseable(s),
Some(v) => canonical_json_string(v),
None => "{}".to_string(),
};
@@ -734,7 +737,7 @@ fn chat_tool_call_to_response_item(
let function = tool_call.get("function").unwrap_or(&Value::Null);
let name = function.get("name").and_then(|v| v.as_str()).unwrap_or("");
let arguments = match function.get("arguments") {
Some(Value::String(s)) => s.clone(),
Some(Value::String(s)) => canonicalize_json_string_if_parseable(s),
Some(v) => canonical_json_string(v),
None => "{}".to_string(),
};
@@ -757,7 +760,7 @@ fn chat_legacy_function_call_to_response_item(
.and_then(|v| v.as_str())
.unwrap_or("");
let arguments = match function_call.get("arguments") {
Some(Value::String(s)) => s.clone(),
Some(Value::String(s)) => canonicalize_json_string_if_parseable(s),
Some(v) => canonical_json_string(v),
None => "{}".to_string(),
};
@@ -1174,6 +1177,64 @@ mod tests {
assert_eq!(messages[3]["role"], "user");
}
#[test]
fn responses_request_to_chat_canonicalizes_json_string_tool_payloads() {
    // Both the call arguments and the call output are JSON documents carried
    // as strings, with unsorted keys and padding.
    let function_call = json!({
        "type": "function_call",
        "call_id": "call_1",
        "name": "lookup",
        "arguments": "{ \"b\": 2, \"a\": 1 }"
    });
    let function_call_output = json!({
        "type": "function_call_output",
        "call_id": "call_1",
        "output": "{ \"z\": true, \"a\": [2, 1] }"
    });
    let request = json!({
        "model": "gpt-5.4",
        "input": [function_call, function_call_output]
    });

    let chat_request = responses_to_chat_completions(request).unwrap();
    let messages = chat_request["messages"].as_array().unwrap();

    // First message carries the tool call, second the tool output.
    let call_arguments = &messages[0]["tool_calls"][0]["function"]["arguments"];
    let tool_output = &messages[1]["content"];
    assert_eq!(*call_arguments, r#"{"a":1,"b":2}"#);
    assert_eq!(*tool_output, r#"{"a":[2,1],"z":true}"#);
}
#[test]
fn responses_request_to_chat_preserves_plain_text_tool_output() {
    // Neither payload is JSON, so both must survive the conversion verbatim.
    let function_call = json!({
        "type": "function_call",
        "call_id": "call_1",
        "name": "read_file",
        "arguments": "not json"
    });
    let function_call_output = json!({
        "type": "function_call_output",
        "call_id": "call_1",
        "output": "plain text result"
    });
    let request = json!({
        "model": "gpt-5.4",
        "input": [function_call, function_call_output]
    });

    let chat_request = responses_to_chat_completions(request).unwrap();
    let messages = chat_request["messages"].as_array().unwrap();

    let call_arguments = &messages[0]["tool_calls"][0]["function"]["arguments"];
    let tool_output = &messages[1]["content"];
    assert_eq!(*call_arguments, "not json");
    assert_eq!(*tool_output, "plain text result");
}
#[test]
fn chat_response_to_responses_maps_text_tool_calls_and_usage() {
let input = json!({
@@ -1227,6 +1288,35 @@ mod tests {
assert_eq!(result["usage"]["input_tokens_details"]["cached_tokens"], 3);
}
#[test]
fn chat_response_to_responses_canonicalizes_json_string_tool_arguments() {
    // The tool call's arguments are a JSON string with unsorted keys and padding.
    let tool_call = json!({
        "id": "call_1",
        "type": "function",
        "function": {
            "name": "lookup",
            "arguments": "{ \"b\": 2, \"a\": 1 }"
        }
    });
    let completion = json!({
        "id": "chatcmpl_args",
        "object": "chat.completion",
        "created": 123,
        "model": "gpt-5.4",
        "choices": [{
            "message": {
                "role": "assistant",
                "tool_calls": [tool_call]
            },
            "finish_reason": "tool_calls"
        }]
    });

    let response = chat_completion_to_response(completion).unwrap();

    // The converted function-call item must carry the canonical arguments.
    let first_item = &response["output"][0];
    assert_eq!(first_item["type"], "function_call");
    assert_eq!(first_item["arguments"], r#"{"a":1,"b":2}"#);
}
#[test]
fn chat_response_to_responses_splits_inline_think_content() {
let input = json!({
+9 -19
View File
@@ -6,7 +6,7 @@
//!
//! 支持从客户端请求中提取 Session ID,用于关联同一对话的多个请求:
//! - Claude: 从 `metadata.user_id` (格式: `user_xxx_session_yyy`) 或 `metadata.session_id` 提取
//! - Codex: 从 `previous_response_id` 或 headers 中的 `session_id` 提取
//! - Codex: 从 headers 中的 `session_id` / `x-session-id` 或 `metadata.session_id` 提取
//! - 其他: 生成新的 UUID
use axum::http::HeaderMap;
@@ -197,8 +197,6 @@ pub enum SessionIdSource {
MetadataSessionId,
/// 从 headers 提取 (Codex)
Header,
/// 从 previous_response_id 提取 (Codex)
PreviousResponseId,
/// 新生成
Generated,
}
@@ -228,8 +226,7 @@ pub struct SessionIdResult {
/// ### Codex 请求
/// 1. Headers: `session_id` 或 `x-session-id`
/// 2. `metadata.session_id`
/// 3. `previous_response_id` (对话延续)
/// 4. 生成新 UUID
/// 3. 生成新 UUID
///
/// ## 示例
///
@@ -319,16 +316,9 @@ fn extract_codex_session(headers: &HeaderMap, body: &serde_json::Value) -> Optio
}
}
// 3. 从 previous_response_id 提取(对话延续)
if let Some(prev_id) = body.get("previous_response_id").and_then(|v| v.as_str()) {
if prev_id.len() > 10 {
return Some(SessionIdResult {
session_id: format!("codex_{prev_id}"),
source: SessionIdSource::PreviousResponseId,
client_provided: true,
});
}
}
// previous_response_id 是 Responses 协议里的响应游标,不是稳定会话身份。
// Chat/Responses 桥接时该值通常来自上游每轮返回的随机 response id,
// 若把它当 prompt_cache_key 或 Codex session header,会导致每轮请求换缓存 key。
None
}
@@ -585,7 +575,7 @@ mod tests {
}
#[test]
fn test_extract_session_from_codex_previous_response_id() {
fn test_codex_previous_response_id_is_not_stable_session_identity() {
let headers = HeaderMap::new();
let body = json!({
"input": "Write a function",
@@ -594,9 +584,9 @@ mod tests {
let result = extract_session_id(&headers, &body, "codex");
assert_eq!(result.session_id, "codex_resp_abc123def456789");
assert_eq!(result.source, SessionIdSource::PreviousResponseId);
assert!(result.client_provided);
assert!(!result.session_id.is_empty());
assert_eq!(result.source, SessionIdSource::Generated);
assert!(!result.client_provided);
}
#[test]