From 313a6e3f6ce43bcba49774576bf34bbf338fd4fd Mon Sep 17 00:00:00 2001 From: Dex Miller Date: Mon, 13 Apr 2026 10:42:29 +0800 Subject: [PATCH] [codex] Preserve cache_control when merging system prompts (#1946) * Preserve cache hints when collapsing system prompts Strict OpenAI-compatible chat backends still need fragmented Claude system prompts collapsed into one leading system message, but that normalization should not silently drop stable cache hints. Preserve message-level cache_control when the merged system fragments agree, and fall back to omitting it when the fragments conflict. Constraint: Must keep single-system normalization for Nvidia/Qwen-style chat backends Rejected: Always copy the first cache_control | could misrepresent conflicting cache boundaries Confidence: high Scope-risk: narrow Reversibility: clean Directive: If system prompt merging changes again, preserve cache_control whenever the merged metadata is unambiguous Tested: cargo test proxy::providers::transform --manifest-path src-tauri/Cargo.toml Not-tested: End-to-end prompt caching behavior against cache-aware OpenAI-compatible upstreams Related: #1881 * Tighten cache hint inheritance for merged system prompts The follow-up cache hint fix still treated mixed present/absent cache_control across fragmented system prompts as inheritable, which expanded the cache scope after prompt collapse. 
Treat that mix as ambiguous and only preserve cache_control when every merged fragment explicitly agrees on the same value. Constraint: Must preserve strict-backend system prompt normalization from #1942 Rejected: Inherit first present cache_control | widens cache scope when later fragments were intentionally uncached Confidence: high Scope-risk: narrow Reversibility: clean Directive: Any future merged-system cache hint logic should treat missing cache_control as semantically significant Tested: cargo test proxy::providers::transform --manifest-path src-tauri/Cargo.toml Not-tested: End-to-end upstream caching behavior against cache-aware relays Related: #1881 Related: #1946 * Keep cache-control merge regressions easy to review Reflow the two long cache-control regression assertions in transform.rs so the neighboring merge cases stay rustfmt-aligned and easier to scan. This keeps the preserved code change separate from the untracked Markdown design notes the user did not want committed. 
Constraint: Exclude Markdown design files from the commit while preserving the local code change Rejected: Include docs in the same commit | user explicitly asked to leave Markdown files out Confidence: high Scope-risk: narrow Reversibility: clean Directive: Treat this as a readability-only test change; do not infer runtime behavior changes from it Tested: cargo test --manifest-path src-tauri/Cargo.toml test_anthropic_to_openai_drops_ --lib Tested: cargo check --manifest-path src-tauri/Cargo.toml --tests Tested: pnpm format:check Tested: pnpm typecheck Not-tested: Full application integration and manual flows --- src-tauri/src/proxy/providers/transform.rs | 79 +++++++++++++++++++--- 1 file changed, 70 insertions(+), 9 deletions(-) diff --git a/src-tauri/src/proxy/providers/transform.rs b/src-tauri/src/proxy/providers/transform.rs index a41d18501..5e7869cda 100644 --- a/src-tauri/src/proxy/providers/transform.rs +++ b/src-tauri/src/proxy/providers/transform.rs @@ -199,6 +199,10 @@ fn normalize_openai_system_messages(messages: &mut Vec) { } let mut parts = Vec::new(); + let mut inherited_cache_control: Option = None; + let mut cache_control_conflict = false; + let mut saw_cache_control = false; + let mut saw_missing_cache_control = false; messages.retain(|message| { if message.get("role").and_then(|value| value.as_str()) != Some("system") { return true; @@ -219,11 +223,28 @@ fn normalize_openai_system_messages(messages: &mut Vec) { _ => {} } + if let Some(cache_control) = message.get("cache_control") { + saw_cache_control = true; + match &inherited_cache_control { + None => inherited_cache_control = Some(cache_control.clone()), + Some(existing) if existing == cache_control => {} + Some(_) => cache_control_conflict = true, + } + } else { + saw_missing_cache_control = true; + } + false }); if !parts.is_empty() { - messages.insert(0, json!({"role": "system", "content": parts.join("\n")})); + let mut merged = json!({"role": "system", "content": parts.join("\n")}); + if 
!(cache_control_conflict || (saw_cache_control && saw_missing_cache_control)) { + if let Some(cache_control) = inherited_cache_control { + merged["cache_control"] = cache_control; + } + } + messages.insert(0, merged); } } @@ -606,18 +627,15 @@ mod tests { } #[test] - fn test_anthropic_to_openai_normalizes_fragmented_system_messages() { + fn test_anthropic_to_openai_preserves_matching_system_cache_control_when_merging() { let input = json!({ "model": "claude-3-sonnet", "max_tokens": 1024, "system": [ - {"type": "text", "text": "You are Claude Code."}, - {"type": "text", "text": "Be concise."} + {"type": "text", "text": "You are Claude Code.", "cache_control": {"type": "ephemeral"}}, + {"type": "text", "text": "Be concise.", "cache_control": {"type": "ephemeral"}} ], - "messages": [ - {"role": "system", "content": "Follow repo conventions."}, - {"role": "user", "content": "Hello"} - ] + "messages": [{"role": "user", "content": "Hello"}] }); let result = anthropic_to_openai(input).unwrap(); @@ -625,11 +643,54 @@ mod tests { assert_eq!(result["messages"][0]["role"], "system"); assert_eq!( result["messages"][0]["content"], - "You are Claude Code.\nBe concise.\nFollow repo conventions." + "You are Claude Code.\nBe concise." ); + assert_eq!(result["messages"][0]["cache_control"]["type"], "ephemeral"); assert_eq!(result["messages"][1]["role"], "user"); } + #[test] + fn test_anthropic_to_openai_drops_mixed_present_absent_system_cache_control_when_merging() { + let input = json!({ + "model": "claude-3-sonnet", + "max_tokens": 1024, + "system": [ + {"type": "text", "text": "You are Claude Code.", "cache_control": {"type": "ephemeral"}}, + {"type": "text", "text": "Be concise."} + ], + "messages": [{"role": "user", "content": "Hello"}] + }); + + let result = anthropic_to_openai(input, None).unwrap(); + assert_eq!(result["messages"][0]["role"], "system"); + assert_eq!( + result["messages"][0]["content"], + "You are Claude Code.\nBe concise." 
+ ); + assert!(result["messages"][0].get("cache_control").is_none()); + } + + #[test] + fn test_anthropic_to_openai_drops_conflicting_system_cache_control_when_merging() { + let input = json!({ + "model": "claude-3-sonnet", + "max_tokens": 1024, + "system": [ + {"type": "text", "text": "You are Claude Code.", "cache_control": {"type": "ephemeral"}}, + {"type": "text", "text": "Be concise.", "cache_control": {"type": "ephemeral", "ttl": "5m"}} + ], + "messages": [{"role": "user", "content": "Hello"}] + }); + + let result = anthropic_to_openai(input, None).unwrap(); + assert_eq!(result["messages"][0]["role"], "system"); + assert_eq!( + result["messages"][0]["content"], + "You are Claude Code.\nBe concise." + ); + assert!(result["messages"][0].get("cache_control").is_none()); + } + #[test] fn test_anthropic_to_openai_tool_use() { let input = json!({