diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go
index 9ef7a2df..5b76d02a 100644
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -124,7 +124,9 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	body = disableThinkingIfToolChoiceForced(body)
 
 	// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
-	body = ensureCacheControl(body)
+	if countCacheControls(body) == 0 {
+		body = ensureCacheControl(body)
+	}
 
 	// Extract betas from body and convert to header
 	var extraBetas []string
@@ -262,7 +264,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	body = disableThinkingIfToolChoiceForced(body)
 
 	// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
-	body = ensureCacheControl(body)
+	if countCacheControls(body) == 0 {
+		body = ensureCacheControl(body)
+	}
 
 	// Extract betas from body and convert to header
 	var extraBetas []string
@@ -1033,6 +1037,51 @@ func ensureCacheControl(payload []byte) []byte {
 	return payload
 }
 
+func countCacheControls(payload []byte) int {
+	count := 0
+
+	// Check system
+	system := gjson.GetBytes(payload, "system")
+	if system.IsArray() {
+		system.ForEach(func(_, item gjson.Result) bool {
+			if item.Get("cache_control").Exists() {
+				count++
+			}
+			return true
+		})
+	}
+
+	// Check tools
+	tools := gjson.GetBytes(payload, "tools")
+	if tools.IsArray() {
+		tools.ForEach(func(_, item gjson.Result) bool {
+			if item.Get("cache_control").Exists() {
+				count++
+			}
+			return true
+		})
+	}
+
+	// Check messages
+	messages := gjson.GetBytes(payload, "messages")
+	if messages.IsArray() {
+		messages.ForEach(func(_, msg gjson.Result) bool {
+			content := msg.Get("content")
+			if content.IsArray() {
+				content.ForEach(func(_, item gjson.Result) bool {
+					if item.Get("cache_control").Exists() {
+						count++
+					}
+					return true
+				})
+			}
+			return true
+		})
+	}
+
+	return count
+}
+
 // injectMessagesCacheControl adds cache_control to the second-to-last user turn for multi-turn caching.
 // Per Anthropic docs: "Place cache_control on the second-to-last User message to let the model reuse the earlier cache."
 // This enables caching of conversation history, which is especially beneficial for long multi-turn conversations.
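
A minimal sketch of a table-driven test exercising the new guard, assuming it sits in the same executor package as countCacheControls; the JSON payloads are illustrative examples, not repository fixtures.

package executor

import "testing"

func TestCountCacheControls(t *testing.T) {
	cases := []struct {
		name    string
		payload string
		want    int
	}{
		{
			// Client sent no cache_control anywhere: the guard would let ensureCacheControl inject it.
			name:    "no cache_control present",
			payload: `{"system":[{"type":"text","text":"hi"}],"messages":[{"role":"user","content":[{"type":"text","text":"hello"}]}]}`,
			want:    0,
		},
		{
			// Client already set cache_control on a system block and a message content block: auto-injection is skipped.
			name:    "cache_control on system and message content",
			payload: `{"system":[{"type":"text","text":"hi","cache_control":{"type":"ephemeral"}}],"messages":[{"role":"user","content":[{"type":"text","text":"hello","cache_control":{"type":"ephemeral"}}]}]}`,
			want:    2,
		},
		{
			// cache_control on a tool definition also counts.
			name:    "cache_control on a tool",
			payload: `{"tools":[{"name":"get_weather","cache_control":{"type":"ephemeral"}}]}`,
			want:    1,
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := countCacheControls([]byte(tc.payload)); got != tc.want {
				t.Fatalf("countCacheControls() = %d, want %d", got, tc.want)
			}
		})
	}
}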