From 3a43ecb19b698ad60c30b6280ca6a5bd92ec228d Mon Sep 17 00:00:00 2001
From: Martin Schneeweiss
Date: Thu, 29 Jan 2026 00:32:04 +0100
Subject: [PATCH] feat(caching): implement Claude prompt caching with multi-turn support

- Add ensureCacheControl() to auto-inject cache breakpoints
- Cache tools (last tool), system (last element), and messages (2nd-to-last user turn)
- Add prompt-caching-2024-07-31 beta header
- Return original payload on sjson error to prevent corruption
- Include verification test for caching logic

Enables up to 90% cost reduction on cached tokens.

Co-Authored-By: Claude Opus 4.5
---
 .../runtime/executor/caching_verify_test.go   | 283 +++++++++++++++++
 internal/runtime/executor/claude_executor.go  | 219 +++++++++++++++++-
 2 files changed, 501 insertions(+), 1 deletion(-)
 create mode 100644 internal/runtime/executor/caching_verify_test.go

diff --git a/internal/runtime/executor/caching_verify_test.go b/internal/runtime/executor/caching_verify_test.go
new file mode 100644
index 00000000..599c1aec
--- /dev/null
+++ b/internal/runtime/executor/caching_verify_test.go
@@ -0,0 +1,283 @@
+package executor
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+
+	"github.com/tidwall/gjson"
+)
+
+// TestEnsureCacheControl verifies where ensureCacheControl places breakpoints on the
+// tools and system sections for the payload shapes exercised below.
+func TestEnsureCacheControl(t *testing.T) {
+	// Test case 1: System prompt as string
+	t.Run("String System Prompt", func(t *testing.T) {
+		input := []byte(`{"model": "claude-3-5-sonnet", "system": "This is a long system prompt", "messages": []}`)
+		output := ensureCacheControl(input)
+
+		res := gjson.GetBytes(output, "system.0.cache_control.type")
+		if res.String() != "ephemeral" {
+			t.Errorf("cache_control not found in system string. Output: %s", string(output))
+		}
+	})
+
+	// Test case 2: System prompt as array
+	t.Run("Array System Prompt", func(t *testing.T) {
+		input := []byte(`{"model": "claude-3-5-sonnet", "system": [{"type": "text", "text": "Part 1"}, {"type": "text", "text": "Part 2"}], "messages": []}`)
+		output := ensureCacheControl(input)
+
+		// cache_control should only be on the LAST element
+		res0 := gjson.GetBytes(output, "system.0.cache_control")
+		res1 := gjson.GetBytes(output, "system.1.cache_control.type")
+
+		if res0.Exists() {
+			t.Errorf("cache_control should NOT be on the first element")
+		}
+		if res1.String() != "ephemeral" {
+			t.Errorf("cache_control not found on last system element. Output: %s", string(output))
+		}
+	})
+
+	// Test case 3: Tools are cached
+	t.Run("Tools Caching", func(t *testing.T) {
+		input := []byte(`{
+			"model": "claude-3-5-sonnet",
+			"tools": [
+				{"name": "tool1", "description": "First tool", "input_schema": {"type": "object"}},
+				{"name": "tool2", "description": "Second tool", "input_schema": {"type": "object"}}
+			],
+			"system": "System prompt",
+			"messages": []
+		}`)
+		output := ensureCacheControl(input)
+
+		// cache_control should only be on the LAST tool
+		tool0Cache := gjson.GetBytes(output, "tools.0.cache_control")
+		tool1Cache := gjson.GetBytes(output, "tools.1.cache_control.type")
+
+		if tool0Cache.Exists() {
+			t.Errorf("cache_control should NOT be on the first tool")
+		}
+		if tool1Cache.String() != "ephemeral" {
+			t.Errorf("cache_control not found on last tool. Output: %s", string(output))
+		}
+
+		// System should also have cache_control
+		systemCache := gjson.GetBytes(output, "system.0.cache_control.type")
+		if systemCache.String() != "ephemeral" {
+			t.Errorf("cache_control not found in system. Output: %s", string(output))
+		}
+	})
+
+	// Test case 4: Tools and system are INDEPENDENT breakpoints
+	// Per Anthropic docs: Up to 4 breakpoints allowed, tools and system are cached separately
+	t.Run("Independent Cache Breakpoints", func(t *testing.T) {
+		input := []byte(`{
+			"model": "claude-3-5-sonnet",
+			"tools": [
+				{"name": "tool1", "description": "First tool", "input_schema": {"type": "object"}, "cache_control": {"type": "ephemeral"}}
+			],
+			"system": [{"type": "text", "text": "System"}],
+			"messages": []
+		}`)
+		output := ensureCacheControl(input)
+
+		// Tool already has cache_control - should not be changed
+		tool0Cache := gjson.GetBytes(output, "tools.0.cache_control.type")
+		if tool0Cache.String() != "ephemeral" {
+			t.Errorf("existing cache_control was incorrectly removed")
+		}
+
+		// System SHOULD get cache_control because it is an INDEPENDENT breakpoint
+		// Tools and system are separate cache levels in the hierarchy
+		systemCache := gjson.GetBytes(output, "system.0.cache_control.type")
+		if systemCache.String() != "ephemeral" {
+			t.Errorf("system should have its own cache_control breakpoint (independent of tools)")
+		}
+	})
+
+	// Test case 5: Only tools, no system
+	t.Run("Only Tools No System", func(t *testing.T) {
+		input := []byte(`{
+			"model": "claude-3-5-sonnet",
+			"tools": [
+				{"name": "tool1", "description": "Tool", "input_schema": {"type": "object"}}
+			],
+			"messages": [{"role": "user", "content": "Hi"}]
+		}`)
+		output := ensureCacheControl(input)
+
+		toolCache := gjson.GetBytes(output, "tools.0.cache_control.type")
+		if toolCache.String() != "ephemeral" {
+			t.Errorf("cache_control not found on tool. Output: %s", string(output))
+		}
+	})
+
+	// Test case 6: Many tools (Claude Code scenario)
+	t.Run("Many Tools (Claude Code Scenario)", func(t *testing.T) {
+		// Simulate Claude Code with many tools
+		var toolsJSON strings.Builder
+		toolsJSON.WriteString(`[`)
+		for i := 0; i < 50; i++ {
+			if i > 0 {
+				toolsJSON.WriteString(",")
+			}
+			fmt.Fprintf(&toolsJSON, `{"name": "tool%d", "description": "Tool %d", "input_schema": {"type": "object"}}`, i, i)
+		}
+		toolsJSON.WriteString(`]`)
+
+		input := []byte(fmt.Sprintf(`{
+			"model": "claude-3-5-sonnet",
+			"tools": %s,
+			"system": [{"type": "text", "text": "You are Claude Code"}],
+			"messages": [{"role": "user", "content": "Hello"}]
+		}`, toolsJSON.String()))
+
+		output := ensureCacheControl(input)
+
+		// Only the last tool (index 49) should have cache_control
+		for i := 0; i < 49; i++ {
+			path := fmt.Sprintf("tools.%d.cache_control", i)
+			if gjson.GetBytes(output, path).Exists() {
+				t.Errorf("tool %d should NOT have cache_control", i)
+			}
+		}
+
+		lastToolCache := gjson.GetBytes(output, "tools.49.cache_control.type")
+		if lastToolCache.String() != "ephemeral" {
+			t.Errorf("last tool (49) should have cache_control")
+		}
+
+		// System should also have cache_control
+		systemCache := gjson.GetBytes(output, "system.0.cache_control.type")
+		if systemCache.String() != "ephemeral" {
+			t.Errorf("system should have cache_control")
+		}
+
+		t.Log("test passed: 50 tools - cache_control only on last tool")
+	})
+
+	// Test case 7: Empty tools array
+	t.Run("Empty Tools Array", func(t *testing.T) {
+		input := []byte(`{"model": "claude-3-5-sonnet", "tools": [], "system": "Test", "messages": []}`)
+		output := ensureCacheControl(input)
+
+		// System should still get cache_control
+		systemCache := gjson.GetBytes(output, "system.0.cache_control.type")
+		if systemCache.String() != "ephemeral" {
+			t.Errorf("system should have cache_control even with empty tools array")
+		}
+	})
+}
+
+// TestCacheControlOrder verifies breakpoint placement on tools and system for a request
+// with a single user turn, which is too short to receive a message breakpoint.
+func TestCacheControlOrder(t *testing.T) {
+	input := []byte(`{
+		"model": "claude-sonnet-4",
+		"tools": [
+			{"name": "Read", "description": "Read file", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}}}},
+			{"name": "Write", "description": "Write file", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}}}
+		],
+		"system": [
+			{"type": "text", "text": "You are Claude Code, Anthropic's official CLI for Claude."},
+			{"type": "text", "text": "Additional instructions here..."}
+		],
+		"messages": [
+			{"role": "user", "content": "Hello"}
+		]
+	}`)
+
+	output := ensureCacheControl(input)
+
+	// 1. Last tool has cache_control
+	if gjson.GetBytes(output, "tools.1.cache_control.type").String() != "ephemeral" {
+		t.Error("last tool should have cache_control")
+	}
+
+	// 2. First tool has NO cache_control
+	if gjson.GetBytes(output, "tools.0.cache_control").Exists() {
+		t.Error("first tool should NOT have cache_control")
+	}
+
+	// 3. Last system element has cache_control
+	if gjson.GetBytes(output, "system.1.cache_control.type").String() != "ephemeral" {
+		t.Error("last system element should have cache_control")
+	}
+
+	// 4. First system element has NO cache_control
+	if gjson.GetBytes(output, "system.0.cache_control").Exists() {
+		t.Error("first system element should NOT have cache_control")
+	}
+
+	// 5. A lone user message stays a plain string (no message breakpoint)
+	if gjson.GetBytes(output, "messages.0.content").String() != "Hello" {
+		t.Error("single user message should be left untouched")
+	}
+
+	t.Log("cache order correct: tools -> system")
+}
+
+// TestEnsureCacheControlMessages covers the multi-turn breakpoint, which is placed on
+// the second-to-last user message.
+func TestEnsureCacheControlMessages(t *testing.T) {
+	t.Run("String Content", func(t *testing.T) {
+		input := []byte(`{
+			"model": "claude-sonnet-4",
+			"messages": [
+				{"role": "user", "content": "First question"},
+				{"role": "assistant", "content": "First answer"},
+				{"role": "user", "content": "Second question"}
+			]
+		}`)
+		output := ensureCacheControl(input)
+
+		// String content is rewritten as a single text block carrying the breakpoint
+		if gjson.GetBytes(output, "messages.0.content.0.cache_control.type").String() != "ephemeral" {
+			t.Errorf("cache_control not found on second-to-last user message. Output: %s", string(output))
+		}
+		if gjson.GetBytes(output, "messages.0.content.0.text").String() != "First question" {
+			t.Errorf("original text was not preserved. Output: %s", string(output))
+		}
+		if gjson.GetBytes(output, "messages.2.content").String() != "Second question" {
+			t.Errorf("last user message should be left untouched. Output: %s", string(output))
+		}
+	})
+
+	t.Run("Array Content", func(t *testing.T) {
+		input := []byte(`{
+			"model": "claude-sonnet-4",
+			"messages": [
+				{"role": "user", "content": [{"type": "text", "text": "Part 1"}, {"type": "text", "text": "Part 2"}]},
+				{"role": "assistant", "content": "Answer"},
+				{"role": "user", "content": "Next"}
+			]
+		}`)
+		output := ensureCacheControl(input)
+
+		// cache_control should only be on the LAST content block
+		if gjson.GetBytes(output, "messages.0.content.0.cache_control").Exists() {
+			t.Errorf("cache_control should NOT be on the first content block")
+		}
+		if gjson.GetBytes(output, "messages.0.content.1.cache_control.type").String() != "ephemeral" {
+			t.Errorf("cache_control not found on last content block. Output: %s", string(output))
+		}
+	})
+
+	t.Run("Existing Breakpoint Respected", func(t *testing.T) {
+		input := []byte(`{
+			"model": "claude-sonnet-4",
+			"messages": [
+				{"role": "user", "content": "First"},
+				{"role": "assistant", "content": "Answer"},
+				{"role": "user", "content": [{"type": "text", "text": "Second", "cache_control": {"type": "ephemeral"}}]}
+			]
+		}`)
+		output := ensureCacheControl(input)
+
+		if gjson.GetBytes(output, "messages.0.content").String() != "First" {
+			t.Errorf("messages should not be modified when a breakpoint already exists. Output: %s", string(output))
+		}
+	})
+}
diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go
index 170ebb90..3edf5080 100644
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -120,6 +120,9 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
 	body = disableThinkingIfToolChoiceForced(body)
 
+	// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
+	body = ensureCacheControl(body)
+
 	// Extract betas from body and convert to header
 	var extraBetas []string
 	extraBetas, body = extractAndRemoveBetas(body)
@@ -252,6 +255,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
 	body = disableThinkingIfToolChoiceForced(body)
 
+	// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
+	body = ensureCacheControl(body)
+
 	// Extract betas from body and convert to header
 	var extraBetas []string
 	extraBetas, body = extractAndRemoveBetas(body)
@@ -636,7 +642,7 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
 		ginHeaders = ginCtx.Request.Header
 	}
 
-	baseBetas := "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14"
+	baseBetas := "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14,prompt-caching-2024-07-31"
 	if val := strings.TrimSpace(ginHeaders.Get("Anthropic-Beta")); val != "" {
 		baseBetas = val
 		if !strings.Contains(val, "oauth") {
@@ -990,3 +996,214
@@ func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.A
 
 	return payload
 }
+
+// maxCacheBreakpoints is the number of cache_control breakpoints allowed per request.
+const maxCacheBreakpoints = 4
+
+// longCacheTTL is the cache_control "ttl" value that requests the extended cache lifetime.
+const longCacheTTL = "1h"
+
+// ensureCacheControl injects cache_control breakpoints into the payload for prompt caching.
+// According to Anthropic's documentation, cache prefixes are created in order: tools -> system -> messages,
+// so the injectors run in that order and target:
+//  1. The LAST tool in the tools array (caches all tool definitions)
+//  2. The LAST element in the system array (caches system prompt)
+//  3. The SECOND-TO-LAST user turn (caches conversation history for multi-turn)
+//
+// Up to 4 cache breakpoints are allowed per request, counting the ones the client already set.
+// Each injector adds at most one breakpoint, so the budget is re-checked before every step and
+// injection stops once it is used up; a request that was valid on arrival must stay valid.
+//
+// Injected breakpoints use the default TTL. A breakpoint with the longer "1h" TTL must not come
+// after a shorter-lived one, so a payload that already contains a "1h" breakpoint is returned
+// unchanged instead of risking an ordering violation.
+//
+// This enables up to 90% cost reduction on cached tokens (cache read = 0.1x base price).
+// See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
+func ensureCacheControl(payload []byte) []byte {
+	injectors := []func([]byte) []byte{
+		injectToolsCacheControl,    // first level of the cache hierarchy
+		injectSystemCacheControl,   // second level
+		injectMessagesCacheControl, // conversation history
+	}
+
+	for _, inject := range injectors {
+		count, hasLongTTL := scanCacheBreakpoints(payload)
+		if hasLongTTL || count >= maxCacheBreakpoints {
+			break
+		}
+		payload = inject(payload)
+	}
+
+	return payload
+}
+
+// scanCacheBreakpoints returns how many cache_control entries the payload already carries and
+// whether any of them asks for the long TTL. It looks at tools, system blocks, message content
+// blocks, and content blocks nested one level deeper (e.g. inside a tool_result).
+func scanCacheBreakpoints(payload []byte) (int, bool) {
+	count := 0
+	hasLongTTL := false
+
+	// scan tallies the elements of a JSON array; any other value is ignored.
+	scan := func(blocks gjson.Result) {
+		if !blocks.IsArray() {
+			return
+		}
+		blocks.ForEach(func(_, block gjson.Result) bool {
+			cacheControl := block.Get("cache_control")
+			if cacheControl.Exists() {
+				count++
+				if cacheControl.Get("ttl").String() == longCacheTTL {
+					hasLongTTL = true
+				}
+			}
+			return true
+		})
+	}
+
+	scan(gjson.GetBytes(payload, "tools"))
+	scan(gjson.GetBytes(payload, "system"))
+	gjson.GetBytes(payload, "messages").ForEach(func(_, msg gjson.Result) bool {
+		content := msg.Get("content")
+		scan(content)
+		if content.IsArray() {
+			content.ForEach(func(_, block gjson.Result) bool {
+				scan(block.Get("content"))
+				return true
+			})
+		}
+		return true
+	})
+
+	return count, hasLongTTL
+}
+
+// injectMessagesCacheControl adds cache_control to the second-to-last user turn for multi-turn caching.
+// Per Anthropic docs: "Place cache_control on the second-to-last User message to let the model reuse the earlier cache."
+// This enables caching of conversation history, which is especially beneficial for long multi-turn conversations.
+// Only adds cache_control if:
+//   - There are at least 2 user turns in the conversation
+//   - No message content already has cache_control
+//   - The targeted content is a non-empty block array or a non-blank string
+//
+// On an sjson error the original payload is returned unchanged.
+func injectMessagesCacheControl(payload []byte) []byte {
+	messages := gjson.GetBytes(payload, "messages")
+	if !messages.IsArray() {
+		return payload
+	}
+
+	// One pass over the conversation: stop at the first client-supplied breakpoint and track
+	// the positions of the last two user turns. The position is counted locally because gjson
+	// documents ForEach as passing only the value when iterating an array.
+	clientManaged := false
+	targetIdx, lastUserIdx := -1, -1
+	pos := 0
+	messages.ForEach(func(_, msg gjson.Result) bool {
+		if anyBlockHasCacheControl(msg.Get("content")) {
+			clientManaged = true
+			return false
+		}
+		if msg.Get("role").String() == "user" {
+			targetIdx, lastUserIdx = lastUserIdx, pos
+		}
+		pos++
+		return true
+	})
+	// targetIdx stays -1 until a second user turn has been seen.
+	if clientManaged || targetIdx < 0 {
+		return payload
+	}
+
+	contentPath := fmt.Sprintf("messages.%d.content", targetIdx)
+	content := gjson.GetBytes(payload, contentPath)
+
+	switch {
+	case content.IsArray():
+		// Mark the last content block so the whole message falls inside the cached prefix.
+		blockCount := int(content.Get("#").Int())
+		if blockCount == 0 {
+			return payload
+		}
+		cacheControlPath := fmt.Sprintf("%s.%d.cache_control", contentPath, blockCount-1)
+		result, err := sjson.SetBytes(payload, cacheControlPath, map[string]string{"type": "ephemeral"})
+		if err != nil {
+			log.Warnf("failed to inject cache_control into messages: %v", err)
+			return payload
+		}
+		return result
+	case content.Type == gjson.String:
+		text := content.String()
+		if strings.TrimSpace(text) == "" {
+			// Wrapping blank text would create an empty text block carrying cache_control,
+			// which the upstream API does not accept; leave the message as it is.
+			return payload
+		}
+		// A plain string has nowhere to hang cache_control, so wrap it in a text block.
+		newContent := []map[string]interface{}{
+			{
+				"type": "text",
+				"text": text,
+				"cache_control": map[string]string{
+					"type": "ephemeral",
+				},
+			},
+		}
+		result, err := sjson.SetBytes(payload, contentPath, newContent)
+		if err != nil {
+			log.Warnf("failed to inject cache_control into message string content: %v", err)
+			return payload
+		}
+		return result
+	}
+
+	return payload
+}
+
+// anyBlockHasCacheControl reports whether blocks is a JSON array in which at least one
+// element has a cache_control key. Any non-array value yields false.
+func anyBlockHasCacheControl(blocks gjson.Result) bool {
+	if !blocks.IsArray() {
+		return false
+	}
+	found := false
+	blocks.ForEach(func(_, block gjson.Result) bool {
+		found = block.Get("cache_control").Exists()
+		return !found
+	})
+	return found
+}
+
+// injectToolsCacheControl adds cache_control to the last tool in the tools array.
+// Per Anthropic docs: "The cache_control parameter on the last tool definition caches all tool definitions."
+// This only adds cache_control if NO tool in the array already has it.
+// On an sjson error the original payload is returned unchanged.
+func injectToolsCacheControl(payload []byte) []byte {
+	tools := gjson.GetBytes(payload, "tools")
+	if !tools.IsArray() {
+		return payload
+	}
+
+	toolCount := int(tools.Get("#").Int())
+	// Nothing to mark, or the client manages tool caching itself.
+	if toolCount == 0 || anyBlockHasCacheControl(tools) {
+		return payload
+	}
+
+	// Add cache_control to the last tool
+	lastToolPath := fmt.Sprintf("tools.%d.cache_control", toolCount-1)
+	result, err := sjson.SetBytes(payload, lastToolPath, map[string]string{"type": "ephemeral"})
+	if err != nil {
+		log.Warnf("failed to inject cache_control into tools array: %v", err)
+		return payload
+	}
+
+	return result
+}
+
+// injectSystemCacheControl adds cache_control to the last element in the system prompt.
+// Converts string system prompts to array format if needed.
+// This only adds cache_control if NO system element already has it.
+// Blank string prompts (empty or whitespace only) are left untouched, because wrapping them
+// would produce an empty text block carrying cache_control. Values that are neither an array
+// nor a string are also returned as-is, as is the original payload on an sjson error.
+func injectSystemCacheControl(payload []byte) []byte {
+	system := gjson.GetBytes(payload, "system")
+
+	switch {
+	case system.IsArray():
+		count := int(system.Get("#").Int())
+		if count == 0 {
+			return payload
+		}
+
+		// A breakpoint anywhere in the array means the client manages system caching itself.
+		clientManaged := false
+		system.ForEach(func(_, item gjson.Result) bool {
+			clientManaged = item.Get("cache_control").Exists()
+			return !clientManaged
+		})
+		if clientManaged {
+			return payload
+		}
+
+		// Add cache_control to the last system element
+		lastSystemPath := fmt.Sprintf("system.%d.cache_control", count-1)
+		result, err := sjson.SetBytes(payload, lastSystemPath, map[string]string{"type": "ephemeral"})
+		if err != nil {
+			log.Warnf("failed to inject cache_control into system array: %v", err)
+			return payload
+		}
+		return result
+
+	case system.Type == gjson.String:
+		text := system.String()
+		if strings.TrimSpace(text) == "" {
+			return payload
+		}
+
+		// Convert string system prompt to array with cache_control
+		// "system": "text" -> "system": [{"type": "text", "text": "text", "cache_control": {"type": "ephemeral"}}]
+		newSystem := []map[string]interface{}{
+			{
+				"type": "text",
+				"text": text,
+				"cache_control": map[string]string{
+					"type": "ephemeral",
+				},
+			},
+		}
+		result, err := sjson.SetBytes(payload, "system", newSystem)
+		if err != nil {
+			log.Warnf("failed to inject cache_control into system string: %v", err)
+			return payload
+		}
+		return result
+	}
+
+	return payload
+}