feat(caching): implement Claude prompt caching with multi-turn support

- Add ensureCacheControl() to auto-inject cache breakpoints
- Cache tools (last tool), system (last element), and messages (2nd-to-last user turn)
- Add prompt-caching-2024-07-31 beta header
- Return original payload on sjson error to prevent corruption
- Include verification test for caching logic

Enables up to 90% cost reduction on cached tokens.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Martin Schneeweiss
2026-01-29 00:32:04 +01:00
parent 9e5b1d24e8
commit 3a43ecb19b
2 changed files with 428 additions and 1 deletions

View File

@@ -0,0 +1,210 @@
package executor
import (
"fmt"
"testing"
"github.com/tidwall/gjson"
)
// TestEnsureCacheControl exercises ensureCacheControl against the payload shapes it has to
// handle: string and array system prompts, tool arrays of various sizes, pre-existing
// breakpoints, and multi-turn conversations.
func TestEnsureCacheControl(t *testing.T) {
	// Test case 1: System prompt as string
	t.Run("String System Prompt", func(t *testing.T) {
		input := []byte(`{"model": "claude-3-5-sonnet", "system": "This is a long system prompt", "messages": []}`)
		output := ensureCacheControl(input)

		// A string prompt is expected to be rewritten as a one-element array.
		res := gjson.GetBytes(output, "system.0.cache_control.type")
		if res.String() != "ephemeral" {
			t.Errorf("cache_control not found in system string. Output: %s", string(output))
		}
	})

	// Test case 2: System prompt as array
	t.Run("Array System Prompt", func(t *testing.T) {
		input := []byte(`{"model": "claude-3-5-sonnet", "system": [{"type": "text", "text": "Part 1"}, {"type": "text", "text": "Part 2"}], "messages": []}`)
		output := ensureCacheControl(input)

		// cache_control should only be on the LAST element
		res0 := gjson.GetBytes(output, "system.0.cache_control")
		res1 := gjson.GetBytes(output, "system.1.cache_control.type")
		if res0.Exists() {
			t.Errorf("cache_control should NOT be on the first element")
		}
		if res1.String() != "ephemeral" {
			t.Errorf("cache_control not found on last system element. Output: %s", string(output))
		}
	})

	// Test case 3: Tools are cached
	t.Run("Tools Caching", func(t *testing.T) {
		input := []byte(`{
			"model": "claude-3-5-sonnet",
			"tools": [
				{"name": "tool1", "description": "First tool", "input_schema": {"type": "object"}},
				{"name": "tool2", "description": "Second tool", "input_schema": {"type": "object"}}
			],
			"system": "System prompt",
			"messages": []
		}`)
		output := ensureCacheControl(input)

		// cache_control should only be on the LAST tool
		tool0Cache := gjson.GetBytes(output, "tools.0.cache_control")
		tool1Cache := gjson.GetBytes(output, "tools.1.cache_control.type")
		if tool0Cache.Exists() {
			t.Errorf("cache_control should NOT be on the first tool")
		}
		if tool1Cache.String() != "ephemeral" {
			t.Errorf("cache_control not found on last tool. Output: %s", string(output))
		}

		// System should also have cache_control
		systemCache := gjson.GetBytes(output, "system.0.cache_control.type")
		if systemCache.String() != "ephemeral" {
			t.Errorf("cache_control not found in system. Output: %s", string(output))
		}
	})

	// Test case 4: Tools and system are INDEPENDENT breakpoints
	// Per Anthropic docs: Up to 4 breakpoints allowed, tools and system are cached separately
	t.Run("Independent Cache Breakpoints", func(t *testing.T) {
		input := []byte(`{
			"model": "claude-3-5-sonnet",
			"tools": [
				{"name": "tool1", "description": "First tool", "input_schema": {"type": "object"}, "cache_control": {"type": "ephemeral"}}
			],
			"system": [{"type": "text", "text": "System"}],
			"messages": []
		}`)
		output := ensureCacheControl(input)

		// Tool already has cache_control - should not be changed
		tool0Cache := gjson.GetBytes(output, "tools.0.cache_control.type")
		if tool0Cache.String() != "ephemeral" {
			t.Errorf("existing cache_control was incorrectly removed")
		}

		// System SHOULD get cache_control because it is an INDEPENDENT breakpoint
		// Tools and system are separate cache levels in the hierarchy
		systemCache := gjson.GetBytes(output, "system.0.cache_control.type")
		if systemCache.String() != "ephemeral" {
			t.Errorf("system should have its own cache_control breakpoint (independent of tools)")
		}
	})

	// Test case 5: Only tools, no system
	t.Run("Only Tools No System", func(t *testing.T) {
		input := []byte(`{
			"model": "claude-3-5-sonnet",
			"tools": [
				{"name": "tool1", "description": "Tool", "input_schema": {"type": "object"}}
			],
			"messages": [{"role": "user", "content": "Hi"}]
		}`)
		output := ensureCacheControl(input)

		toolCache := gjson.GetBytes(output, "tools.0.cache_control.type")
		if toolCache.String() != "ephemeral" {
			t.Errorf("cache_control not found on tool. Output: %s", string(output))
		}
	})

	// Test case 6: Many tools (Claude Code scenario)
	t.Run("Many Tools (Claude Code Scenario)", func(t *testing.T) {
		// Simulate Claude Code with many tools
		toolsJSON := `[`
		for i := 0; i < 50; i++ {
			if i > 0 {
				toolsJSON += ","
			}
			toolsJSON += fmt.Sprintf(`{"name": "tool%d", "description": "Tool %d", "input_schema": {"type": "object"}}`, i, i)
		}
		toolsJSON += `]`

		input := []byte(fmt.Sprintf(`{
			"model": "claude-3-5-sonnet",
			"tools": %s,
			"system": [{"type": "text", "text": "You are Claude Code"}],
			"messages": [{"role": "user", "content": "Hello"}]
		}`, toolsJSON))
		output := ensureCacheControl(input)

		// Only the last tool (index 49) should have cache_control
		for i := 0; i < 49; i++ {
			path := fmt.Sprintf("tools.%d.cache_control", i)
			if gjson.GetBytes(output, path).Exists() {
				t.Errorf("tool %d should NOT have cache_control", i)
			}
		}
		lastToolCache := gjson.GetBytes(output, "tools.49.cache_control.type")
		if lastToolCache.String() != "ephemeral" {
			t.Errorf("last tool (49) should have cache_control")
		}

		// System should also have cache_control
		systemCache := gjson.GetBytes(output, "system.0.cache_control.type")
		if systemCache.String() != "ephemeral" {
			t.Errorf("system should have cache_control")
		}

		// t.Errorf does not stop the subtest, so only report success when nothing failed.
		if !t.Failed() {
			t.Log("test passed: 50 tools - cache_control only on last tool")
		}
	})

	// Test case 7: Empty tools array
	t.Run("Empty Tools Array", func(t *testing.T) {
		input := []byte(`{"model": "claude-3-5-sonnet", "tools": [], "system": "Test", "messages": []}`)
		output := ensureCacheControl(input)

		// System should still get cache_control
		systemCache := gjson.GetBytes(output, "system.0.cache_control.type")
		if systemCache.String() != "ephemeral" {
			t.Errorf("system should have cache_control even with empty tools array")
		}
	})

	// Test case 8: Multi-turn conversation - breakpoint goes on the second-to-last user turn
	t.Run("Multi Turn Messages", func(t *testing.T) {
		input := []byte(`{
			"model": "claude-3-5-sonnet",
			"messages": [
				{"role": "user", "content": "First question"},
				{"role": "assistant", "content": "First answer"},
				{"role": "user", "content": [{"type": "text", "text": "Second question"}]},
				{"role": "assistant", "content": "Second answer"},
				{"role": "user", "content": "Third question"}
			]
		}`)
		output := ensureCacheControl(input)

		// User turns sit at indices 0, 2 and 4, so index 2 is the second-to-last one.
		target := gjson.GetBytes(output, "messages.2.content.0.cache_control.type")
		if target.String() != "ephemeral" {
			t.Errorf("cache_control not found on second-to-last user turn. Output: %s", string(output))
		}

		// The other user turns must keep their plain string content.
		for _, idx := range []int{0, 4} {
			content := gjson.GetBytes(output, fmt.Sprintf("messages.%d.content", idx))
			if content.Type != gjson.String {
				t.Errorf("message %d should NOT have been modified. Output: %s", idx, string(output))
			}
		}
	})
}
// TestCacheControlOrder verifies the correct order: tools -> system -> messages.
// With a single user turn there is no earlier history to cache, so the messages
// section is expected to stay untouched while tools and system each get a breakpoint
// on their last element only.
func TestCacheControlOrder(t *testing.T) {
	input := []byte(`{
		"model": "claude-sonnet-4",
		"tools": [
			{"name": "Read", "description": "Read file", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}}}},
			{"name": "Write", "description": "Write file", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}}}
		],
		"system": [
			{"type": "text", "text": "You are Claude Code, Anthropic's official CLI for Claude."},
			{"type": "text", "text": "Additional instructions here..."}
		],
		"messages": [
			{"role": "user", "content": "Hello"}
		]
	}`)
	output := ensureCacheControl(input)

	// 1. Last tool has cache_control
	if gjson.GetBytes(output, "tools.1.cache_control.type").String() != "ephemeral" {
		t.Error("last tool should have cache_control")
	}
	// 2. First tool has NO cache_control
	if gjson.GetBytes(output, "tools.0.cache_control").Exists() {
		t.Error("first tool should NOT have cache_control")
	}
	// 3. Last system element has cache_control
	if gjson.GetBytes(output, "system.1.cache_control.type").String() != "ephemeral" {
		t.Error("last system element should have cache_control")
	}
	// 4. First system element has NO cache_control
	if gjson.GetBytes(output, "system.0.cache_control").Exists() {
		t.Error("first system element should NOT have cache_control")
	}
	// 5. A lone user turn is left as plain string content (needs >= 2 user turns to cache)
	if gjson.GetBytes(output, "messages.0.content").Type != gjson.String {
		t.Error("single user turn should NOT be modified")
	}

	// t.Error does not abort the test, so only claim success when nothing failed.
	if !t.Failed() {
		t.Log("cache order correct: tools -> system")
	}
}

View File

@@ -120,6 +120,9 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
body = disableThinkingIfToolChoiceForced(body)
// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
body = ensureCacheControl(body)
// Extract betas from body and convert to header
var extraBetas []string
extraBetas, body = extractAndRemoveBetas(body)
@@ -252,6 +255,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
body = disableThinkingIfToolChoiceForced(body)
// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
body = ensureCacheControl(body)
// Extract betas from body and convert to header
var extraBetas []string
extraBetas, body = extractAndRemoveBetas(body)
@@ -636,7 +642,7 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
ginHeaders = ginCtx.Request.Header
}
baseBetas := "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14"
baseBetas := "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14,prompt-caching-2024-07-31"
if val := strings.TrimSpace(ginHeaders.Get("Anthropic-Beta")); val != "" {
baseBetas = val
if !strings.Contains(val, "oauth") {
@@ -990,3 +996,214 @@ func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.A
return payload
}
// ensureCacheControl injects cache_control breakpoints into the payload for optimal prompt caching.
// According to Anthropic's documentation, cache prefixes are created in order: tools -> system -> messages.
// This function adds cache_control to:
// 1. The LAST tool in the tools array (caches all tool definitions)
// 2. The LAST element in the system array (caches system prompt)
// 3. The SECOND-TO-LAST user turn (caches conversation history for multi-turn)
//
// Up to 4 cache breakpoints are allowed per request. Tools, System, and Messages are INDEPENDENT
// breakpoints, but they share that budget: a client that already placed breakpoints itself
// (for example four inside messages and none on tools) must not be pushed over the limit, or the
// upstream API rejects the whole request. Injection therefore stops as soon as the payload
// already carries the maximum number of breakpoints.
//
// This enables up to 90% cost reduction on cached tokens (cache read = 0.1x base price).
// See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
func ensureCacheControl(payload []byte) []byte {
	// Maximum number of cache_control blocks accepted per request.
	const maxCacheBreakpoints = 4

	// Ordered by cache hierarchy so the most valuable breakpoints are spent first.
	injectors := []func([]byte) []byte{
		injectToolsCacheControl,    // 1. last tool: caches all tool definitions
		injectSystemCacheControl,   // 2. last system element: caches the system prompt
		injectMessagesCacheControl, // 3. second-to-last user turn: caches conversation history
	}
	for _, inject := range injectors {
		// Each injector adds at most one breakpoint, so re-checking before every step
		// guarantees the limit is never exceeded by this function.
		if countCacheControlBreakpoints(payload) >= maxCacheBreakpoints {
			break
		}
		payload = inject(payload)
	}
	return payload
}

// countCacheControlBreakpoints returns how many blocks in tools, system and message content
// already carry a cache_control field. Blocks nested one level inside a content block
// (e.g. the content array of a tool_result) are counted as well; this is deliberately
// conservative, since over-counting only skips an optional optimization.
func countCacheControlBreakpoints(payload []byte) int {
	total := 0
	countIn := func(blocks gjson.Result, nested bool) {}
	countIn = func(blocks gjson.Result, nested bool) {
		if !blocks.IsArray() {
			return
		}
		blocks.ForEach(func(_, block gjson.Result) bool {
			if block.Get("cache_control").Exists() {
				total++
			}
			if nested {
				countIn(block.Get("content"), false)
			}
			return true
		})
	}

	countIn(gjson.GetBytes(payload, "tools"), false)
	countIn(gjson.GetBytes(payload, "system"), false)

	messages := gjson.GetBytes(payload, "messages")
	if messages.IsArray() {
		messages.ForEach(func(_, msg gjson.Result) bool {
			countIn(msg.Get("content"), true)
			return true
		})
	}
	return total
}
// injectMessagesCacheControl marks the second-to-last user turn with an ephemeral
// cache_control breakpoint so earlier conversation history can be reused from cache.
//
// The payload is returned unchanged when:
//   - "messages" is missing or is not an array,
//   - any content block of any message already carries cache_control,
//   - the conversation contains fewer than two user turns,
//   - the target message's content is neither a non-empty array nor a string.
//
// For array content the breakpoint is placed on the last content block; string content is
// rewritten as a single text block carrying the breakpoint. When sjson reports an error a
// warning is logged and the payload is returned as it was before the attempt.
func injectMessagesCacheControl(payload []byte) []byte {
	msgs := gjson.GetBytes(payload, "messages")
	if !(msgs.Exists() && msgs.IsArray()) {
		return payload
	}
	turns := msgs.Array()

	// A pre-existing breakpoint anywhere in the messages means we leave them alone.
	for _, turn := range turns {
		blocks := turn.Get("content")
		if !blocks.IsArray() {
			continue
		}
		for _, block := range blocks.Array() {
			if block.Get("cache_control").Exists() {
				return payload
			}
		}
	}

	// Track the two most recent user turns while walking the conversation once.
	prevUser, lastUser := -1, -1
	for i, turn := range turns {
		if turn.Get("role").String() == "user" {
			prevUser, lastUser = lastUser, i
		}
	}
	if prevUser < 0 {
		// Fewer than two user turns: nothing worth caching yet.
		return payload
	}

	targetPath := fmt.Sprintf("messages.%d.content", prevUser)
	target := gjson.GetBytes(payload, targetPath)

	switch {
	case target.IsArray():
		blockCount := len(target.Array())
		if blockCount == 0 {
			return payload
		}
		// The breakpoint belongs on the final block of the turn.
		breakpointPath := fmt.Sprintf("messages.%d.content.%d.cache_control", prevUser, blockCount-1)
		updated, err := sjson.SetBytes(payload, breakpointPath, map[string]string{"type": "ephemeral"})
		if err != nil {
			log.Warnf("failed to inject cache_control into messages: %v", err)
			return payload
		}
		return updated

	case target.Type == gjson.String:
		// Plain string content cannot hold cache_control, so wrap it in a text block.
		wrapped := []map[string]interface{}{
			{
				"type": "text",
				"text": target.String(),
				"cache_control": map[string]string{
					"type": "ephemeral",
				},
			},
		}
		updated, err := sjson.SetBytes(payload, targetPath, wrapped)
		if err != nil {
			log.Warnf("failed to inject cache_control into message string content: %v", err)
			return payload
		}
		return updated
	}

	return payload
}
// injectToolsCacheControl places an ephemeral cache_control breakpoint on the final entry of
// the "tools" array, which covers every tool definition that precedes it.
//
// The payload is returned untouched when "tools" is absent, is not an array, is empty, or
// when at least one tool already carries cache_control. If sjson fails, a warning is logged
// and the original payload is returned.
func injectToolsCacheControl(payload []byte) []byte {
	toolList := gjson.GetBytes(payload, "tools")
	if !(toolList.Exists() && toolList.IsArray()) {
		return payload
	}

	entries := toolList.Array()
	if len(entries) == 0 {
		return payload
	}

	// Respect any breakpoint the client already chose for its tools.
	for _, entry := range entries {
		if entry.Get("cache_control").Exists() {
			return payload
		}
	}

	targetPath := fmt.Sprintf("tools.%d.cache_control", len(entries)-1)
	updated, err := sjson.SetBytes(payload, targetPath, map[string]string{"type": "ephemeral"})
	if err != nil {
		log.Warnf("failed to inject cache_control into tools array: %v", err)
		return payload
	}
	return updated
}
// injectSystemCacheControl adds cache_control to the last element in the system prompt.
// Converts string system prompts to array format if needed.
// This only adds cache_control if NO system element already has it.
//
// The payload is returned unchanged when:
//   - "system" is absent, an empty array, or neither an array nor a string,
//   - any system element already carries cache_control,
//   - "system" is an empty or whitespace-only string: wrapping it would produce an empty
//     text block with cache_control, which the API does not accept, so a blank prompt
//     is passed through as-is instead of being turned into an invalid block.
//
// On an sjson error a warning is logged and the original payload is returned.
func injectSystemCacheControl(payload []byte) []byte {
	system := gjson.GetBytes(payload, "system")
	if !system.Exists() {
		return payload
	}

	switch {
	case system.IsArray():
		count := int(system.Get("#").Int())
		if count == 0 {
			return payload
		}

		// Check if ANY system element already has cache_control
		hasCacheControlInSystem := false
		system.ForEach(func(_, item gjson.Result) bool {
			if item.Get("cache_control").Exists() {
				hasCacheControlInSystem = true
				return false
			}
			return true
		})
		if hasCacheControlInSystem {
			return payload
		}

		// Add cache_control to the last system element
		lastSystemPath := fmt.Sprintf("system.%d.cache_control", count-1)
		result, err := sjson.SetBytes(payload, lastSystemPath, map[string]string{"type": "ephemeral"})
		if err != nil {
			log.Warnf("failed to inject cache_control into system array: %v", err)
			return payload
		}
		return result

	case system.Type == gjson.String:
		text := system.String()
		// Nothing to cache, and an empty text block with cache_control would be invalid.
		if strings.TrimSpace(text) == "" {
			return payload
		}

		// Convert string system prompt to array with cache_control
		// "system": "text" -> "system": [{"type": "text", "text": "text", "cache_control": {"type": "ephemeral"}}]
		newSystem := []map[string]interface{}{
			{
				"type": "text",
				"text": text,
				"cache_control": map[string]string{
					"type": "ephemeral",
				},
			},
		}
		result, err := sjson.SetBytes(payload, "system", newSystem)
		if err != nil {
			log.Warnf("failed to inject cache_control into system string: %v", err)
			return payload
		}
		return result
	}

	return payload
}