fix(claude): ensure max_tokens exceeds thinking budget for thinking models

Fixes an issue where Claude thinking models would return 400 errors when
thinking.budget_tokens was greater than or equal to max_tokens.

Changes:
- Add MaxCompletionTokens: 128000 to all Claude thinking model definitions
- Add ensureMaxTokensForThinking() function in claude_executor.go that:
  - Checks if thinking is enabled with a budget_tokens value
  - Looks up the model's MaxCompletionTokens from the registry
  - Ensures max_tokens is set to at least the model's MaxCompletionTokens
  - Falls back to budget_tokens plus a 4000-token buffer if the registry lookup fails or returns 0

This ensures the Anthropic API constraint (max_tokens > thinking.budget_tokens)
is always satisfied when using extended thinking features.

Fixes: #339

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
nestharus
2025-11-26 22:31:05 -08:00
parent 39621a0340
commit e73cdf5cff
2 changed files with 83 additions and 35 deletions

View File

@@ -24,49 +24,54 @@ func GetClaudeModels() []*ModelInfo {
DisplayName: "Claude 4.5 Sonnet", DisplayName: "Claude 4.5 Sonnet",
}, },
{ {
ID: "claude-sonnet-4-5-thinking", ID: "claude-sonnet-4-5-thinking",
Object: "model", Object: "model",
Created: 1759104000, // 2025-09-29 Created: 1759104000, // 2025-09-29
OwnedBy: "anthropic", OwnedBy: "anthropic",
Type: "claude", Type: "claude",
DisplayName: "Claude 4.5 Sonnet Thinking", DisplayName: "Claude 4.5 Sonnet Thinking",
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 128000,
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
}, },
{ {
ID: "claude-opus-4-5-thinking", ID: "claude-opus-4-5-thinking",
Object: "model", Object: "model",
Created: 1761955200, // 2025-11-01 Created: 1761955200, // 2025-11-01
OwnedBy: "anthropic", OwnedBy: "anthropic",
Type: "claude", Type: "claude",
DisplayName: "Claude 4.5 Opus Thinking", DisplayName: "Claude 4.5 Opus Thinking",
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 128000,
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
}, },
{ {
ID: "claude-opus-4-5-thinking-low", ID: "claude-opus-4-5-thinking-low",
Object: "model", Object: "model",
Created: 1761955200, // 2025-11-01 Created: 1761955200, // 2025-11-01
OwnedBy: "anthropic", OwnedBy: "anthropic",
Type: "claude", Type: "claude",
DisplayName: "Claude 4.5 Opus Thinking Low", DisplayName: "Claude 4.5 Opus Thinking Low",
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 128000,
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
}, },
{ {
ID: "claude-opus-4-5-thinking-medium", ID: "claude-opus-4-5-thinking-medium",
Object: "model", Object: "model",
Created: 1761955200, // 2025-11-01 Created: 1761955200, // 2025-11-01
OwnedBy: "anthropic", OwnedBy: "anthropic",
Type: "claude", Type: "claude",
DisplayName: "Claude 4.5 Opus Thinking Medium", DisplayName: "Claude 4.5 Opus Thinking Medium",
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 128000,
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
}, },
{ {
ID: "claude-opus-4-5-thinking-high", ID: "claude-opus-4-5-thinking-high",
Object: "model", Object: "model",
Created: 1761955200, // 2025-11-01 Created: 1761955200, // 2025-11-01
OwnedBy: "anthropic", OwnedBy: "anthropic",
Type: "claude", Type: "claude",
DisplayName: "Claude 4.5 Opus Thinking High", DisplayName: "Claude 4.5 Opus Thinking High",
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 128000,
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
}, },
{ {
ID: "claude-opus-4-5-20251101", ID: "claude-opus-4-5-20251101",

View File

@@ -17,6 +17,7 @@ import (
claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude" claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/router-for-me/CLIProxyAPI/v6/internal/util"
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -66,6 +67,9 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
} }
body = applyPayloadConfig(e.cfg, req.Model, body) body = applyPayloadConfig(e.cfg, req.Model, body)
// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
body = ensureMaxTokensForThinking(req.Model, body)
// Extract betas from body and convert to header // Extract betas from body and convert to header
var extraBetas []string var extraBetas []string
extraBetas, body = extractAndRemoveBetas(body) extraBetas, body = extractAndRemoveBetas(body)
@@ -165,6 +169,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
body = checkSystemInstructions(body) body = checkSystemInstructions(body)
body = applyPayloadConfig(e.cfg, req.Model, body) body = applyPayloadConfig(e.cfg, req.Model, body)
// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
body = ensureMaxTokensForThinking(req.Model, body)
// Extract betas from body and convert to header // Extract betas from body and convert to header
var extraBetas []string var extraBetas []string
extraBetas, body = extractAndRemoveBetas(body) extraBetas, body = extractAndRemoveBetas(body)
@@ -447,6 +454,42 @@ func (e *ClaudeExecutor) injectThinkingConfig(modelName string, body []byte) []b
return body return body
} }
// ensureMaxTokensForThinking raises max_tokens so that it is large enough for
// the configured thinking budget. The Anthropic API requires
// max_tokens > thinking.budget_tokens and answers with a 400 error otherwise.
//
// The body is returned unchanged when thinking.type is not "enabled" or when
// thinking.budget_tokens is missing or not positive. Otherwise the target is
// the model's MaxCompletionTokens from the global registry or, when the model
// is unknown or reports 0, budget_tokens plus a fixed buffer. max_tokens is
// only ever raised to the target, never lowered.
//
// Call this after all thinking configuration has been finalized.
//
// NOTE(review): when the registry value is not greater than budget_tokens the
// target still equals the registry value, so the constraint is not restored in
// that case; presumably the budget is clamped elsewhere — confirm.
func ensureMaxTokensForThinking(modelName string, body []byte) []byte {
	if gjson.GetBytes(body, "thinking.type").String() != "enabled" {
		return body
	}

	budgetTokens := gjson.GetBytes(body, "thinking.budget_tokens").Int()
	if budgetTokens <= 0 {
		return body
	}

	// Default target: the budget plus headroom for the visible answer.
	const fallbackBuffer = 4000
	requiredMaxTokens := budgetTokens + fallbackBuffer

	// Prefer the model's own completion limit when the registry knows it.
	if modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName); modelInfo != nil && modelInfo.MaxCompletionTokens > 0 {
		requiredMaxTokens = int64(modelInfo.MaxCompletionTokens)
	}

	// A missing max_tokens reads as 0 and is therefore always raised.
	if gjson.GetBytes(body, "max_tokens").Int() >= requiredMaxTokens {
		return body
	}

	updated, err := sjson.SetBytes(body, "max_tokens", requiredMaxTokens)
	if err != nil {
		// Do not let a failed update clobber the request body; forward the
		// original payload unchanged instead.
		return body
	}
	return updated
}
func (e *ClaudeExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string { func (e *ClaudeExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
if alias == "" { if alias == "" {
return "" return ""