From e73cdf5cffd15e00243f7907d68f5508ee958140 Mon Sep 17 00:00:00 2001 From: nestharus Date: Wed, 26 Nov 2025 22:31:05 -0800 Subject: [PATCH] fix(claude): ensure max_tokens exceeds thinking budget for thinking models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes an issue where Claude thinking models would return 400 errors when the thinking.budget_tokens was greater than or equal to max_tokens. Changes: - Add MaxCompletionTokens: 128000 to all Claude thinking model definitions - Add ensureMaxTokensForThinking() function in claude_executor.go that: - Checks if thinking is enabled with a budget_tokens value - Looks up the model's MaxCompletionTokens from the registry - Ensures max_tokens is set to at least the model's MaxCompletionTokens - Falls back to budget_tokens + 4000 buffer if registry lookup fails This ensures Anthropic API constraint (max_tokens > thinking.budget_tokens) is always satisfied when using extended thinking features. Fixes: #339 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- internal/registry/model_definitions.go | 75 +++++++++++--------- internal/runtime/executor/claude_executor.go | 43 +++++++++++ 2 files changed, 83 insertions(+), 35 deletions(-) diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index daa95a4d..77b06b72 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -24,49 +24,54 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4.5 Sonnet", }, { - ID: "claude-sonnet-4-5-thinking", - Object: "model", - Created: 1759104000, // 2025-09-29 - OwnedBy: "anthropic", - Type: "claude", - DisplayName: "Claude 4.5 Sonnet Thinking", - Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, + ID: "claude-sonnet-4-5-thinking", + Object: "model", + Created: 1759104000, // 2025-09-29 + OwnedBy: "anthropic", + Type: 
"claude", + DisplayName: "Claude 4.5 Sonnet Thinking", + MaxCompletionTokens: 128000, + Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, }, { - ID: "claude-opus-4-5-thinking", - Object: "model", - Created: 1761955200, // 2025-11-01 - OwnedBy: "anthropic", - Type: "claude", - DisplayName: "Claude 4.5 Opus Thinking", - Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, + ID: "claude-opus-4-5-thinking", + Object: "model", + Created: 1761955200, // 2025-11-01 + OwnedBy: "anthropic", + Type: "claude", + DisplayName: "Claude 4.5 Opus Thinking", + MaxCompletionTokens: 128000, + Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, }, { - ID: "claude-opus-4-5-thinking-low", - Object: "model", - Created: 1761955200, // 2025-11-01 - OwnedBy: "anthropic", - Type: "claude", - DisplayName: "Claude 4.5 Opus Thinking Low", - Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, + ID: "claude-opus-4-5-thinking-low", + Object: "model", + Created: 1761955200, // 2025-11-01 + OwnedBy: "anthropic", + Type: "claude", + DisplayName: "Claude 4.5 Opus Thinking Low", + MaxCompletionTokens: 128000, + Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, }, { - ID: "claude-opus-4-5-thinking-medium", - Object: "model", - Created: 1761955200, // 2025-11-01 - OwnedBy: "anthropic", - Type: "claude", - DisplayName: "Claude 4.5 Opus Thinking Medium", - Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, + ID: "claude-opus-4-5-thinking-medium", + Object: "model", + Created: 1761955200, // 2025-11-01 + OwnedBy: "anthropic", + Type: "claude", + DisplayName: "Claude 4.5 Opus Thinking Medium", + MaxCompletionTokens: 128000, + Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, }, { - ID: 
"claude-opus-4-5-thinking-high", - Object: "model", - Created: 1761955200, // 2025-11-01 - OwnedBy: "anthropic", - Type: "claude", - DisplayName: "Claude 4.5 Opus Thinking High", - Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, + ID: "claude-opus-4-5-thinking-high", + Object: "model", + Created: 1761955200, // 2025-11-01 + OwnedBy: "anthropic", + Type: "claude", + DisplayName: "Claude 4.5 Opus Thinking High", + MaxCompletionTokens: 128000, + Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, }, { ID: "claude-opus-4-5-20251101", diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 591b8f9e..1a18c46a 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -17,6 +17,7 @@ import ( claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -66,6 +67,9 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r } body = applyPayloadConfig(e.cfg, req.Model, body) + // Ensure max_tokens > thinking.budget_tokens when thinking is enabled + body = ensureMaxTokensForThinking(req.Model, body) + // Extract betas from body and convert to header var extraBetas []string extraBetas, body = extractAndRemoveBetas(body) @@ -165,6 +169,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A body = checkSystemInstructions(body) body = applyPayloadConfig(e.cfg, req.Model, body) + // Ensure max_tokens > thinking.budget_tokens when thinking 
is enabled
+	body = ensureMaxTokensForThinking(req.Model, body)
+
 	// Extract betas from body and convert to header
 	var extraBetas []string
 	extraBetas, body = extractAndRemoveBetas(body)
@@ -447,6 +454,42 @@ func (e *ClaudeExecutor) injectThinkingConfig(modelName string, body []byte) []b
 	return body
 }
 
+// ensureMaxTokensForThinking guarantees max_tokens > thinking.budget_tokens when thinking is enabled;
+// the Anthropic API answers 400 otherwise. Call it once the thinking configuration is final.
+// max_tokens is raised to at least the model's registry MaxCompletionTokens, or to budget_tokens + 4000
+// when that limit is unknown or not above the budget. On a failed JSON update the body is returned as is.
+func ensureMaxTokensForThinking(modelName string, body []byte) []byte {
+	if gjson.GetBytes(body, "thinking.type").String() != "enabled" {
+		return body
+	}
+
+	budgetTokens := gjson.GetBytes(body, "thinking.budget_tokens").Int()
+	if budgetTokens <= 0 {
+		return body
+	}
+
+	// Registry limit for this model; stays 0 when the model is not registered.
+	maxCompletionTokens := 0
+	if modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName); modelInfo != nil {
+		maxCompletionTokens = modelInfo.MaxCompletionTokens
+	}
+
+	// Use the registry limit only if it is above the budget; else fall back to budget + buffer.
+	const fallbackBuffer = 4000
+	requiredMaxTokens := budgetTokens + fallbackBuffer
+	if int64(maxCompletionTokens) > budgetTokens {
+		requiredMaxTokens = int64(maxCompletionTokens)
+	}
+
+	if gjson.GetBytes(body, "max_tokens").Int() >= requiredMaxTokens {
+		return body
+	}
+	if updated, err := sjson.SetBytes(body, "max_tokens", requiredMaxTokens); err == nil {
+		return updated
+	}
+	return body
+}
+
 func (e *ClaudeExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
 	if alias == "" {
 		return ""