From c421d653e75e3eb161d6f1d96578c40510e1fbb8 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Mon, 19 Jan 2026 15:50:35 +0800 Subject: [PATCH 1/2] refactor(claude): move max_tokens constraint enforcement to Apply method --- internal/runtime/executor/claude_executor.go | 82 -------------------- internal/thinking/provider/claude/apply.go | 45 +++++++++++ 2 files changed, 45 insertions(+), 82 deletions(-) diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 17c5a143..b6d5418a 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -17,7 +17,6 @@ import ( claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" - "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" @@ -119,9 +118,6 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) - // Ensure max_tokens > thinking.budget_tokens when thinking is enabled - body = ensureMaxTokensForThinking(baseModel, body) - // Extract betas from body and convert to header var extraBetas []string extraBetas, body = extractAndRemoveBetas(body) @@ -250,9 +246,6 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) - // Ensure max_tokens > thinking.budget_tokens when thinking is enabled - body = ensureMaxTokensForThinking(baseModel, body) - // Extract betas 
from body and convert to header var extraBetas []string extraBetas, body = extractAndRemoveBetas(body) @@ -541,81 +534,6 @@ func disableThinkingIfToolChoiceForced(body []byte) []byte { return body } -// ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled. -// Anthropic API requires this constraint; violating it returns a 400 error. -// This function should be called after all thinking configuration is finalized. -// It looks up the model's MaxCompletionTokens from the registry to use as the cap. -func ensureMaxTokensForThinking(modelName string, body []byte) []byte { - thinkingType := gjson.GetBytes(body, "thinking.type").String() - if thinkingType != "enabled" { - return body - } - - budgetTokens := gjson.GetBytes(body, "thinking.budget_tokens").Int() - if budgetTokens <= 0 { - return body - } - - maxTokens := gjson.GetBytes(body, "max_tokens").Int() - - // Look up the model's max completion tokens from the registry - maxCompletionTokens := 0 - if modelInfo := registry.LookupModelInfo(modelName); modelInfo != nil { - maxCompletionTokens = modelInfo.MaxCompletionTokens - } - - // Fall back to budget + buffer if registry lookup fails or returns 0 - const fallbackBuffer = 4000 - requiredMaxTokens := budgetTokens + fallbackBuffer - if maxCompletionTokens > 0 { - requiredMaxTokens = int64(maxCompletionTokens) - } - - if maxTokens < requiredMaxTokens { - body, _ = sjson.SetBytes(body, "max_tokens", requiredMaxTokens) - } - return body -} - -func (e *ClaudeExecutor) resolveClaudeConfig(auth *cliproxyauth.Auth) *config.ClaudeKey { - if auth == nil || e.cfg == nil { - return nil - } - var attrKey, attrBase string - if auth.Attributes != nil { - attrKey = strings.TrimSpace(auth.Attributes["api_key"]) - attrBase = strings.TrimSpace(auth.Attributes["base_url"]) - } - for i := range e.cfg.ClaudeKey { - entry := &e.cfg.ClaudeKey[i] - cfgKey := strings.TrimSpace(entry.APIKey) - cfgBase := strings.TrimSpace(entry.BaseURL) - if attrKey 
!= "" && attrBase != "" { - if strings.EqualFold(cfgKey, attrKey) && strings.EqualFold(cfgBase, attrBase) { - return entry - } - continue - } - if attrKey != "" && strings.EqualFold(cfgKey, attrKey) { - if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) { - return entry - } - } - if attrKey == "" && attrBase != "" && strings.EqualFold(cfgBase, attrBase) { - return entry - } - } - if attrKey != "" { - for i := range e.cfg.ClaudeKey { - entry := &e.cfg.ClaudeKey[i] - if strings.EqualFold(strings.TrimSpace(entry.APIKey), attrKey) { - return entry - } - } - } - return nil -} - type compositeReadCloser struct { io.Reader closers []func() error diff --git a/internal/thinking/provider/claude/apply.go b/internal/thinking/provider/claude/apply.go index b7833072..babc2f76 100644 --- a/internal/thinking/provider/claude/apply.go +++ b/internal/thinking/provider/claude/apply.go @@ -80,9 +80,54 @@ func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo * result, _ := sjson.SetBytes(body, "thinking.type", "enabled") result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget) + + // Ensure max_tokens > thinking.budget_tokens (Anthropic API constraint) + result = a.normalizeClaudeBudget(result, config.Budget, modelInfo) return result, nil } +// normalizeClaudeBudget applies Claude-specific constraints to ensure max_tokens > budget_tokens. +// Anthropic API requires this constraint; violating it returns a 400 error. 
+func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo *registry.ModelInfo) []byte { + if budgetTokens <= 0 { + return body + } + + effectiveMax, setDefaultMax := a.effectiveMaxTokens(body, modelInfo) + if effectiveMax > 0 && effectiveMax > budgetTokens { + if setDefaultMax { + body, _ = sjson.SetBytes(body, "max_tokens", effectiveMax) + } + return body + } + + // Fall back to budget + buffer if no effective max or max <= budget + const fallbackBuffer = 4000 + requiredMaxTokens := budgetTokens + fallbackBuffer + if effectiveMax > 0 && effectiveMax > requiredMaxTokens { + requiredMaxTokens = effectiveMax + } + + currentMax := gjson.GetBytes(body, "max_tokens").Int() + if currentMax < int64(requiredMaxTokens) { + body, _ = sjson.SetBytes(body, "max_tokens", requiredMaxTokens) + } + return body +} + +// effectiveMaxTokens returns the max tokens to cap thinking: +// prefer request-provided max_tokens; otherwise fall back to model default. +// The boolean indicates whether the value came from the model default (and thus should be written back). 
+func (a *Applier) effectiveMaxTokens(body []byte, modelInfo *registry.ModelInfo) (max int, fromModel bool) { + if maxTok := gjson.GetBytes(body, "max_tokens"); maxTok.Exists() && maxTok.Int() > 0 { + return int(maxTok.Int()), false + } + if modelInfo != nil && modelInfo.MaxCompletionTokens > 0 { + return modelInfo.MaxCompletionTokens, true + } + return 0, false +} + func applyCompatibleClaude(body []byte, config thinking.ThinkingConfig) ([]byte, error) { if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto { return body, nil From 239a28793c3b0229a0cefe7673c4e72c54c3288e Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Mon, 19 Jan 2026 16:32:20 +0800 Subject: [PATCH 2/2] feat(claude): clamp thinking budget to max_tokens constraints --- internal/thinking/provider/claude/apply.go | 38 ++++++++++++++-------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/internal/thinking/provider/claude/apply.go b/internal/thinking/provider/claude/apply.go index babc2f76..3c74d514 100644 --- a/internal/thinking/provider/claude/apply.go +++ b/internal/thinking/provider/claude/apply.go @@ -93,25 +93,37 @@ func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo return body } + // Ensure the request satisfies Claude constraints: + // 1) Determine effective max_tokens (request overrides model default) + // 2) If budget_tokens >= max_tokens, reduce budget_tokens to max_tokens-1 + // 3) If the adjusted budget falls below the model minimum, leave the request unchanged + // 4) If max_tokens came from model default, write it back into the request + effectiveMax, setDefaultMax := a.effectiveMaxTokens(body, modelInfo) - if effectiveMax > 0 && effectiveMax > budgetTokens { - if setDefaultMax { - body, _ = sjson.SetBytes(body, "max_tokens", effectiveMax) - } + if setDefaultMax && effectiveMax > 0 { + body, _ = sjson.SetBytes(body, "max_tokens", effectiveMax) + } 
+ + // Compute the budget we would apply after enforcing budget_tokens < max_tokens. + adjustedBudget := budgetTokens + if effectiveMax > 0 && adjustedBudget >= effectiveMax { + adjustedBudget = effectiveMax - 1 + } + + minBudget := 0 + if modelInfo != nil && modelInfo.Thinking != nil { + minBudget = modelInfo.Thinking.Min + } + if minBudget > 0 && adjustedBudget > 0 && adjustedBudget < minBudget { + // If the (possibly clamped) budget is below the model minimum, do not rewrite + // thinking.budget_tokens; a model-default max_tokens written above is kept. return body } - // Fall back to budget + buffer if no effective max or max <= budget - const fallbackBuffer = 4000 - requiredMaxTokens := budgetTokens + fallbackBuffer - if effectiveMax > 0 && effectiveMax > requiredMaxTokens { - requiredMaxTokens = effectiveMax + if adjustedBudget != budgetTokens { + body, _ = sjson.SetBytes(body, "thinking.budget_tokens", adjustedBudget) } - currentMax := gjson.GetBytes(body, "max_tokens").Int() - if currentMax < int64(requiredMaxTokens) { - body, _ = sjson.SetBytes(body, "max_tokens", requiredMaxTokens) - } return body }