mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-02-02 20:40:52 +08:00
refactor(claude): move max_tokens constraint enforcement to Apply method
This commit is contained in:
@@ -17,7 +17,6 @@ import (
|
|||||||
claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
|
claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||||
@@ -119,9 +118,6 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
|||||||
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
||||||
body = disableThinkingIfToolChoiceForced(body)
|
body = disableThinkingIfToolChoiceForced(body)
|
||||||
|
|
||||||
// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
|
|
||||||
body = ensureMaxTokensForThinking(baseModel, body)
|
|
||||||
|
|
||||||
// Extract betas from body and convert to header
|
// Extract betas from body and convert to header
|
||||||
var extraBetas []string
|
var extraBetas []string
|
||||||
extraBetas, body = extractAndRemoveBetas(body)
|
extraBetas, body = extractAndRemoveBetas(body)
|
||||||
@@ -250,9 +246,6 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
|||||||
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
||||||
body = disableThinkingIfToolChoiceForced(body)
|
body = disableThinkingIfToolChoiceForced(body)
|
||||||
|
|
||||||
// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
|
|
||||||
body = ensureMaxTokensForThinking(baseModel, body)
|
|
||||||
|
|
||||||
// Extract betas from body and convert to header
|
// Extract betas from body and convert to header
|
||||||
var extraBetas []string
|
var extraBetas []string
|
||||||
extraBetas, body = extractAndRemoveBetas(body)
|
extraBetas, body = extractAndRemoveBetas(body)
|
||||||
@@ -541,81 +534,6 @@ func disableThinkingIfToolChoiceForced(body []byte) []byte {
|
|||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
|
||||||
// ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled.
|
|
||||||
// Anthropic API requires this constraint; violating it returns a 400 error.
|
|
||||||
// This function should be called after all thinking configuration is finalized.
|
|
||||||
// It looks up the model's MaxCompletionTokens from the registry to use as the cap.
|
|
||||||
func ensureMaxTokensForThinking(modelName string, body []byte) []byte {
|
|
||||||
thinkingType := gjson.GetBytes(body, "thinking.type").String()
|
|
||||||
if thinkingType != "enabled" {
|
|
||||||
return body
|
|
||||||
}
|
|
||||||
|
|
||||||
budgetTokens := gjson.GetBytes(body, "thinking.budget_tokens").Int()
|
|
||||||
if budgetTokens <= 0 {
|
|
||||||
return body
|
|
||||||
}
|
|
||||||
|
|
||||||
maxTokens := gjson.GetBytes(body, "max_tokens").Int()
|
|
||||||
|
|
||||||
// Look up the model's max completion tokens from the registry
|
|
||||||
maxCompletionTokens := 0
|
|
||||||
if modelInfo := registry.LookupModelInfo(modelName); modelInfo != nil {
|
|
||||||
maxCompletionTokens = modelInfo.MaxCompletionTokens
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fall back to budget + buffer if registry lookup fails or returns 0
|
|
||||||
const fallbackBuffer = 4000
|
|
||||||
requiredMaxTokens := budgetTokens + fallbackBuffer
|
|
||||||
if maxCompletionTokens > 0 {
|
|
||||||
requiredMaxTokens = int64(maxCompletionTokens)
|
|
||||||
}
|
|
||||||
|
|
||||||
if maxTokens < requiredMaxTokens {
|
|
||||||
body, _ = sjson.SetBytes(body, "max_tokens", requiredMaxTokens)
|
|
||||||
}
|
|
||||||
return body
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *ClaudeExecutor) resolveClaudeConfig(auth *cliproxyauth.Auth) *config.ClaudeKey {
|
|
||||||
if auth == nil || e.cfg == nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
var attrKey, attrBase string
|
|
||||||
if auth.Attributes != nil {
|
|
||||||
attrKey = strings.TrimSpace(auth.Attributes["api_key"])
|
|
||||||
attrBase = strings.TrimSpace(auth.Attributes["base_url"])
|
|
||||||
}
|
|
||||||
for i := range e.cfg.ClaudeKey {
|
|
||||||
entry := &e.cfg.ClaudeKey[i]
|
|
||||||
cfgKey := strings.TrimSpace(entry.APIKey)
|
|
||||||
cfgBase := strings.TrimSpace(entry.BaseURL)
|
|
||||||
if attrKey != "" && attrBase != "" {
|
|
||||||
if strings.EqualFold(cfgKey, attrKey) && strings.EqualFold(cfgBase, attrBase) {
|
|
||||||
return entry
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if attrKey != "" && strings.EqualFold(cfgKey, attrKey) {
|
|
||||||
if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) {
|
|
||||||
return entry
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if attrKey == "" && attrBase != "" && strings.EqualFold(cfgBase, attrBase) {
|
|
||||||
return entry
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if attrKey != "" {
|
|
||||||
for i := range e.cfg.ClaudeKey {
|
|
||||||
entry := &e.cfg.ClaudeKey[i]
|
|
||||||
if strings.EqualFold(strings.TrimSpace(entry.APIKey), attrKey) {
|
|
||||||
return entry
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
type compositeReadCloser struct {
|
type compositeReadCloser struct {
|
||||||
io.Reader
|
io.Reader
|
||||||
closers []func() error
|
closers []func() error
|
||||||
|
|||||||
@@ -80,9 +80,54 @@ func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *
|
|||||||
|
|
||||||
result, _ := sjson.SetBytes(body, "thinking.type", "enabled")
|
result, _ := sjson.SetBytes(body, "thinking.type", "enabled")
|
||||||
result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget)
|
result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget)
|
||||||
|
|
||||||
|
// Ensure max_tokens > thinking.budget_tokens (Anthropic API constraint)
|
||||||
|
result = a.normalizeClaudeBudget(result, config.Budget, modelInfo)
|
||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// normalizeClaudeBudget applies Claude-specific constraints to ensure max_tokens > budget_tokens.
|
||||||
|
// Anthropic API requires this constraint; violating it returns a 400 error.
|
||||||
|
func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo *registry.ModelInfo) []byte {
|
||||||
|
if budgetTokens <= 0 {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
effectiveMax, setDefaultMax := a.effectiveMaxTokens(body, modelInfo)
|
||||||
|
if effectiveMax > 0 && effectiveMax > budgetTokens {
|
||||||
|
if setDefaultMax {
|
||||||
|
body, _ = sjson.SetBytes(body, "max_tokens", effectiveMax)
|
||||||
|
}
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fall back to budget + buffer if no effective max or max <= budget
|
||||||
|
const fallbackBuffer = 4000
|
||||||
|
requiredMaxTokens := budgetTokens + fallbackBuffer
|
||||||
|
if effectiveMax > 0 && effectiveMax > requiredMaxTokens {
|
||||||
|
requiredMaxTokens = effectiveMax
|
||||||
|
}
|
||||||
|
|
||||||
|
currentMax := gjson.GetBytes(body, "max_tokens").Int()
|
||||||
|
if currentMax < int64(requiredMaxTokens) {
|
||||||
|
body, _ = sjson.SetBytes(body, "max_tokens", requiredMaxTokens)
|
||||||
|
}
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
// effectiveMaxTokens returns the max tokens to cap thinking:
|
||||||
|
// prefer request-provided max_tokens; otherwise fall back to model default.
|
||||||
|
// The boolean indicates whether the value came from the model default (and thus should be written back).
|
||||||
|
func (a *Applier) effectiveMaxTokens(body []byte, modelInfo *registry.ModelInfo) (max int, fromModel bool) {
|
||||||
|
if maxTok := gjson.GetBytes(body, "max_tokens"); maxTok.Exists() && maxTok.Int() > 0 {
|
||||||
|
return int(maxTok.Int()), false
|
||||||
|
}
|
||||||
|
if modelInfo != nil && modelInfo.MaxCompletionTokens > 0 {
|
||||||
|
return modelInfo.MaxCompletionTokens, true
|
||||||
|
}
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
|
||||||
func applyCompatibleClaude(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
func applyCompatibleClaude(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
||||||
if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
|
if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
|
||||||
return body, nil
|
return body, nil
|
||||||
|
|||||||
Reference in New Issue
Block a user