mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-02-03 04:50:52 +08:00
refactor: improve thinking logic
This commit is contained in:
430
internal/thinking/apply.go
Normal file
430
internal/thinking/apply.go
Normal file
@@ -0,0 +1,430 @@
|
||||
// Package thinking provides unified thinking configuration processing.
|
||||
package thinking
|
||||
|
||||
import (
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
// providerAppliers maps provider names to their ProviderApplier implementations.
|
||||
var providerAppliers = map[string]ProviderApplier{
|
||||
"gemini": nil,
|
||||
"gemini-cli": nil,
|
||||
"claude": nil,
|
||||
"openai": nil,
|
||||
"codex": nil,
|
||||
"iflow": nil,
|
||||
"antigravity": nil,
|
||||
}
|
||||
|
||||
// GetProviderApplier returns the ProviderApplier for the given provider name.
|
||||
// Returns nil if the provider is not registered.
|
||||
func GetProviderApplier(provider string) ProviderApplier {
|
||||
return providerAppliers[provider]
|
||||
}
|
||||
|
||||
// RegisterProvider registers a provider applier by name.
|
||||
func RegisterProvider(name string, applier ProviderApplier) {
|
||||
providerAppliers[name] = applier
|
||||
}
|
||||
|
||||
// IsUserDefinedModel reports whether the model is a user-defined model that should
|
||||
// have thinking configuration passed through without validation.
|
||||
//
|
||||
// User-defined models are configured via config file's models[] array
|
||||
// (e.g., openai-compatibility.*.models[], *-api-key.models[]). These models
|
||||
// are marked with UserDefined=true at registration time.
|
||||
//
|
||||
// User-defined models should have their thinking configuration applied directly,
|
||||
// letting the upstream service validate the configuration.
|
||||
func IsUserDefinedModel(modelInfo *registry.ModelInfo) bool {
|
||||
if modelInfo == nil {
|
||||
return false
|
||||
}
|
||||
return modelInfo.UserDefined
|
||||
}
|
||||
|
||||
// ApplyThinking applies thinking configuration to a request body.
|
||||
//
|
||||
// This is the unified entry point for all providers. It follows the processing
|
||||
// order defined in FR25: route check → model capability query → config extraction
|
||||
// → validation → application.
|
||||
//
|
||||
// Suffix Priority: When the model name includes a thinking suffix (e.g., "gemini-2.5-pro(8192)"),
|
||||
// the suffix configuration takes priority over any thinking parameters in the request body.
|
||||
// This enables users to override thinking settings via the model name without modifying their
|
||||
// request payload.
|
||||
//
|
||||
// Parameters:
|
||||
// - body: Original request body JSON
|
||||
// - model: Model name, optionally with thinking suffix (e.g., "claude-sonnet-4-5(16384)")
|
||||
// - provider: Provider name (gemini, gemini-cli, antigravity, claude, openai, codex, iflow)
|
||||
//
|
||||
// Returns:
|
||||
// - Modified request body JSON with thinking configuration applied
|
||||
// - Error if validation fails (ThinkingError). On error, the original body
|
||||
// is returned (not nil) to enable defensive programming patterns.
|
||||
//
|
||||
// Passthrough behavior (returns original body without error):
|
||||
// - Unknown provider (not in providerAppliers map)
|
||||
// - modelInfo is nil (model not found in registry)
|
||||
// - modelInfo.Thinking is nil (model doesn't support thinking)
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// // With suffix - suffix config takes priority
|
||||
// result, err := thinking.ApplyThinking(body, "gemini-2.5-pro(8192)", "gemini")
|
||||
//
|
||||
// // Without suffix - uses body config
|
||||
// result, err := thinking.ApplyThinking(body, "gemini-2.5-pro", "gemini")
|
||||
func ApplyThinking(body []byte, model string, provider string) ([]byte, error) {
|
||||
// 1. Route check: Get provider applier
|
||||
applier := GetProviderApplier(provider)
|
||||
if applier == nil {
|
||||
log.WithField("provider", provider).Debug("thinking: unknown provider, passthrough")
|
||||
return body, nil
|
||||
}
|
||||
|
||||
// 2. Parse suffix and get modelInfo
|
||||
suffixResult := ParseSuffix(model)
|
||||
baseModel := suffixResult.ModelName
|
||||
modelInfo := registry.GetGlobalRegistry().GetModelInfo(baseModel)
|
||||
|
||||
// 3. Model capability check
|
||||
if modelInfo == nil {
|
||||
log.WithField("model", model).Debug("thinking: nil modelInfo, passthrough")
|
||||
return body, nil
|
||||
}
|
||||
if modelInfo.Thinking == nil {
|
||||
if IsUserDefinedModel(modelInfo) {
|
||||
return applyUserDefinedModel(body, modelInfo, provider, suffixResult)
|
||||
}
|
||||
config := extractThinkingConfig(body, provider)
|
||||
if hasThinkingConfig(config) {
|
||||
log.WithFields(log.Fields{
|
||||
"model": modelInfo.ID,
|
||||
"provider": provider,
|
||||
}).Debug("thinking: model does not support thinking, stripping config")
|
||||
return StripThinkingConfig(body, provider), nil
|
||||
}
|
||||
log.WithField("model", modelInfo.ID).Debug("thinking: model does not support thinking, passthrough")
|
||||
return body, nil
|
||||
}
|
||||
|
||||
// 4. Get config: suffix priority over body
|
||||
var config ThinkingConfig
|
||||
if suffixResult.HasSuffix {
|
||||
config = parseSuffixToConfig(suffixResult.RawSuffix)
|
||||
log.WithFields(log.Fields{
|
||||
"provider": provider,
|
||||
"model": model,
|
||||
"raw_suffix": suffixResult.RawSuffix,
|
||||
"config": config,
|
||||
}).Debug("thinking: using suffix config (priority)")
|
||||
} else {
|
||||
config = extractThinkingConfig(body, provider)
|
||||
log.WithFields(log.Fields{
|
||||
"provider": provider,
|
||||
"model": modelInfo.ID,
|
||||
"config": config,
|
||||
}).Debug("thinking: extracted config from request body")
|
||||
}
|
||||
|
||||
if !hasThinkingConfig(config) {
|
||||
log.WithFields(log.Fields{
|
||||
"provider": provider,
|
||||
"model": modelInfo.ID,
|
||||
}).Debug("thinking: no config found, passthrough")
|
||||
return body, nil
|
||||
}
|
||||
|
||||
// 5. Validate and normalize configuration
|
||||
validated, err := ValidateConfig(config, modelInfo.Thinking)
|
||||
if err != nil {
|
||||
log.WithFields(log.Fields{
|
||||
"provider": provider,
|
||||
"model": modelInfo.ID,
|
||||
"error": err.Error(),
|
||||
}).Warn("thinking: validation failed, returning original body")
|
||||
// Return original body on validation failure (defensive programming).
|
||||
// This ensures callers who ignore the error won't receive nil body.
|
||||
// The upstream service will decide how to handle the unmodified request.
|
||||
return body, err
|
||||
}
|
||||
|
||||
// Defensive check: ValidateConfig should never return (nil, nil)
|
||||
if validated == nil {
|
||||
log.WithFields(log.Fields{
|
||||
"provider": provider,
|
||||
"model": modelInfo.ID,
|
||||
}).Warn("thinking: ValidateConfig returned nil config without error, passthrough")
|
||||
return body, nil
|
||||
}
|
||||
|
||||
log.WithFields(log.Fields{
|
||||
"provider": provider,
|
||||
"model": modelInfo.ID,
|
||||
"validated": *validated,
|
||||
}).Debug("thinking: applying validated config")
|
||||
|
||||
// 6. Apply configuration using provider-specific applier
|
||||
return applier.Apply(body, *validated, modelInfo)
|
||||
}
|
||||
|
||||
// parseSuffixToConfig converts a raw suffix string to ThinkingConfig.
|
||||
//
|
||||
// Parsing priority:
|
||||
// 1. Special values: "none" → ModeNone, "auto"/"-1" → ModeAuto
|
||||
// 2. Level names: "minimal", "low", "medium", "high", "xhigh" → ModeLevel
|
||||
// 3. Numeric values: positive integers → ModeBudget, 0 → ModeNone
|
||||
//
|
||||
// If none of the above match, returns empty ThinkingConfig (treated as no config).
|
||||
func parseSuffixToConfig(rawSuffix string) ThinkingConfig {
|
||||
// 1. Try special values first (none, auto, -1)
|
||||
if mode, ok := ParseSpecialSuffix(rawSuffix); ok {
|
||||
switch mode {
|
||||
case ModeNone:
|
||||
return ThinkingConfig{Mode: ModeNone, Budget: 0}
|
||||
case ModeAuto:
|
||||
return ThinkingConfig{Mode: ModeAuto, Budget: -1}
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Try level parsing (minimal, low, medium, high, xhigh)
|
||||
if level, ok := ParseLevelSuffix(rawSuffix); ok {
|
||||
return ThinkingConfig{Mode: ModeLevel, Level: level}
|
||||
}
|
||||
|
||||
// 3. Try numeric parsing
|
||||
if budget, ok := ParseNumericSuffix(rawSuffix); ok {
|
||||
if budget == 0 {
|
||||
return ThinkingConfig{Mode: ModeNone, Budget: 0}
|
||||
}
|
||||
return ThinkingConfig{Mode: ModeBudget, Budget: budget}
|
||||
}
|
||||
|
||||
// Unknown suffix format - return empty config
|
||||
log.WithField("raw_suffix", rawSuffix).Debug("thinking: unknown suffix format, treating as no config")
|
||||
return ThinkingConfig{}
|
||||
}
|
||||
|
||||
// applyUserDefinedModel applies thinking configuration for user-defined models
|
||||
// without ThinkingSupport validation.
|
||||
func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, provider string, suffixResult SuffixResult) ([]byte, error) {
|
||||
// Get config: suffix priority over body
|
||||
var config ThinkingConfig
|
||||
if suffixResult.HasSuffix {
|
||||
config = parseSuffixToConfig(suffixResult.RawSuffix)
|
||||
} else {
|
||||
config = extractThinkingConfig(body, provider)
|
||||
}
|
||||
|
||||
if !hasThinkingConfig(config) {
|
||||
log.WithFields(log.Fields{
|
||||
"model": modelInfo.ID,
|
||||
"provider": provider,
|
||||
"user_defined": true,
|
||||
"passthrough": true,
|
||||
}).Debug("thinking: user-defined model, no config, passthrough")
|
||||
return body, nil
|
||||
}
|
||||
|
||||
applier := GetProviderApplier(provider)
|
||||
if applier == nil {
|
||||
log.WithFields(log.Fields{
|
||||
"model": modelInfo.ID,
|
||||
"provider": provider,
|
||||
"user_defined": true,
|
||||
"passthrough": true,
|
||||
}).Debug("thinking: user-defined model, unknown provider, passthrough")
|
||||
return body, nil
|
||||
}
|
||||
|
||||
log.WithFields(log.Fields{
|
||||
"model": modelInfo.ID,
|
||||
"provider": provider,
|
||||
"user_defined": true,
|
||||
"passthrough": false,
|
||||
"config": config,
|
||||
}).Debug("thinking: applying config for user-defined model (skip validation)")
|
||||
|
||||
return applier.Apply(body, config, modelInfo)
|
||||
}
|
||||
|
||||
// extractThinkingConfig extracts provider-specific thinking config from request body.
|
||||
func extractThinkingConfig(body []byte, provider string) ThinkingConfig {
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
return ThinkingConfig{}
|
||||
}
|
||||
|
||||
switch provider {
|
||||
case "claude":
|
||||
return extractClaudeConfig(body)
|
||||
case "gemini", "gemini-cli", "antigravity":
|
||||
return extractGeminiConfig(body, provider)
|
||||
case "openai":
|
||||
return extractOpenAIConfig(body)
|
||||
case "codex":
|
||||
return extractCodexConfig(body)
|
||||
case "iflow":
|
||||
return extractIFlowConfig(body)
|
||||
default:
|
||||
return ThinkingConfig{}
|
||||
}
|
||||
}
|
||||
|
||||
func hasThinkingConfig(config ThinkingConfig) bool {
|
||||
return config.Mode != ModeBudget || config.Budget != 0 || config.Level != ""
|
||||
}
|
||||
|
||||
// extractClaudeConfig extracts thinking configuration from Claude format request body.
|
||||
//
|
||||
// Claude API format:
|
||||
// - thinking.type: "enabled" or "disabled"
|
||||
// - thinking.budget_tokens: integer (-1=auto, 0=disabled, >0=budget)
|
||||
//
|
||||
// Priority: thinking.type="disabled" takes precedence over budget_tokens.
|
||||
// When type="enabled" without budget_tokens, returns ModeAuto to indicate
|
||||
// the user wants thinking enabled but didn't specify a budget.
|
||||
func extractClaudeConfig(body []byte) ThinkingConfig {
|
||||
thinkingType := gjson.GetBytes(body, "thinking.type").String()
|
||||
if thinkingType == "disabled" {
|
||||
return ThinkingConfig{Mode: ModeNone, Budget: 0}
|
||||
}
|
||||
|
||||
// Check budget_tokens
|
||||
if budget := gjson.GetBytes(body, "thinking.budget_tokens"); budget.Exists() {
|
||||
value := int(budget.Int())
|
||||
switch value {
|
||||
case 0:
|
||||
return ThinkingConfig{Mode: ModeNone, Budget: 0}
|
||||
case -1:
|
||||
return ThinkingConfig{Mode: ModeAuto, Budget: -1}
|
||||
default:
|
||||
return ThinkingConfig{Mode: ModeBudget, Budget: value}
|
||||
}
|
||||
}
|
||||
|
||||
// If type="enabled" but no budget_tokens, treat as auto (user wants thinking but no budget specified)
|
||||
if thinkingType == "enabled" {
|
||||
return ThinkingConfig{Mode: ModeAuto, Budget: -1}
|
||||
}
|
||||
|
||||
return ThinkingConfig{}
|
||||
}
|
||||
|
||||
// extractGeminiConfig extracts thinking configuration from Gemini format request body.
|
||||
//
|
||||
// Gemini API format:
|
||||
// - generationConfig.thinkingConfig.thinkingLevel: "none", "auto", or level name (Gemini 3)
|
||||
// - generationConfig.thinkingConfig.thinkingBudget: integer (Gemini 2.5)
|
||||
//
|
||||
// For gemini-cli and antigravity providers, the path is prefixed with "request.".
|
||||
//
|
||||
// Priority: thinkingLevel is checked first (Gemini 3 format), then thinkingBudget (Gemini 2.5 format).
|
||||
// This allows newer Gemini 3 level-based configs to take precedence.
|
||||
func extractGeminiConfig(body []byte, provider string) ThinkingConfig {
|
||||
prefix := "generationConfig.thinkingConfig"
|
||||
if provider == "gemini-cli" || provider == "antigravity" {
|
||||
prefix = "request.generationConfig.thinkingConfig"
|
||||
}
|
||||
|
||||
// Check thinkingLevel first (Gemini 3 format takes precedence)
|
||||
if level := gjson.GetBytes(body, prefix+".thinkingLevel"); level.Exists() {
|
||||
value := level.String()
|
||||
switch value {
|
||||
case "none":
|
||||
return ThinkingConfig{Mode: ModeNone, Budget: 0}
|
||||
case "auto":
|
||||
return ThinkingConfig{Mode: ModeAuto, Budget: -1}
|
||||
default:
|
||||
return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(value)}
|
||||
}
|
||||
}
|
||||
|
||||
// Check thinkingBudget (Gemini 2.5 format)
|
||||
if budget := gjson.GetBytes(body, prefix+".thinkingBudget"); budget.Exists() {
|
||||
value := int(budget.Int())
|
||||
switch value {
|
||||
case 0:
|
||||
return ThinkingConfig{Mode: ModeNone, Budget: 0}
|
||||
case -1:
|
||||
return ThinkingConfig{Mode: ModeAuto, Budget: -1}
|
||||
default:
|
||||
return ThinkingConfig{Mode: ModeBudget, Budget: value}
|
||||
}
|
||||
}
|
||||
|
||||
return ThinkingConfig{}
|
||||
}
|
||||
|
||||
// extractOpenAIConfig extracts thinking configuration from OpenAI format request body.
|
||||
//
|
||||
// OpenAI API format:
|
||||
// - reasoning_effort: "none", "low", "medium", "high" (discrete levels)
|
||||
//
|
||||
// OpenAI uses level-based thinking configuration only, no numeric budget support.
|
||||
// The "none" value is treated specially to return ModeNone.
|
||||
func extractOpenAIConfig(body []byte) ThinkingConfig {
|
||||
// Check reasoning_effort (OpenAI Chat Completions format)
|
||||
if effort := gjson.GetBytes(body, "reasoning_effort"); effort.Exists() {
|
||||
value := effort.String()
|
||||
if value == "none" {
|
||||
return ThinkingConfig{Mode: ModeNone, Budget: 0}
|
||||
}
|
||||
return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(value)}
|
||||
}
|
||||
|
||||
return ThinkingConfig{}
|
||||
}
|
||||
|
||||
// extractCodexConfig extracts thinking configuration from Codex format request body.
|
||||
//
|
||||
// Codex API format (OpenAI Responses API):
|
||||
// - reasoning.effort: "none", "low", "medium", "high"
|
||||
//
|
||||
// This is similar to OpenAI but uses nested field "reasoning.effort" instead of "reasoning_effort".
|
||||
func extractCodexConfig(body []byte) ThinkingConfig {
|
||||
// Check reasoning.effort (Codex / OpenAI Responses API format)
|
||||
if effort := gjson.GetBytes(body, "reasoning.effort"); effort.Exists() {
|
||||
value := effort.String()
|
||||
if value == "none" {
|
||||
return ThinkingConfig{Mode: ModeNone, Budget: 0}
|
||||
}
|
||||
return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(value)}
|
||||
}
|
||||
|
||||
return ThinkingConfig{}
|
||||
}
|
||||
|
||||
// extractIFlowConfig extracts thinking configuration from iFlow format request body.
|
||||
//
|
||||
// iFlow API format (supports multiple model families):
|
||||
// - GLM format: chat_template_kwargs.enable_thinking (boolean)
|
||||
// - MiniMax format: reasoning_split (boolean)
|
||||
//
|
||||
// Returns ModeBudget with Budget=1 as a sentinel value indicating "enabled".
|
||||
// The actual budget/configuration is determined by the iFlow applier based on model capabilities.
|
||||
// Budget=1 is used because iFlow models don't use numeric budgets; they only support on/off.
|
||||
func extractIFlowConfig(body []byte) ThinkingConfig {
|
||||
// GLM format: chat_template_kwargs.enable_thinking
|
||||
if enabled := gjson.GetBytes(body, "chat_template_kwargs.enable_thinking"); enabled.Exists() {
|
||||
if enabled.Bool() {
|
||||
// Budget=1 is a sentinel meaning "enabled" (iFlow doesn't use numeric budgets)
|
||||
return ThinkingConfig{Mode: ModeBudget, Budget: 1}
|
||||
}
|
||||
return ThinkingConfig{Mode: ModeNone, Budget: 0}
|
||||
}
|
||||
|
||||
// MiniMax format: reasoning_split
|
||||
if split := gjson.GetBytes(body, "reasoning_split"); split.Exists() {
|
||||
if split.Bool() {
|
||||
// Budget=1 is a sentinel meaning "enabled" (iFlow doesn't use numeric budgets)
|
||||
return ThinkingConfig{Mode: ModeBudget, Budget: 1}
|
||||
}
|
||||
return ThinkingConfig{Mode: ModeNone, Budget: 0}
|
||||
}
|
||||
|
||||
return ThinkingConfig{}
|
||||
}
|
||||
Reference in New Issue
Block a user