feat(util): implement dynamic thinking suffix normalization and refactor budget resolution logic

- Added support for parsing and normalizing dynamic thinking model suffixes.
- Added a `ResolveOriginalModel` utility that resolves the normalized upstream model from request metadata, with robust fallbacks when metadata is missing or malformed.
- Centralized thinking-budget resolution across executors and payload helpers.
- Updated executors (Claude, Codex, Gemini, iFlow, OpenAI-compatible, Qwen) to resolve the upstream model via metadata and substitute it in payloads and request URLs; payload mutations now always include the resolved model.
- Switched thinking configuration and reasoning-effort overrides to metadata-based resolution.
- Removed hardcoded thinking model entries and retired the legacy Gemini-specific thinking handlers in favor of the unified logic.
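Note: the normalization and resolution utilities referenced throughout (util.NormalizeThinkingModel, util.ResolveOriginalModel, util.ThinkingFromMetadata) live in internal/util and are not part of this diff. Below is a minimal sketch of the intended shape, inferred from the call sites in the hunks that follow; the function bodies, the metadata keys ("thinking_effort", "original_model"), and the default tier are assumptions, not the repository's actual implementation.

package main

import (
	"fmt"
	"strings"
)

// normalizeThinkingModel splits a recognized dynamic suffix off a model name
// and records the parse in a metadata map (key names are illustrative).
func normalizeThinkingModel(name string) (string, map[string]any) {
	base := strings.ToLower(strings.TrimSpace(name))
	meta := map[string]any{}
	for _, effort := range []string{"minimal", "none", "low", "medium", "high", "xhigh"} {
		if b, ok := strings.CutSuffix(base, "-"+effort); ok {
			base = b
			meta["thinking_effort"] = effort
			break
		}
	}
	if b, ok := strings.CutSuffix(base, "-thinking"); ok {
		base = b
		if _, seen := meta["thinking_effort"]; !seen {
			meta["thinking_effort"] = "medium" // assumed default, mirroring the Claude suffix table below
		}
	}
	if len(meta) == 0 {
		return base, nil
	}
	meta["original_model"] = base // the upstream model the variant derives from
	return base, meta
}

// resolveOriginalModel recovers the upstream model recorded during
// normalization, re-deriving it from the name when metadata is missing
// or malformed (the fallback behaviour the commit message describes).
func resolveOriginalModel(model string, metadata map[string]any) string {
	if base, ok := metadata["original_model"].(string); ok && base != "" {
		return base
	}
	base, _ := normalizeThinkingModel(model)
	return base
}

func main() {
	base, meta := normalizeThinkingModel("gpt-5.1-codex-max-xhigh")
	fmt.Println(base, meta["thinking_effort"])                        // gpt-5.1-codex-max xhigh
	fmt.Println(resolveOriginalModel("gpt-5.1-codex-max-xhigh", nil)) // gpt-5.1-codex-max
}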
Author: Luis Pater
Date:   2025-12-11 01:16:56 +08:00
Parent: e717939edb
Commit: 423ce97665

13 changed files with 579 additions and 647 deletions

View File

@@ -133,8 +133,8 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
 		return
 	}
-	// Normalize model (handles Gemini thinking suffixes)
-	normalizedModel, _ := util.NormalizeGeminiThinkingModel(modelName)
+	// Normalize model (handles dynamic thinking suffixes)
+	normalizedModel, _ := util.NormalizeThinkingModel(modelName)
 	// Track resolved model for logging (may change if mapping is applied)
 	resolvedModel := normalizedModel

View File

@@ -26,60 +26,6 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName: "Claude 4.5 Sonnet",
 			ContextLength: 200000,
 			MaxCompletionTokens: 64000,
-		},
-		{
-			ID: "claude-sonnet-4-5-thinking",
-			Object: "model",
-			Created: 1759104000, // 2025-09-29
-			OwnedBy: "anthropic",
-			Type: "claude",
-			DisplayName: "Claude 4.5 Sonnet Thinking",
-			ContextLength: 200000,
-			MaxCompletionTokens: 64000,
-			Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID: "claude-opus-4-5-thinking",
-			Object: "model",
-			Created: 1761955200, // 2025-11-01
-			OwnedBy: "anthropic",
-			Type: "claude",
-			DisplayName: "Claude 4.5 Opus Thinking",
-			ContextLength: 200000,
-			MaxCompletionTokens: 64000,
-			Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID: "claude-opus-4-5-thinking-low",
-			Object: "model",
-			Created: 1761955200, // 2025-11-01
-			OwnedBy: "anthropic",
-			Type: "claude",
-			DisplayName: "Claude 4.5 Opus Thinking Low",
-			ContextLength: 200000,
-			MaxCompletionTokens: 64000,
-			Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID: "claude-opus-4-5-thinking-medium",
-			Object: "model",
-			Created: 1761955200, // 2025-11-01
-			OwnedBy: "anthropic",
-			Type: "claude",
-			DisplayName: "Claude 4.5 Opus Thinking Medium",
-			ContextLength: 200000,
-			MaxCompletionTokens: 64000,
-			Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID: "claude-opus-4-5-thinking-high",
-			Object: "model",
-			Created: 1761955200, // 2025-11-01
-			OwnedBy: "anthropic",
-			Type: "claude",
-			DisplayName: "Claude 4.5 Opus Thinking High",
-			ContextLength: 200000,
-			MaxCompletionTokens: 64000,
 			Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
@@ -92,6 +38,7 @@ func GetClaudeModels() []*ModelInfo {
 			Description: "Premium model combining maximum intelligence with practical performance",
 			ContextLength: 200000,
 			MaxCompletionTokens: 64000,
+			Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
 			ID: "claude-opus-4-1-20250805",
@@ -530,58 +477,6 @@ func GetOpenAIModels() []*ModelInfo {
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
 		},
-		{
-			ID: "gpt-5-minimal",
-			Object: "model",
-			Created: 1754524800,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5-2025-08-07",
-			DisplayName: "GPT 5 Minimal",
-			Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID: "gpt-5-low",
-			Object: "model",
-			Created: 1754524800,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5-2025-08-07",
-			DisplayName: "GPT 5 Low",
-			Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID: "gpt-5-medium",
-			Object: "model",
-			Created: 1754524800,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5-2025-08-07",
-			DisplayName: "GPT 5 Medium",
-			Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID: "gpt-5-high",
-			Object: "model",
-			Created: 1754524800,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5-2025-08-07",
-			DisplayName: "GPT 5 High",
-			Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
 		{
 			ID: "gpt-5-codex",
 			Object: "model",
@@ -595,45 +490,6 @@ func GetOpenAIModels() []*ModelInfo {
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
 		},
-		{
-			ID: "gpt-5-codex-low",
-			Object: "model",
-			Created: 1757894400,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5-2025-09-15",
-			DisplayName: "GPT 5 Codex Low",
-			Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID: "gpt-5-codex-medium",
-			Object: "model",
-			Created: 1757894400,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5-2025-09-15",
-			DisplayName: "GPT 5 Codex Medium",
-			Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID: "gpt-5-codex-high",
-			Object: "model",
-			Created: 1757894400,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5-2025-09-15",
-			DisplayName: "GPT 5 Codex High",
-			Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
 		{
 			ID: "gpt-5-codex-mini",
 			Object: "model",
@@ -647,32 +503,6 @@ func GetOpenAIModels() []*ModelInfo {
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
 		},
-		{
-			ID: "gpt-5-codex-mini-medium",
-			Object: "model",
-			Created: 1762473600,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5-2025-11-07",
-			DisplayName: "GPT 5 Codex Mini Medium",
-			Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID: "gpt-5-codex-mini-high",
-			Object: "model",
-			Created: 1762473600,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5-2025-11-07",
-			DisplayName: "GPT 5 Codex Mini High",
-			Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
 		{
 			ID: "gpt-5.1",
 			Object: "model",
@@ -686,58 +516,6 @@ func GetOpenAIModels() []*ModelInfo {
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
 		},
-		{
-			ID: "gpt-5.1-none",
-			Object: "model",
-			Created: 1762905600,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5.1-2025-11-12",
-			DisplayName: "GPT 5.1 Nothink",
-			Description: "Stable version of GPT 5.1, The best model for coding and agentic tasks across domains.",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID: "gpt-5.1-low",
-			Object: "model",
-			Created: 1762905600,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5.1-2025-11-12",
-			DisplayName: "GPT 5 Low",
-			Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID: "gpt-5.1-medium",
-			Object: "model",
-			Created: 1762905600,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5.1-2025-11-12",
-			DisplayName: "GPT 5.1 Medium",
-			Description: "Stable version of GPT 5.1, The best model for coding and agentic tasks across domains.",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID: "gpt-5.1-high",
-			Object: "model",
-			Created: 1762905600,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5.1-2025-11-12",
-			DisplayName: "GPT 5.1 High",
-			Description: "Stable version of GPT 5.1, The best model for coding and agentic tasks across domains.",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
 		{
 			ID: "gpt-5.1-codex",
 			Object: "model",
@@ -751,45 +529,6 @@ func GetOpenAIModels() []*ModelInfo {
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
 		},
-		{
-			ID: "gpt-5.1-codex-low",
-			Object: "model",
-			Created: 1762905600,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5.1-2025-11-12",
-			DisplayName: "GPT 5.1 Codex Low",
-			Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID: "gpt-5.1-codex-medium",
-			Object: "model",
-			Created: 1762905600,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5.1-2025-11-12",
-			DisplayName: "GPT 5.1 Codex Medium",
-			Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID: "gpt-5.1-codex-high",
-			Object: "model",
-			Created: 1762905600,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5.1-2025-11-12",
-			DisplayName: "GPT 5.1 Codex High",
-			Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
 		{
 			ID: "gpt-5.1-codex-mini",
 			Object: "model",
@@ -803,33 +542,6 @@ func GetOpenAIModels() []*ModelInfo {
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
 		},
-		{
-			ID: "gpt-5.1-codex-mini-medium",
-			Object: "model",
-			Created: 1762905600,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5.1-2025-11-12",
-			DisplayName: "GPT 5.1 Codex Mini Medium",
-			Description: "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID: "gpt-5.1-codex-mini-high",
-			Object: "model",
-			Created: 1762905600,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5.1-2025-11-12",
-			DisplayName: "GPT 5.1 Codex Mini High",
-			Description: "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
 		{
 			ID: "gpt-5.1-codex-max",
 			Object: "model",
@@ -843,58 +555,6 @@ func GetOpenAIModels() []*ModelInfo {
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
 		},
-		{
-			ID: "gpt-5.1-codex-max-low",
-			Object: "model",
-			Created: 1763424000,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5.1-max",
-			DisplayName: "GPT 5.1 Codex Max Low",
-			Description: "Stable version of GPT 5.1 Codex Max Low",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID: "gpt-5.1-codex-max-medium",
-			Object: "model",
-			Created: 1763424000,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5.1-max",
-			DisplayName: "GPT 5.1 Codex Max Medium",
-			Description: "Stable version of GPT 5.1 Codex Max Medium",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID: "gpt-5.1-codex-max-high",
-			Object: "model",
-			Created: 1763424000,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5.1-max",
-			DisplayName: "GPT 5.1 Codex Max High",
-			Description: "Stable version of GPT 5.1 Codex Max High",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID: "gpt-5.1-codex-max-xhigh",
-			Object: "model",
-			Created: 1763424000,
-			OwnedBy: "openai",
-			Type: "openai",
-			Version: "gpt-5.1-max",
-			DisplayName: "GPT 5.1 Codex Max XHigh",
-			Description: "Stable version of GPT 5.1 Codex Max XHigh",
-			ContextLength: 400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
 	}
 }

View File

@@ -54,15 +54,22 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	// Use streaming translation to preserve function calling, except for claude.
 	stream := from != to
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
-	modelForUpstream := req.Model
-	if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
-		body, _ = sjson.SetBytes(body, "model", modelOverride)
-		modelForUpstream = modelOverride
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel == "" {
+		upstreamModel = req.Model
 	}
-	// Inject thinking config based on model suffix for thinking variants
-	body = e.injectThinkingConfig(req.Model, body)
+	if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
+		upstreamModel = modelOverride
+	} else if !strings.EqualFold(upstreamModel, req.Model) {
+		if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
+			upstreamModel = modelOverride
+		}
+	}
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)
+	// Inject thinking config based on model metadata for thinking variants
+	body = e.injectThinkingConfig(req.Model, req.Metadata, body)
-	if !strings.HasPrefix(modelForUpstream, "claude-3-5-haiku") {
+	if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") {
 		body = checkSystemInstructions(body)
 	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -161,11 +168,20 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("claude")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
-		body, _ = sjson.SetBytes(body, "model", modelOverride)
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel == "" {
+		upstreamModel = req.Model
 	}
-	// Inject thinking config based on model suffix for thinking variants
-	body = e.injectThinkingConfig(req.Model, body)
+	if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
+		upstreamModel = modelOverride
+	} else if !strings.EqualFold(upstreamModel, req.Model) {
+		if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
+			upstreamModel = modelOverride
+		}
+	}
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)
+	// Inject thinking config based on model metadata for thinking variants
+	body = e.injectThinkingConfig(req.Model, req.Metadata, body)
 	body = checkSystemInstructions(body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -295,13 +311,20 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	// Use streaming translation to preserve function calling, except for claude.
 	stream := from != to
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
-	modelForUpstream := req.Model
-	if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
-		body, _ = sjson.SetBytes(body, "model", modelOverride)
-		modelForUpstream = modelOverride
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel == "" {
+		upstreamModel = req.Model
 	}
+	if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
+		upstreamModel = modelOverride
+	} else if !strings.EqualFold(upstreamModel, req.Model) {
+		if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
+			upstreamModel = modelOverride
+		}
+	}
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)
-	if !strings.HasPrefix(modelForUpstream, "claude-3-5-haiku") {
+	if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") {
 		body = checkSystemInstructions(body)
 	}
@@ -427,25 +450,15 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) {
 	return betas, body
 }

-// injectThinkingConfig adds thinking configuration based on model name suffix
-func (e *ClaudeExecutor) injectThinkingConfig(modelName string, body []byte) []byte {
+// injectThinkingConfig adds thinking configuration based on metadata or legacy suffixes.
+func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte {
 	// Only inject if thinking config is not already present
 	if gjson.GetBytes(body, "thinking").Exists() {
 		return body
 	}
-	var budgetTokens int
-	switch {
-	case strings.HasSuffix(modelName, "-thinking-low"):
-		budgetTokens = 1024
-	case strings.HasSuffix(modelName, "-thinking-medium"):
-		budgetTokens = 8192
-	case strings.HasSuffix(modelName, "-thinking-high"):
-		budgetTokens = 24576
-	case strings.HasSuffix(modelName, "-thinking"):
-		// Default thinking without suffix uses medium budget
-		budgetTokens = 8192
-	default:
+	budgetTokens, ok := resolveClaudeThinkingBudget(modelName, metadata)
+	if !ok || budgetTokens <= 0 {
 		return body
 	}
@@ -454,6 +467,44 @@ func (e *ClaudeExecutor) injectThinkingConfig(modelName string, body []byte) []b
 	return body
 }

+func resolveClaudeThinkingBudget(modelName string, metadata map[string]any) (int, bool) {
+	budget, include, effort, matched := util.ThinkingFromMetadata(metadata)
+	if matched {
+		if include != nil && !*include {
+			return 0, false
+		}
+		if budget != nil {
+			normalized := util.NormalizeThinkingBudget(modelName, *budget)
+			if normalized > 0 {
+				return normalized, true
+			}
+			return 0, false
+		}
+		if effort != nil {
+			if derived, ok := util.ThinkingEffortToBudget(modelName, *effort); ok && derived > 0 {
+				return derived, true
+			}
+		}
+	}
+	return claudeBudgetFromSuffix(modelName)
+}
+
+func claudeBudgetFromSuffix(modelName string) (int, bool) {
+	lower := strings.ToLower(strings.TrimSpace(modelName))
+	switch {
+	case strings.HasSuffix(lower, "-thinking-low"):
+		return 1024, true
+	case strings.HasSuffix(lower, "-thinking-medium"):
+		return 8192, true
+	case strings.HasSuffix(lower, "-thinking-high"):
+		return 24576, true
+	case strings.HasSuffix(lower, "-thinking"):
+		return 8192, true
+	default:
+		return 0, false
+	}
+}
+
 // ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled.
 // Anthropic API requires this constraint; violating it returns a 400 error.
 // This function should be called after all thinking configuration is finalized.
@@ -491,35 +542,45 @@ func ensureMaxTokensForThinking(modelName string, body []byte) []byte {
 }

 func (e *ClaudeExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
-	if alias == "" {
+	trimmed := strings.TrimSpace(alias)
+	if trimmed == "" {
 		return ""
 	}
-	// Hardcoded mappings for thinking models to actual Claude model names
-	switch alias {
-	case "claude-opus-4-5-thinking", "claude-opus-4-5-thinking-low", "claude-opus-4-5-thinking-medium", "claude-opus-4-5-thinking-high":
-		return "claude-opus-4-5-20251101"
-	case "claude-sonnet-4-5-thinking":
-		return "claude-sonnet-4-5-20250929"
-	}
 	entry := e.resolveClaudeConfig(auth)
 	if entry == nil {
 		return ""
 	}
+	normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)
+	// Candidate names to match against configured aliases/names.
+	candidates := []string{strings.TrimSpace(normalizedModel)}
+	if !strings.EqualFold(normalizedModel, trimmed) {
+		candidates = append(candidates, trimmed)
+	}
+	if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
+		candidates = append(candidates, original)
+	}
 	for i := range entry.Models {
 		model := entry.Models[i]
 		name := strings.TrimSpace(model.Name)
 		modelAlias := strings.TrimSpace(model.Alias)
-		if modelAlias != "" {
-			if strings.EqualFold(modelAlias, alias) {
+		for _, candidate := range candidates {
+			if candidate == "" {
+				continue
+			}
+			if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
 				if name != "" {
 					return name
 				}
-				return alias
+				return candidate
+			}
+			if name != "" && strings.EqualFold(name, candidate) {
+				return name
 			}
-			continue
-		}
-		if name != "" && strings.EqualFold(name, alias) {
-			return name
 		}
 	}
 	return ""

View File

@@ -49,14 +49,14 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = e.setReasoningEffortByAlias(req.Model, body)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	body, _ = sjson.SetBytes(body, "stream", true)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
@@ -142,13 +142,16 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	body = e.setReasoningEffortByAlias(req.Model, body)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

 	url := strings.TrimSuffix(baseURL, "/") + "/responses"
 	httpReq, err := e.cacheHelper(ctx, from, url, req, body)
@@ -235,14 +238,16 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 }

 func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	modelForCounting := req.Model
-	body = e.setReasoningEffortByAlias(req.Model, body)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.SetBytes(body, "stream", false)
@@ -261,83 +266,6 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
 	return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
 }

-func (e *CodexExecutor) setReasoningEffortByAlias(modelName string, payload []byte) []byte {
-	if util.InArray([]string{"gpt-5", "gpt-5-minimal", "gpt-5-low", "gpt-5-medium", "gpt-5-high"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5")
-		switch modelName {
-		case "gpt-5-minimal":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "minimal")
-		case "gpt-5-low":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
-		case "gpt-5-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5-codex", "gpt-5-codex-low", "gpt-5-codex-medium", "gpt-5-codex-high"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5-codex")
-		switch modelName {
-		case "gpt-5-codex-low":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
-		case "gpt-5-codex-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5-codex-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5-codex-mini", "gpt-5-codex-mini-medium", "gpt-5-codex-mini-high"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5-codex-mini")
-		switch modelName {
-		case "gpt-5-codex-mini-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5-codex-mini-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5.1", "gpt-5.1-none", "gpt-5.1-low", "gpt-5.1-medium", "gpt-5.1-high"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1")
-		switch modelName {
-		case "gpt-5.1-none":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "none")
-		case "gpt-5.1-low":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
-		case "gpt-5.1-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5.1-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5.1-codex", "gpt-5.1-codex-low", "gpt-5.1-codex-medium", "gpt-5.1-codex-high"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex")
-		switch modelName {
-		case "gpt-5.1-codex-low":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
-		case "gpt-5.1-codex-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5.1-codex-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5.1-codex-mini", "gpt-5.1-codex-mini-medium", "gpt-5.1-codex-mini-high"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex-mini")
-		switch modelName {
-		case "gpt-5.1-codex-mini-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5.1-codex-mini-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5.1-codex-max", "gpt-5.1-codex-max-low", "gpt-5.1-codex-max-medium", "gpt-5.1-codex-max-high", "gpt-5.1-codex-max-xhigh"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex-max")
-		switch modelName {
-		case "gpt-5.1-codex-max-low":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
-		case "gpt-5.1-codex-max-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5.1-codex-max-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		case "gpt-5.1-codex-max-xhigh":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "xhigh")
-		}
-	}
-	return payload
-}
-
 func tokenizerForCodexModel(model string) (tokenizer.Codec, error) {
 	sanitized := strings.ToLower(strings.TrimSpace(model))
 	switch {

View File

@@ -75,6 +75,8 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	// Official Gemini API via API key or OAuth bearer
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
@@ -85,6 +87,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

 	action := "generateContent"
 	if req.Metadata != nil {
@@ -93,7 +96,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 		}
 	}
 	baseURL := resolveGeminiBaseURL(auth)
-	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, req.Model, action)
+	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, action)
 	if opts.Alt != "" && action != "countTokens" {
 		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
 	}
@@ -167,6 +170,8 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
@@ -176,9 +181,10 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	baseURL := resolveGeminiBaseURL(auth)
-	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, req.Model, "streamGenerateContent")
+	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, "streamGenerateContent")
 	if opts.Alt == "" {
 		url = url + "?alt=sse"
 	} else {

View File

@@ -105,10 +105,12 @@ func (e *GeminiVertexExecutor) CountTokens(ctx context.Context, auth *cliproxyau
 // countTokensWithServiceAccount handles token counting using service account credentials.
 func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) {
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
@@ -117,13 +119,14 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
 	}
 	translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
 	translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
+	translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
 	baseURL := vertexBaseURL(location)
-	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "countTokens")
+	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "countTokens")

 	httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
 	if errNewReq != nil {
@@ -191,10 +194,12 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
 // countTokensWithAPIKey handles token counting using API key credentials.
 func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) {
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
@@ -203,6 +208,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
 	}
 	translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
 	translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
+	translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
@@ -286,10 +292,12 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
@@ -301,6 +309,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

 	action := "generateContent"
 	if req.Metadata != nil {
@@ -309,7 +318,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 		}
 	}
 	baseURL := vertexBaseURL(location)
-	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, action)
+	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, action)
 	if opts.Alt != "" && action != "countTokens" {
 		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
 	}
@@ -383,10 +392,12 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
@@ -398,6 +409,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

 	action := "generateContent"
 	if req.Metadata != nil {
@@ -410,7 +422,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	if baseURL == "" {
 		baseURL = "https://generativelanguage.googleapis.com"
 	}
-	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, action)
+	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, action)
 	if opts.Alt != "" && action != "countTokens" {
 		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
 	}
@@ -481,10 +493,12 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
@@ -496,9 +510,10 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	baseURL := vertexBaseURL(location)
-	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "streamGenerateContent")
+	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "streamGenerateContent")
 	if opts.Alt == "" {
 		url = url + "?alt=sse"
 	} else {
@@ -595,10 +610,12 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
@@ -610,12 +627,13 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

 	// For API key auth, use simpler URL format without project/location
 	if baseURL == "" {
 		baseURL = "https://generativelanguage.googleapis.com"
 	}
-	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, "streamGenerateContent")
+	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, "streamGenerateContent")
 	if opts.Alt == "" {
 		url = url + "?alt=sse"
 	} else {

View File

@@ -57,6 +57,10 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+	body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
+	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+		body, _ = sjson.SetBytes(body, "model", upstreamModel)
+	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)

 	endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
@@ -139,6 +143,10 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
+	body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
+	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+		body, _ = sjson.SetBytes(body, "model", upstreamModel)
+	}
 	// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
 	toolsResult := gjson.GetBytes(body, "tools")
 	if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {

View File

@@ -58,6 +58,10 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 		translated = e.overrideModel(translated, modelOverride)
 	}
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
+	translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
+	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+		translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
+	}

 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
@@ -143,6 +147,10 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 		translated = e.overrideModel(translated, modelOverride)
 	}
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
+	translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
+	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+		translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
+	}

 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))

View File

@@ -12,8 +12,8 @@ import (
// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N) // applyThinkingMetadata applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
// for standard Gemini format payloads. It normalizes the budget when the model supports thinking. // for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte { func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(metadata) budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
if !ok { if !ok || (budgetOverride == nil && includeOverride == nil) {
return payload return payload
} }
if !util.ModelSupportsThinking(model) { if !util.ModelSupportsThinking(model) {
@@ -29,17 +29,60 @@ func applyThinkingMetadata(payload []byte, metadata map[string]any, model string
// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N) // applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
// for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking. // for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte { func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(metadata) budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
if !ok { if !ok || (budgetOverride == nil && includeOverride == nil) {
return payload return payload
} }
if budgetOverride != nil && util.ModelSupportsThinking(model) { if !util.ModelSupportsThinking(model) {
return payload
}
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(model, *budgetOverride) norm := util.NormalizeThinkingBudget(model, *budgetOverride)
budgetOverride = &norm budgetOverride = &norm
} }
return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride) return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
} }
// applyReasoningEffortMetadata applies reasoning effort overrides (reasoning.effort) when present in metadata.
// It avoids overwriting an existing reasoning.effort field and only applies to models that support thinking.
func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model string) []byte {
if len(metadata) == 0 {
return payload
}
if !util.ModelSupportsThinking(model) {
return payload
}
if gjson.GetBytes(payload, "reasoning.effort").Exists() {
return payload
}
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
if updated, err := sjson.SetBytes(payload, "reasoning.effort", effort); err == nil {
return updated
}
}
return payload
}
// applyReasoningEffortMetadataChatCompletions applies reasoning_effort (OpenAI chat completions field)
// when present in metadata. It avoids overwriting an existing reasoning_effort field.
func applyReasoningEffortMetadataChatCompletions(payload []byte, metadata map[string]any, model string) []byte {
if len(metadata) == 0 {
return payload
}
if !util.ModelSupportsThinking(model) {
return payload
}
if gjson.GetBytes(payload, "reasoning_effort").Exists() {
return payload
}
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
if updated, err := sjson.SetBytes(payload, "reasoning_effort", effort); err == nil {
return updated
}
}
return payload
}
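A similar sketch for the chat-completions variant (hypothetical model name and payload; the real helper also gates on util.ModelSupportsThinking and skips payloads that already set the field, both omitted here):

```go
package main

import (
	"fmt"

	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
	"github.com/tidwall/sjson"
)

func main() {
	// "-thinking-high" yields reasoning effort "high" in metadata.
	payload := []byte(`{"model":"qwen3-coder","messages":[]}`)
	_, md := util.NormalizeThinkingModel("qwen3-coder-thinking-high")
	if effort, ok := util.ReasoningEffortFromMetadata(md); ok && effort != "" {
		payload, _ = sjson.SetBytes(payload, "reasoning_effort", effort)
	}
	fmt.Println(string(payload)) // ...,"reasoning_effort":"high"}
}
```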
// applyPayloadConfig applies payload default and override rules from configuration
// to the given JSON payload for the specified model.
// Defaults only fill missing fields, while overrides always overwrite existing values.

View File

@@ -12,6 +12,7 @@ import (
qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen" qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
@@ -50,6 +51,10 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
from := opts.SourceFormat
to := sdktranslator.FromString("openai")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
body, _ = sjson.SetBytes(body, "model", upstreamModel)
}
body = applyPayloadConfig(e.cfg, req.Model, body)
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
@@ -121,6 +126,10 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
to := sdktranslator.FromString("openai")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
body, _ = sjson.SetBytes(body, "model", upstreamModel)
}
toolsResult := gjson.GetBytes(body, "tools")
// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
// This will have no real consequences. It's just to scare Qwen3.

View File

@@ -1,8 +1,6 @@
package util

import (
- "encoding/json"
- "strconv"
"strings"

"github.com/tidwall/gjson"
@@ -15,80 +13,6 @@ const (
GeminiOriginalModelMetadataKey = "gemini_original_model"
)
func ParseGeminiThinkingSuffix(model string) (string, *int, *bool, bool) {
if model == "" {
return model, nil, nil, false
}
lower := strings.ToLower(model)
if !strings.HasPrefix(lower, "gemini-") {
return model, nil, nil, false
}
if strings.HasSuffix(lower, "-nothinking") {
base := model[:len(model)-len("-nothinking")]
budgetValue := 0
if strings.HasPrefix(lower, "gemini-2.5-pro") {
budgetValue = 128
}
include := false
return base, &budgetValue, &include, true
}
// Handle "-reasoning" suffix: enables thinking with dynamic budget (-1)
// Maps: gemini-2.5-flash-reasoning -> gemini-2.5-flash with thinkingBudget=-1
if strings.HasSuffix(lower, "-reasoning") {
base := model[:len(model)-len("-reasoning")]
budgetValue := -1 // Dynamic budget
include := true
return base, &budgetValue, &include, true
}
idx := strings.LastIndex(lower, "-thinking-")
if idx == -1 {
return model, nil, nil, false
}
digits := model[idx+len("-thinking-"):]
if digits == "" {
return model, nil, nil, false
}
end := len(digits)
for i := 0; i < len(digits); i++ {
if digits[i] < '0' || digits[i] > '9' {
end = i
break
}
}
if end == 0 {
return model, nil, nil, false
}
valueStr := digits[:end]
value, err := strconv.Atoi(valueStr)
if err != nil {
return model, nil, nil, false
}
base := model[:idx]
budgetValue := value
return base, &budgetValue, nil, true
}
func NormalizeGeminiThinkingModel(modelName string) (string, map[string]any) {
baseModel, budget, include, matched := ParseGeminiThinkingSuffix(modelName)
if !matched {
return baseModel, nil
}
metadata := map[string]any{
GeminiOriginalModelMetadataKey: modelName,
}
if budget != nil {
metadata[GeminiThinkingBudgetMetadataKey] = *budget
}
if include != nil {
metadata[GeminiIncludeThoughtsMetadataKey] = *include
}
return baseModel, metadata
}
func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool) []byte {
if budget == nil && includeThoughts == nil {
return body
@@ -133,80 +57,6 @@ func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *boo
return updated
}
func GeminiThinkingFromMetadata(metadata map[string]any) (*int, *bool, bool) {
if len(metadata) == 0 {
return nil, nil, false
}
var (
budgetPtr *int
includePtr *bool
matched bool
)
if rawBudget, ok := metadata[GeminiThinkingBudgetMetadataKey]; ok {
switch v := rawBudget.(type) {
case int:
budget := v
budgetPtr = &budget
matched = true
case int32:
budget := int(v)
budgetPtr = &budget
matched = true
case int64:
budget := int(v)
budgetPtr = &budget
matched = true
case float64:
budget := int(v)
budgetPtr = &budget
matched = true
case json.Number:
if val, err := v.Int64(); err == nil {
budget := int(val)
budgetPtr = &budget
matched = true
}
}
}
if rawInclude, ok := metadata[GeminiIncludeThoughtsMetadataKey]; ok {
switch v := rawInclude.(type) {
case bool:
include := v
includePtr = &include
matched = true
case string:
if parsed, err := strconv.ParseBool(v); err == nil {
include := parsed
includePtr = &include
matched = true
}
case json.Number:
if val, err := v.Int64(); err == nil {
include := val != 0
includePtr = &include
matched = true
}
case int:
include := v != 0
includePtr = &include
matched = true
case int32:
include := v != 0
includePtr = &include
matched = true
case int64:
include := v != 0
includePtr = &include
matched = true
case float64:
include := v != 0
includePtr = &include
matched = true
}
}
return budgetPtr, includePtr, matched
}
// modelsWithDefaultThinking lists models that should have thinking enabled by default
// when no explicit thinkingConfig is provided.
var modelsWithDefaultThinking = map[string]bool{

View File

@@ -0,0 +1,327 @@
package util
import (
"encoding/json"
"strconv"
"strings"
)
const (
ThinkingBudgetMetadataKey = "thinking_budget"
ThinkingIncludeThoughtsMetadataKey = "thinking_include_thoughts"
ReasoningEffortMetadataKey = "reasoning_effort"
ThinkingOriginalModelMetadataKey = "thinking_original_model"
)
// NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns
// the normalized base model with extracted metadata. Supported patterns:
// - "-thinking-<number>" extracts a numeric budget
// - "-thinking-<level>" extracts a reasoning effort level (minimal/low/medium/high/xhigh/auto/none)
// - "-thinking" maps to a default reasoning effort of "medium"
// - "-reasoning" maps to dynamic budget (-1) and include_thoughts=true
// - "-nothinking" maps to budget=0 and include_thoughts=false
func NormalizeThinkingModel(modelName string) (string, map[string]any) {
if modelName == "" {
return modelName, nil
}
lower := strings.ToLower(modelName)
baseModel := modelName
var (
budgetOverride *int
includeThoughts *bool
reasoningEffort *string
matched bool
)
switch {
case strings.HasSuffix(lower, "-nothinking"):
baseModel = modelName[:len(modelName)-len("-nothinking")]
budget := 0
include := false
budgetOverride = &budget
includeThoughts = &include
matched = true
case strings.HasSuffix(lower, "-reasoning"):
baseModel = modelName[:len(modelName)-len("-reasoning")]
budget := -1
include := true
budgetOverride = &budget
includeThoughts = &include
matched = true
default:
if idx := strings.LastIndex(lower, "-thinking-"); idx != -1 {
value := modelName[idx+len("-thinking-"):]
if value != "" {
if parsed, ok := parseIntPrefix(value); ok {
baseModel = modelName[:idx]
budgetOverride = &parsed
matched = true
} else if effort, okEffort := normalizeReasoningEffort(value); okEffort {
baseModel = modelName[:idx]
reasoningEffort = &effort
matched = true
}
}
} else if strings.HasSuffix(lower, "-thinking") {
baseModel = modelName[:len(modelName)-len("-thinking")]
effort := "medium"
reasoningEffort = &effort
matched = true
}
}
if !matched {
return baseModel, nil
}
metadata := map[string]any{
ThinkingOriginalModelMetadataKey: modelName,
}
if budgetOverride != nil {
metadata[ThinkingBudgetMetadataKey] = *budgetOverride
}
if includeThoughts != nil {
metadata[ThinkingIncludeThoughtsMetadataKey] = *includeThoughts
}
if reasoningEffort != nil {
metadata[ReasoningEffortMetadataKey] = *reasoningEffort
}
return baseModel, metadata
}
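To pin down the suffix grammar, a test-style sketch (assumes it sits alongside the package; the model names are illustrative):

```go
package util

import "testing"

func TestNormalizeThinkingModelSketch(t *testing.T) {
	// Numeric suffix becomes a budget.
	base, md := NormalizeThinkingModel("gemini-2.5-flash-thinking-8192")
	if base != "gemini-2.5-flash" || md[ThinkingBudgetMetadataKey] != 8192 {
		t.Fatalf("numeric suffix: %s %v", base, md)
	}
	// Level suffix becomes a reasoning effort.
	base, md = NormalizeThinkingModel("claude-opus-4-5-thinking-high")
	if base != "claude-opus-4-5" || md[ReasoningEffortMetadataKey] != "high" {
		t.Fatalf("effort suffix: %s %v", base, md)
	}
	// No recognized suffix: name passes through with nil metadata.
	if base, md = NormalizeThinkingModel("gpt-4o"); base != "gpt-4o" || md != nil {
		t.Fatalf("pass-through: %s %v", base, md)
	}
}
```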
// ThinkingFromMetadata extracts thinking overrides from metadata produced by NormalizeThinkingModel.
// It accepts both the new generic keys and legacy Gemini-specific keys.
func ThinkingFromMetadata(metadata map[string]any) (*int, *bool, *string, bool) {
if len(metadata) == 0 {
return nil, nil, nil, false
}
var (
budgetPtr *int
includePtr *bool
effortPtr *string
matched bool
)
readBudget := func(key string) {
if budgetPtr != nil {
return
}
if raw, ok := metadata[key]; ok {
if v, okNumber := parseNumberToInt(raw); okNumber {
budget := v
budgetPtr = &budget
matched = true
}
}
}
readInclude := func(key string) {
if includePtr != nil {
return
}
if raw, ok := metadata[key]; ok {
switch v := raw.(type) {
case bool:
val := v
includePtr = &val
matched = true
case *bool:
if v != nil {
val := *v
includePtr = &val
matched = true
}
}
}
}
readEffort := func(key string) {
if effortPtr != nil {
return
}
if raw, ok := metadata[key]; ok {
if val, okStr := raw.(string); okStr && strings.TrimSpace(val) != "" {
normalized := strings.ToLower(strings.TrimSpace(val))
effortPtr = &normalized
matched = true
}
}
}
readBudget(ThinkingBudgetMetadataKey)
readBudget(GeminiThinkingBudgetMetadataKey)
readInclude(ThinkingIncludeThoughtsMetadataKey)
readInclude(GeminiIncludeThoughtsMetadataKey)
readEffort(ReasoningEffortMetadataKey)
readEffort("reasoning.effort")
return budgetPtr, includePtr, effortPtr, matched
}
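A small sketch of the legacy-key fallback (values hypothetical; budgets decoded from JSON typically arrive as float64, which parseNumberToInt accepts):

```go
package util

import "testing"

func TestThinkingFromMetadataLegacyKeys(t *testing.T) {
	md := map[string]any{
		GeminiThinkingBudgetMetadataKey:  float64(1024), // legacy key, float64 as from JSON
		GeminiIncludeThoughtsMetadataKey: true,
	}
	budget, include, _, matched := ThinkingFromMetadata(md)
	if !matched || budget == nil || *budget != 1024 || include == nil || !*include {
		t.Fatalf("unexpected: %v %v %v", budget, include, matched)
	}
}
```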
// ResolveThinkingConfigFromMetadata derives thinking budget/include overrides,
// converting reasoning effort strings into budgets when possible.
func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (*int, *bool, bool) {
budget, include, effort, matched := ThinkingFromMetadata(metadata)
if !matched {
return nil, nil, false
}
if budget == nil && effort != nil {
if derived, ok := ThinkingEffortToBudget(model, *effort); ok {
budget = &derived
}
}
return budget, include, budget != nil || include != nil || effort != nil
}
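For example, an effort-only suffix is converted into a clamped budget:

```go
package util

import "testing"

// Sketch: "-thinking-low" stores an effort string, which the resolver
// converts to a budget via ThinkingEffortToBudget.
func TestResolveThinkingConfigDerivesBudget(t *testing.T) {
	base, md := NormalizeThinkingModel("gemini-2.5-pro-thinking-low")
	budget, _, ok := ResolveThinkingConfigFromMetadata(base, md)
	if !ok || budget == nil {
		t.Fatal("expected a derived budget")
	}
	if *budget != NormalizeThinkingBudget(base, 1024) { // "low" maps to 1024 before clamping
		t.Fatalf("got %d", *budget)
	}
}
```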
// ReasoningEffortFromMetadata resolves a reasoning effort string from metadata,
// inferring "auto" and "none" when budgets request dynamic or disabled thinking.
func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
budget, include, effort, matched := ThinkingFromMetadata(metadata)
if !matched {
return "", false
}
if effort != nil && *effort != "" {
return *effort, true
}
if budget != nil {
switch *budget {
case -1:
return "auto", true
case 0:
return "none", true
}
}
if include != nil && !*include {
return "none", true
}
return "", true
}
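A sketch of the inference rules (budget -1 reads back as "auto", budget 0 as "none"):

```go
package util

import "testing"

func TestReasoningEffortInference(t *testing.T) {
	_, md := NormalizeThinkingModel("gemini-2.5-flash-reasoning")
	if effort, ok := ReasoningEffortFromMetadata(md); !ok || effort != "auto" {
		t.Fatalf("dynamic budget: %q %v", effort, ok)
	}
	_, md = NormalizeThinkingModel("gemini-2.5-flash-nothinking")
	if effort, ok := ReasoningEffortFromMetadata(md); !ok || effort != "none" {
		t.Fatalf("disabled budget: %q %v", effort, ok)
	}
}
```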
// ThinkingEffortToBudget maps reasoning effort levels to approximate budgets,
// clamping the result to the model's supported range.
func ThinkingEffortToBudget(model, effort string) (int, bool) {
if effort == "" {
return 0, false
}
switch strings.ToLower(effort) {
case "none":
return 0, true
case "auto":
return NormalizeThinkingBudget(model, -1), true
case "minimal":
return NormalizeThinkingBudget(model, 512), true
case "low":
return NormalizeThinkingBudget(model, 1024), true
case "medium":
return NormalizeThinkingBudget(model, 8192), true
case "high":
return NormalizeThinkingBudget(model, 24576), true
case "xhigh":
return NormalizeThinkingBudget(model, 32768), true
default:
return 0, false
}
}
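The level-to-budget table, exercised in a quick sketch (budgets are the pre-clamp values from the switch above):

```go
package util

import "testing"

func TestEffortBudgetMapping(t *testing.T) {
	// minimal→512, low→1024, medium→8192, high→24576, xhigh→32768 (then clamped).
	if b, ok := ThinkingEffortToBudget("gemini-2.5-flash", "medium"); !ok || b != NormalizeThinkingBudget("gemini-2.5-flash", 8192) {
		t.Fatalf("got %d %v", b, ok)
	}
	if _, ok := ThinkingEffortToBudget("gemini-2.5-flash", "extreme"); ok {
		t.Fatal("unknown efforts are rejected")
	}
}
```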
// ResolveOriginalModel returns the upstream base model derived from the original
// model name stored in metadata (if present), stripping any thinking suffix;
// otherwise it falls back to re-normalizing the provided model.
func ResolveOriginalModel(model string, metadata map[string]any) string {
normalize := func(name string) string {
if name == "" {
return ""
}
if base, _ := NormalizeThinkingModel(name); base != "" {
return base
}
return strings.TrimSpace(name)
}
if metadata != nil {
if v, ok := metadata[ThinkingOriginalModelMetadataKey]; ok {
if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" {
if base := normalize(s); base != "" {
return base
}
}
}
if v, ok := metadata[GeminiOriginalModelMetadataKey]; ok {
if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" {
if base := normalize(s); base != "" {
return base
}
}
}
}
// Fallback: try to re-normalize the model name when metadata was dropped.
if base := normalize(model); base != "" {
return base
}
return model
}
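Both resolution paths land on the same base model, as a sketch shows:

```go
package util

import "testing"

func TestResolveOriginalModelFallbacks(t *testing.T) {
	// Preferred path: the original name survives in metadata.
	md := map[string]any{ThinkingOriginalModelMetadataKey: "claude-opus-4-5-thinking-high"}
	if got := ResolveOriginalModel("claude-opus-4-5", md); got != "claude-opus-4-5" {
		t.Fatalf("metadata path: %q", got)
	}
	// Fallback path: metadata dropped, the model name itself is re-normalized.
	if got := ResolveOriginalModel("claude-opus-4-5-thinking-high", nil); got != "claude-opus-4-5" {
		t.Fatalf("fallback path: %q", got)
	}
}
```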
func parseIntPrefix(value string) (int, bool) {
if value == "" {
return 0, false
}
digits := strings.TrimLeft(value, "-")
if digits == "" {
return 0, false
}
end := len(digits)
for i := 0; i < len(digits); i++ {
if digits[i] < '0' || digits[i] > '9' {
end = i
break
}
}
if end == 0 {
return 0, false
}
val, err := strconv.Atoi(digits[:end])
if err != nil {
return 0, false
}
return val, true
}
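parseIntPrefix deliberately stops at the first non-digit, as a sketch shows (the "1024k" input is hypothetical):

```go
package util

import "testing"

func TestParseIntPrefixSketch(t *testing.T) {
	if v, ok := parseIntPrefix("8192"); !ok || v != 8192 {
		t.Fatalf("got %d %v", v, ok)
	}
	if v, ok := parseIntPrefix("1024k"); !ok || v != 1024 { // trailing text ignored
		t.Fatalf("got %d %v", v, ok)
	}
	if _, ok := parseIntPrefix("high"); ok {
		t.Fatal("non-numeric input must fail")
	}
}
```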
func parseNumberToInt(raw any) (int, bool) {
switch v := raw.(type) {
case int:
return v, true
case int32:
return int(v), true
case int64:
return int(v), true
case float64:
return int(v), true
case json.Number:
if val, err := v.Int64(); err == nil {
return int(val), true
}
case string:
if strings.TrimSpace(v) == "" {
return 0, false
}
if parsed, err := strconv.Atoi(strings.TrimSpace(v)); err == nil {
return parsed, true
}
}
return 0, false
}
func normalizeReasoningEffort(value string) (string, bool) {
if value == "" {
return "", false
}
effort := strings.ToLower(strings.TrimSpace(value))
switch effort {
case "minimal", "low", "medium", "high", "xhigh", "auto", "none":
return effort, true
default:
return "", false
}
}

View File

@@ -323,18 +323,32 @@ func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string
providerName, extractedModelName, isDynamic := h.parseDynamicModel(resolvedModelName)
- // First, normalize the model name to handle suffixes like "-thinking-128"
- // This needs to happen before determining the provider for non-dynamic models.
- normalizedModel, metadata = normalizeModelMetadata(resolvedModelName)
+ targetModelName := resolvedModelName
+ if isDynamic {
+ targetModelName = extractedModelName
+ }
+ // Normalize the model name to handle dynamic thinking suffixes before determining the provider.
+ normalizedModel, metadata = normalizeModelMetadata(targetModelName)
if isDynamic {
providers = []string{providerName}
- // For dynamic models, the extractedModelName is already normalized by parseDynamicModel
- // so we use it as the final normalizedModel.
- normalizedModel = extractedModelName
} else {
// For non-dynamic models, use the normalizedModel to get the provider name.
providers = util.GetProviderName(normalizedModel)
if len(providers) == 0 && metadata != nil {
if originalRaw, ok := metadata[util.ThinkingOriginalModelMetadataKey]; ok {
if originalModel, okStr := originalRaw.(string); okStr {
originalModel = strings.TrimSpace(originalModel)
if originalModel != "" && !strings.EqualFold(originalModel, normalizedModel) {
if altProviders := util.GetProviderName(originalModel); len(altProviders) > 0 {
providers = altProviders
normalizedModel = originalModel
}
}
}
}
}
}
if len(providers) == 0 {
@@ -382,7 +396,7 @@ func cloneBytes(src []byte) []byte {
}
func normalizeModelMetadata(modelName string) (string, map[string]any) {
- return util.NormalizeGeminiThinkingModel(modelName)
+ return util.NormalizeThinkingModel(modelName)
}
func cloneMetadata(src map[string]any) map[string]any {