Mirror of https://github.com/router-for-me/CLIProxyAPI.git
refactor: improve thinking logic
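Summary: this change replaces the per-callsite thinking helpers (util.NormalizeThinkingModel, the ApplyThinkingMetadata* family, NormalizeThinkingBudget, and friends) with a unified thinking package: thinking.ParseSuffix extracts the base model name and any parenthesized suffix, and thinking.ApplyThinking injects the per-format thinking config. Executors now report usage and build endpoints with the base model, the Amp model mapper preserves user suffixes (with config suffixes taking priority), the Claude and Antigravity ThinkingSupport tables are retuned, and the now-unused resolveUpstreamModel/injectThinkingConfig helpers are deleted.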
@@ -8,6 +8,7 @@ import (
 	"time"

 	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
@@ -134,10 +135,11 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
 	}

 	// Normalize model (handles dynamic thinking suffixes)
-	normalizedModel, thinkingMetadata := util.NormalizeThinkingModel(modelName)
+	suffixResult := thinking.ParseSuffix(modelName)
+	normalizedModel := suffixResult.ModelName
 	thinkingSuffix := ""
-	if thinkingMetadata != nil && strings.HasPrefix(modelName, normalizedModel) {
-		thinkingSuffix = modelName[len(normalizedModel):]
+	if suffixResult.HasSuffix {
+		thinkingSuffix = "(" + suffixResult.RawSuffix + ")"
 	}

 	resolveMappedModel := func() (string, []string) {
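For reference, thinking.ParseSuffix splits a parenthesized thinking suffix off the model name. A minimal sketch of the contract, using only the fields this diff relies on (ModelName, HasSuffix, RawSuffix); the parser internals are an assumption:

    // Sketch: RawSuffix carries the suffix without parentheses, which is why
    // the handler re-wraps it as "(" + RawSuffix + ")" above.
    res := thinking.ParseSuffix("gemini-2.5-pro(8192)")
    // res.ModelName == "gemini-2.5-pro"
    // res.HasSuffix == true
    // res.RawSuffix == "8192"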
@@ -157,13 +159,13 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
 	// Preserve dynamic thinking suffix (e.g. "(xhigh)") when mapping applies, unless the target
 	// already specifies its own thinking suffix.
 	if thinkingSuffix != "" {
-		_, mappedThinkingMetadata := util.NormalizeThinkingModel(mappedModel)
-		if mappedThinkingMetadata == nil {
+		mappedSuffixResult := thinking.ParseSuffix(mappedModel)
+		if !mappedSuffixResult.HasSuffix {
 			mappedModel += thinkingSuffix
 		}
 	}

-	mappedBaseModel, _ := util.NormalizeThinkingModel(mappedModel)
+	mappedBaseModel := thinking.ParseSuffix(mappedModel).ModelName
 	mappedProviders := util.GetProviderName(mappedBaseModel)
 	if len(mappedProviders) == 0 {
 		return "", nil
@@ -8,6 +8,7 @@ import (
 	"sync"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	log "github.com/sirupsen/logrus"
 )
@@ -44,6 +45,11 @@ func NewModelMapper(mappings []config.AmpModelMapping) *DefaultModelMapper {
 // MapModel checks if a mapping exists for the requested model and if the
 // target model has available local providers. Returns the mapped model name
 // or empty string if no valid mapping exists.
+//
+// If the requested model contains a thinking suffix (e.g., "g25p(8192)"),
+// the suffix is preserved in the returned model name (e.g., "gemini-2.5-pro(8192)").
+// However, if the mapping target already contains a suffix, the config suffix
+// takes priority over the user's suffix.
 func (m *DefaultModelMapper) MapModel(requestedModel string) string {
 	if requestedModel == "" {
 		return ""
@@ -52,16 +58,20 @@ func (m *DefaultModelMapper) MapModel(requestedModel string) string {
 	m.mu.RLock()
 	defer m.mu.RUnlock()

-	// Normalize the requested model for lookup
-	normalizedRequest := strings.ToLower(strings.TrimSpace(requestedModel))
+	// Extract thinking suffix from requested model using ParseSuffix
+	requestResult := thinking.ParseSuffix(requestedModel)
+	baseModel := requestResult.ModelName
+
+	// Normalize the base model for lookup (case-insensitive)
+	normalizedBase := strings.ToLower(strings.TrimSpace(baseModel))

-	// Check for direct mapping
-	targetModel, exists := m.mappings[normalizedRequest]
+	// Check for direct mapping using base model name
+	targetModel, exists := m.mappings[normalizedBase]
 	if !exists {
-		// Try regex mappings in order
-		base, _ := util.NormalizeThinkingModel(requestedModel)
+		// Try regex mappings in order using base model only
+		// (suffix is handled separately via ParseSuffix)
 		for _, rm := range m.regexps {
-			if rm.re.MatchString(requestedModel) || (base != "" && rm.re.MatchString(base)) {
+			if rm.re.MatchString(baseModel) {
 				targetModel = rm.to
 				exists = true
 				break
@@ -72,14 +82,28 @@ func (m *DefaultModelMapper) MapModel(requestedModel string) string {
 		}
 	}

-	// Verify target model has available providers
-	normalizedTarget, _ := util.NormalizeThinkingModel(targetModel)
-	providers := util.GetProviderName(normalizedTarget)
+	// Check if target model already has a thinking suffix (config priority)
+	targetResult := thinking.ParseSuffix(targetModel)
+
+	// Verify target model has available providers (use base model for lookup)
+	providers := util.GetProviderName(targetResult.ModelName)
 	if len(providers) == 0 {
 		log.Debugf("amp model mapping: target model %s has no available providers, skipping mapping", targetModel)
 		return ""
 	}

+	// Suffix handling: config suffix takes priority, otherwise preserve user suffix
+	if targetResult.HasSuffix {
+		// Config's "to" already contains a suffix - use it as-is (config priority)
+		return targetModel
+	}
+
+	// Preserve user's thinking suffix on the mapped model
+	// (skip empty suffixes to avoid returning "model()")
+	if requestResult.HasSuffix && requestResult.RawSuffix != "" {
+		return targetModel + "(" + requestResult.RawSuffix + ")"
+	}
+
 	// Note: Detailed routing log is handled by logAmpRouting in fallback_handlers.go
 	return targetModel
 }
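The resulting precedence can be read off directly; a short usage sketch matching the table-driven test added further down:

    mapper := NewModelMapper([]config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}})
    _ = mapper.MapModel("g25p(high)") // "gemini-2.5-pro(high)": user suffix preserved

    mapper = NewModelMapper([]config.AmpModelMapping{{From: "alias", To: "gemini-2.5-pro(medium)"}})
    _ = mapper.MapModel("alias(high)") // "gemini-2.5-pro(medium)": config suffix wins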
@@ -217,10 +217,10 @@ func TestModelMapper_Regex_MatchBaseWithoutParens(t *testing.T) {

 	mapper := NewModelMapper(mappings)

-	// Incoming model has reasoning suffix but should match base via regex
+	// Incoming model has reasoning suffix, regex matches base, suffix is preserved
 	result := mapper.MapModel("gpt-5(high)")
-	if result != "gemini-2.5-pro" {
-		t.Errorf("Expected gemini-2.5-pro, got %s", result)
+	if result != "gemini-2.5-pro(high)" {
+		t.Errorf("Expected gemini-2.5-pro(high), got %s", result)
 	}
 }
@@ -281,3 +281,95 @@ func TestModelMapper_Regex_CaseInsensitive(t *testing.T) {
 		t.Errorf("Expected claude-sonnet-4, got %s", result)
 	}
 }
+
+func TestModelMapper_SuffixPreservation(t *testing.T) {
+	reg := registry.GetGlobalRegistry()
+
+	// Register test models
+	reg.RegisterClient("test-client-suffix", "gemini", []*registry.ModelInfo{
+		{ID: "gemini-2.5-pro", OwnedBy: "google", Type: "gemini"},
+	})
+	reg.RegisterClient("test-client-suffix-2", "claude", []*registry.ModelInfo{
+		{ID: "claude-sonnet-4", OwnedBy: "anthropic", Type: "claude"},
+	})
+	defer reg.UnregisterClient("test-client-suffix")
+	defer reg.UnregisterClient("test-client-suffix-2")
+
+	tests := []struct {
+		name     string
+		mappings []config.AmpModelMapping
+		input    string
+		want     string
+	}{
+		{
+			name:     "numeric suffix preserved",
+			mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
+			input:    "g25p(8192)",
+			want:     "gemini-2.5-pro(8192)",
+		},
+		{
+			name:     "level suffix preserved",
+			mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
+			input:    "g25p(high)",
+			want:     "gemini-2.5-pro(high)",
+		},
+		{
+			name:     "no suffix unchanged",
+			mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
+			input:    "g25p",
+			want:     "gemini-2.5-pro",
+		},
+		{
+			name:     "config suffix takes priority",
+			mappings: []config.AmpModelMapping{{From: "alias", To: "gemini-2.5-pro(medium)"}},
+			input:    "alias(high)",
+			want:     "gemini-2.5-pro(medium)",
+		},
+		{
+			name:     "regex with suffix preserved",
+			mappings: []config.AmpModelMapping{{From: "^g25.*", To: "gemini-2.5-pro", Regex: true}},
+			input:    "g25p(8192)",
+			want:     "gemini-2.5-pro(8192)",
+		},
+		{
+			name:     "auto suffix preserved",
+			mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
+			input:    "g25p(auto)",
+			want:     "gemini-2.5-pro(auto)",
+		},
+		{
+			name:     "none suffix preserved",
+			mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
+			input:    "g25p(none)",
+			want:     "gemini-2.5-pro(none)",
+		},
+		{
+			name:     "case insensitive base lookup with suffix",
+			mappings: []config.AmpModelMapping{{From: "G25P", To: "gemini-2.5-pro"}},
+			input:    "g25p(high)",
+			want:     "gemini-2.5-pro(high)",
+		},
+		{
+			name:     "empty suffix filtered out",
+			mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
+			input:    "g25p()",
+			want:     "gemini-2.5-pro",
+		},
+		{
+			name:     "incomplete suffix treated as no suffix",
+			mappings: []config.AmpModelMapping{{From: "g25p(high", To: "gemini-2.5-pro"}},
+			input:    "g25p(high",
+			want:     "gemini-2.5-pro",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mapper := NewModelMapper(tt.mappings)
+			got := mapper.MapModel(tt.input)
+			if got != tt.want {
+				t.Errorf("MapModel(%q) = %q, want %q", tt.input, got, tt.want)
+			}
+		})
+	}
+}
@@ -266,6 +266,9 @@ type ClaudeKey struct {
 	ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"`
 }

+func (k ClaudeKey) GetAPIKey() string  { return k.APIKey }
+func (k ClaudeKey) GetBaseURL() string { return k.BaseURL }
+
 // ClaudeModel describes a mapping between an alias and the actual upstream model name.
 type ClaudeModel struct {
 	// Name is the upstream model identifier used when issuing requests.
@@ -308,6 +311,9 @@ type CodexKey struct {
 	ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"`
 }

+func (k CodexKey) GetAPIKey() string  { return k.APIKey }
+func (k CodexKey) GetBaseURL() string { return k.BaseURL }
+
 // CodexModel describes a mapping between an alias and the actual upstream model name.
 type CodexModel struct {
 	// Name is the upstream model identifier used when issuing requests.
@@ -349,6 +355,9 @@ type GeminiKey struct {
 	ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"`
 }

+func (k GeminiKey) GetAPIKey() string  { return k.APIKey }
+func (k GeminiKey) GetBaseURL() string { return k.BaseURL }
+
 // GeminiModel describes a mapping between an alias and the actual upstream model name.
 type GeminiModel struct {
 	// Name is the upstream model identifier used when issuing requests.
@@ -406,6 +415,9 @@ type OpenAICompatibilityModel struct {
 	Alias string `yaml:"alias" json:"alias"`
 }

+func (m OpenAICompatibilityModel) GetName() string  { return m.Name }
+func (m OpenAICompatibilityModel) GetAlias() string { return m.Alias }
+
 // LoadConfig reads a YAML configuration file from the given path,
 // unmarshals it into a Config struct, applies environment variable overrides,
 // and returns it.
@@ -36,6 +36,9 @@ type VertexCompatKey struct {
 	Models []VertexCompatModel `yaml:"models,omitempty" json:"models,omitempty"`
 }

+func (k VertexCompatKey) GetAPIKey() string  { return k.APIKey }
+func (k VertexCompatKey) GetBaseURL() string { return k.BaseURL }
+
 // VertexCompatModel represents a model configuration for Vertex compatibility,
 // including the actual model name and its alias for API routing.
 type VertexCompatModel struct {
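These accessors give every provider key the same surface; a plausible consumer-side interface they could satisfy (the interface itself is hypothetical, not part of this commit):

    // Hypothetical: ClaudeKey, CodexKey, GeminiKey, and VertexCompatKey all
    // satisfy this via the accessors added above.
    type keyCredential interface {
        GetAPIKey() string
        GetBaseURL() string
    }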
@@ -27,7 +27,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 4.5 Sonnet",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
 		},
 		{
 			ID: "claude-opus-4-5-20251101",
@@ -39,7 +39,7 @@ func GetClaudeModels() []*ModelInfo {
 			Description:         "Premium model combining maximum intelligence with practical performance",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
 		},
 		{
 			ID: "claude-opus-4-1-20250805",
@@ -50,7 +50,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 4.1 Opus",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
 		},
 		{
 			ID: "claude-opus-4-20250514",
@@ -61,7 +61,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 4 Opus",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
 		},
 		{
 			ID: "claude-sonnet-4-20250514",
@@ -72,7 +72,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 4 Sonnet",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
 		},
 		{
 			ID: "claude-3-7-sonnet-20250219",
@@ -83,7 +83,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 3.7 Sonnet",
 			ContextLength:       128000,
 			MaxCompletionTokens: 8192,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
 		},
 		{
 			ID: "claude-3-5-haiku-20241022",
@@ -777,8 +777,8 @@ func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
 		"gemini-3-pro-preview":       {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-preview"},
 		"gemini-3-pro-image-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image-preview"},
 		"gemini-3-flash-preview":     {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash-preview"},
-		"gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
-		"gemini-claude-opus-4-5-thinking":   {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		"gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, MaxCompletionTokens: 64000},
+		"gemini-claude-opus-4-5-thinking":   {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, MaxCompletionTokens: 64000},
 	}
 }
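The reworked ThinkingSupport entries (Max raised to 128000, DynamicAllowed switched off, ZeroAllowed only on the 4.5 models) feed the clamping path introduced later in this commit. A minimal sketch of clamping against these bounds; the signature matches the thinking.ClampBudget call site below, the body is an assumption:

    // Assumed behavior of thinking.ClampBudget(raw, min, max).
    func clampBudget(raw, min, max int) int {
        if raw < min {
            return min
        }
        if raw > max {
            return max
        }
        return raw
    }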
@@ -51,6 +51,11 @@ type ModelInfo struct {
 	// Thinking holds provider-specific reasoning/thinking budget capabilities.
 	// This is optional and currently used for Gemini thinking budget normalization.
 	Thinking *ThinkingSupport `json:"thinking,omitempty"`
+
+	// UserDefined indicates this model was defined through config file's models[]
+	// array (e.g., openai-compatibility.*.models[], *-api-key.models[]).
+	// UserDefined models have thinking configuration passed through without validation.
+	UserDefined bool `json:"-"`
 }

 // ThinkingSupport describes a model family's supported internal reasoning budget range.
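A sketch of how a consumer might branch on the new flag (illustrative only; the real gate lives in the thinking package):

    // Illustrative: user-defined models skip range validation entirely.
    func resolveBudget(info *registry.ModelInfo, raw int) int {
        if info.UserDefined || info.Thinking == nil {
            return raw // pass through without validation
        }
        return thinking.ClampBudget(raw, info.Thinking.Min, info.Thinking.Max)
    }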
@@ -14,7 +14,7 @@ import (
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/wsrelay"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -111,7 +111,8 @@ func (e *AIStudioExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.A

 // Execute performs a non-streaming request to the AI Studio API.
 func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

 	translatedReq, body, err := e.translateRequest(req, opts, false)
@@ -119,7 +120,7 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
 		return resp, err
 	}

-	endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt)
+	endpoint := e.buildEndpoint(baseModel, body.action, opts.Alt)
 	wsReq := &wsrelay.HTTPRequest{
 		Method: http.MethodPost,
 		URL:    endpoint,
@@ -166,7 +167,8 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,

 // ExecuteStream performs a streaming request to the AI Studio API.
 func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

 	translatedReq, body, err := e.translateRequest(req, opts, true)
@@ -174,7 +176,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 		return nil, err
 	}

-	endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt)
+	endpoint := e.buildEndpoint(baseModel, body.action, opts.Alt)
 	wsReq := &wsrelay.HTTPRequest{
 		Method: http.MethodPost,
 		URL:    endpoint,
@@ -315,6 +317,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth

 // CountTokens counts tokens for the given request using the AI Studio API.
 func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	_, body, err := e.translateRequest(req, opts, false)
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
@@ -324,7 +327,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
 	body.payload, _ = sjson.DeleteBytes(body.payload, "tools")
 	body.payload, _ = sjson.DeleteBytes(body.payload, "safetySettings")

-	endpoint := e.buildEndpoint(req.Model, "countTokens", "")
+	endpoint := e.buildEndpoint(baseModel, "countTokens", "")
 	wsReq := &wsrelay.HTTPRequest{
 		Method: http.MethodPost,
 		URL:    endpoint,
@@ -380,22 +383,19 @@ type translatedPayload struct {
 }

 func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool) ([]byte, translatedPayload, error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, stream)
-	payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
-	payload = ApplyThinkingMetadata(payload, req.Metadata, req.Model)
-	payload = util.ApplyGemini3ThinkingLevelFromMetadata(req.Model, req.Metadata, payload)
-	payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload)
-	payload = util.ConvertThinkingLevelToBudget(payload, req.Model, true)
-	payload = util.NormalizeGeminiThinkingBudget(req.Model, payload, true)
-	payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
-	payload = fixGeminiImageAspectRatio(req.Model, payload)
-	payload = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", payload, originalTranslated)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream)
+	payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
+	payload, _ = thinking.ApplyThinking(payload, req.Model, "gemini")
+	payload = fixGeminiImageAspectRatio(baseModel, payload)
+	payload = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", payload, originalTranslated)
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens")
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseMimeType")
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseJsonSchema")
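The stack of per-concern util calls (metadata, Gemini 3 levels, defaults, level-to-budget conversion, budget normalization, unsupported-strip) collapses into a single thinking.ApplyThinking(payload, model, format) entry point. Note that ApplyThinking deliberately receives the suffixed req.Model rather than baseModel, so a "(8192)" or "(high)" suffix can still drive the thinking config, while translation, endpoint building, and payload config switch to baseModel. A sketch of that division of labor (ApplyThinking is assumed to subsume the removed util steps):

    baseModel := thinking.ParseSuffix(reqModel).ModelName // registry/endpoint-facing name
    payload = sdktranslator.TranslateRequest(from, to, baseModel, payload, stream)
    payload, _ = thinking.ApplyThinking(payload, reqModel, "gemini") // suffix-aware thinking config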
@@ -24,6 +24,7 @@ import (
 	"github.com/google/uuid"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -107,8 +108,10 @@ func (e *AntigravityExecutor) HttpRequest(ctx context.Context, auth *cliproxyaut

 // Execute performs a non-streaming request to the Antigravity API.
 func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
-	isClaude := strings.Contains(strings.ToLower(req.Model), "claude")
-	if isClaude || strings.Contains(req.Model, "gemini-3-pro") {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+	isClaude := strings.Contains(strings.ToLower(baseModel), "claude")
+
+	if isClaude || strings.Contains(baseModel, "gemini-3-pro") {
 		return e.executeClaudeNonStream(ctx, auth, req, opts)
 	}
@@ -120,23 +123,24 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 		auth = updatedAuth
 	}

-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")

 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
-	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
-	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
-	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
-	translated = normalizeAntigravityThinking(req.Model, translated, isClaude)
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+
+	translated, _ = thinking.ApplyThinking(translated, req.Model, "antigravity")
+
+	// Preserve Claude special handling (use baseModel for registry lookups)
+	translated = normalizeAntigravityThinking(baseModel, translated, isClaude)
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated)

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -146,7 +150,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 	var lastErr error

 	for idx, baseURL := range baseURLs {
-		httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, false, opts.Alt, baseURL)
+		httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, false, opts.Alt, baseURL)
 		if errReq != nil {
 			err = errReq
 			return resp, err
@@ -227,6 +231,8 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au

 // executeClaudeNonStream performs a claude non-streaming request to the Antigravity API.
 func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
 	if errToken != nil {
 		return resp, errToken
@@ -235,23 +241,24 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
 		auth = updatedAuth
 	}

-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")

 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
-	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
-	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
-	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
-	translated = normalizeAntigravityThinking(req.Model, translated, true)
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+
+	translated, _ = thinking.ApplyThinking(translated, req.Model, "antigravity")
+
+	// Preserve Claude special handling (use baseModel for registry lookups)
+	translated = normalizeAntigravityThinking(baseModel, translated, true)
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated)

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -261,7 +268,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
 	var lastErr error

 	for idx, baseURL := range baseURLs {
-		httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, true, opts.Alt, baseURL)
+		httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL)
 		if errReq != nil {
 			err = errReq
 			return resp, err
@@ -587,7 +594,10 @@ func (e *AntigravityExecutor) convertStreamToNonStream(stream []byte) []byte {

 // ExecuteStream performs a streaming request to the Antigravity API.
 func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	ctx = context.WithValue(ctx, "alt", "")
+	isClaude := strings.Contains(strings.ToLower(baseModel), "claude")

 	token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
 	if errToken != nil {
@@ -597,25 +607,24 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 		auth = updatedAuth
 	}

-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

-	isClaude := strings.Contains(strings.ToLower(req.Model), "claude")
-
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")

 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
-	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
-	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
-	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
-	translated = normalizeAntigravityThinking(req.Model, translated, isClaude)
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+
+	translated, _ = thinking.ApplyThinking(translated, req.Model, "antigravity")
+
+	// Preserve Claude special handling (use baseModel for registry lookups)
+	translated = normalizeAntigravityThinking(baseModel, translated, isClaude)
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated)

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -625,7 +634,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 	var lastErr error

 	for idx, baseURL := range baseURLs {
-		httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, true, opts.Alt, baseURL)
+		httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL)
 		if errReq != nil {
 			err = errReq
 			return nil, err
@@ -771,6 +780,9 @@ func (e *AntigravityExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Au

 // CountTokens counts tokens for the given request using the Antigravity API.
 func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+	isClaude := strings.Contains(strings.ToLower(baseModel), "claude")
+
 	token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
 	if errToken != nil {
 		return cliproxyexecutor.Response{}, errToken
@@ -786,7 +798,16 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
 	to := sdktranslator.FromString("antigravity")
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)

-	isClaude := strings.Contains(strings.ToLower(req.Model), "claude")
+	// Prepare payload once (doesn't depend on baseURL)
+	payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+
+	payload, _ = thinking.ApplyThinking(payload, req.Model, "antigravity")
+
+	// Preserve Claude special handling (use baseModel for registry lookups)
+	payload = normalizeAntigravityThinking(baseModel, payload, isClaude)
+	payload = deleteJSONField(payload, "project")
+	payload = deleteJSONField(payload, "model")
+	payload = deleteJSONField(payload, "request.safetySettings")

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -803,14 +824,6 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
 	var lastErr error

 	for idx, baseURL := range baseURLs {
-		payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-		payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model)
-		payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, payload)
-		payload = normalizeAntigravityThinking(req.Model, payload, isClaude)
-		payload = deleteJSONField(payload, "project")
-		payload = deleteJSONField(payload, "model")
-		payload = deleteJSONField(payload, "request.safetySettings")
-
 		base := strings.TrimSuffix(baseURL, "/")
 		if base == "" {
 			base = buildBaseURL(auth)
@@ -1462,11 +1475,18 @@ func alias2ModelName(modelName string) string {
 	}
 }

-// normalizeAntigravityThinking clamps or removes thinking config based on model support.
-// For Claude models, it additionally ensures thinking budget < max_tokens.
+// normalizeAntigravityThinking performs Antigravity-specific thinking config normalization.
+// This function is called AFTER thinking.ApplyThinking() to apply Claude-specific constraints.
+//
+// It handles:
+//   - Stripping thinking config for unsupported models (via util.StripThinkingConfigIfUnsupported)
+//   - Normalizing budget to model range (via thinking.ClampBudget)
+//   - For Claude models: ensuring thinking budget < max_tokens
+//   - For Claude models: removing thinkingConfig if budget < minimum allowed
 func normalizeAntigravityThinking(model string, payload []byte, isClaude bool) []byte {
 	payload = util.StripThinkingConfigIfUnsupported(model, payload)
-	if !util.ModelSupportsThinking(model) {
+	modelInfo := registry.GetGlobalRegistry().GetModelInfo(model)
+	if modelInfo == nil || modelInfo.Thinking == nil {
 		return payload
 	}
 	budget := gjson.GetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget")
@@ -1474,7 +1494,7 @@ func normalizeAntigravityThinking(model string, payload []byte, isClaude bool) [
 		return payload
 	}
 	raw := int(budget.Int())
-	normalized := util.NormalizeThinkingBudget(model, raw)
+	normalized := thinking.ClampBudget(raw, modelInfo.Thinking.Min, modelInfo.Thinking.Max)

 	if isClaude {
 		effectiveMax, setDefaultMax := antigravityEffectiveMaxTokens(model, payload)
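The Claude-only constraint that follows (thinking budget strictly below max_tokens) is what thinking.ApplyThinking cannot know about, which is why normalizeAntigravityThinking still runs afterwards. A hedged sketch of that clamp; antigravityEffectiveMaxTokens comes from this diff, the exact arithmetic is an assumption:

    // Assumed shape of the Claude clamp applied after ClampBudget.
    effectiveMax, _ := antigravityEffectiveMaxTokens(model, payload)
    if normalized >= effectiveMax {
        normalized = effectiveMax - 1 // keep thinking budget < max_tokens
    }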
@@ -18,6 +18,7 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -84,17 +85,15 @@ func (e *ClaudeExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut
 }

 func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
-	apiKey, baseURL := claudeCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
+	apiKey, baseURL := claudeCreds(auth)
 	if baseURL == "" {
 		baseURL = "https://api.anthropic.com"
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
-	model := req.Model
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
-		model = override
-	}
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("claude")
 	// Use streaming translation to preserve function calling, except for claude.
@@ -103,22 +102,22 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, stream)
-	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream)
-	body, _ = sjson.SetBytes(body, "model", model)
-	// Inject thinking config based on model metadata for thinking variants
-	body = e.injectThinkingConfig(model, req.Metadata, body)
-	if !strings.HasPrefix(model, "claude-3-5-haiku") {
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
+	body, _ = sjson.SetBytes(body, "model", baseModel)
+
+	body, _ = thinking.ApplyThinking(body, req.Model, "claude")
+
+	if !strings.HasPrefix(baseModel, "claude-3-5-haiku") {
 		body = checkSystemInstructions(body)
 	}
-	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)

 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
 	body = disableThinkingIfToolChoiceForced(body)

 	// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
-	body = ensureMaxTokensForThinking(model, body)
+	body = ensureMaxTokensForThinking(baseModel, body)

 	// Extract betas from body and convert to header
 	var extraBetas []string
@@ -218,36 +217,35 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 }

 func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
-	apiKey, baseURL := claudeCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
+	apiKey, baseURL := claudeCreds(auth)
 	if baseURL == "" {
 		baseURL = "https://api.anthropic.com"
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("claude")
-	model := req.Model
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
-		model = override
-	}
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
-	body, _ = sjson.SetBytes(body, "model", model)
-	// Inject thinking config based on model metadata for thinking variants
-	body = e.injectThinkingConfig(model, req.Metadata, body)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	body, _ = sjson.SetBytes(body, "model", baseModel)
+
+	body, _ = thinking.ApplyThinking(body, req.Model, "claude")
+
 	body = checkSystemInstructions(body)
-	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)

 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
 	body = disableThinkingIfToolChoiceForced(body)

 	// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
-	body = ensureMaxTokensForThinking(model, body)
+	body = ensureMaxTokensForThinking(baseModel, body)

 	// Extract betas from body and convert to header
 	var extraBetas []string
@@ -381,8 +379,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 }

 func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
-	apiKey, baseURL := claudeCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
+	apiKey, baseURL := claudeCreds(auth)
 	if baseURL == "" {
 		baseURL = "https://api.anthropic.com"
 	}
@@ -391,14 +390,10 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	to := sdktranslator.FromString("claude")
 	// Use streaming translation to preserve function calling, except for claude.
 	stream := from != to
-	model := req.Model
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
-		model = override
-	}
-	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream)
-	body, _ = sjson.SetBytes(body, "model", model)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
+	body, _ = sjson.SetBytes(body, "model", baseModel)

-	if !strings.HasPrefix(model, "claude-3-5-haiku") {
+	if !strings.HasPrefix(baseModel, "claude-3-5-haiku") {
 		body = checkSystemInstructions(body)
 	}
@@ -527,17 +522,6 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) {
 	return betas, body
 }

-// injectThinkingConfig adds thinking configuration based on metadata using the unified flow.
-// It uses util.ResolveClaudeThinkingConfig which internally calls ResolveThinkingConfigFromMetadata
-// and NormalizeThinkingBudget, ensuring consistency with other executors like Gemini.
-func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte {
-	budget, ok := util.ResolveClaudeThinkingConfig(modelName, metadata)
-	if !ok {
-		return body
-	}
-	return util.ApplyClaudeThinkingConfig(body, budget)
-}
-
 // disableThinkingIfToolChoiceForced checks if tool_choice forces tool use and disables thinking.
 // Anthropic API does not allow thinking when tool_choice is set to "any" or a specific tool.
 // See: https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations
@@ -587,51 +571,6 @@ func ensureMaxTokensForThinking(modelName string, body []byte) []byte {
 	return body
 }

-func (e *ClaudeExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
-	trimmed := strings.TrimSpace(alias)
-	if trimmed == "" {
-		return ""
-	}
-
-	entry := e.resolveClaudeConfig(auth)
-	if entry == nil {
-		return ""
-	}
-
-	normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)
-
-	// Candidate names to match against configured aliases/names.
-	candidates := []string{strings.TrimSpace(normalizedModel)}
-	if !strings.EqualFold(normalizedModel, trimmed) {
-		candidates = append(candidates, trimmed)
-	}
-	if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
-		candidates = append(candidates, original)
-	}
-
-	for i := range entry.Models {
-		model := entry.Models[i]
-		name := strings.TrimSpace(model.Name)
-		modelAlias := strings.TrimSpace(model.Alias)
-
-		for _, candidate := range candidates {
-			if candidate == "" {
-				continue
-			}
-			if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
-				if name != "" {
-					return name
-				}
-				return candidate
-			}
-			if name != "" && strings.EqualFold(name, candidate) {
-				return name
-			}
-		}
-	}
-	return ""
-}
-
 func (e *ClaudeExecutor) resolveClaudeConfig(auth *cliproxyauth.Auth) *config.ClaudeKey {
 	if auth == nil || e.cfg == nil {
 		return nil
@@ -13,6 +13,7 @@ import (
 	codexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/codex"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -72,18 +73,15 @@ func (e *CodexExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth
 }

 func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
-	apiKey, baseURL := codexCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
+	apiKey, baseURL := codexCreds(auth)
 	if baseURL == "" {
 		baseURL = "https://chatgpt.com/backend-api/codex"
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
-	defer reporter.trackFailure(ctx, &err)
-
-	model := req.Model
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
-		model = override
-	}
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.trackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
@@ -93,17 +91,15 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalPayload = misc.InjectCodexUserAgent(originalPayload, userAgent)
-	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
 	body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent)
-	body = sdktranslator.TranslateRequest(from, to, model, body, false)
+	body = sdktranslator.TranslateRequest(from, to, baseModel, body, false)
 	body = misc.StripCodexUserAgent(body)
-	body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
-	body = NormalizeThinkingConfig(body, model, false)
-	if errValidate := ValidateThinkingConfig(body, model); errValidate != nil {
-		return resp, errValidate
-	}
-	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
-	body, _ = sjson.SetBytes(body, "model", model)
+
+	body, _ = thinking.ApplyThinking(body, req.Model, "codex")
+
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	body, _ = sjson.SetBytes(body, "model", baseModel)
 	body, _ = sjson.SetBytes(body, "stream", true)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
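For Codex, the same entry point plausibly translates a level suffix into reasoning.effort (the JSON path the removed ApplyReasoningEffortMetadata call targeted) rather than a token budget. A hypothetical before/after, not confirmed by this diff:

    // Hypothetical illustration of the "codex" format target:
    //   request model: "gpt-5(high)"
    //   before: {"model":"gpt-5", ...}
    //   after:  {"model":"gpt-5", "reasoning":{"effort":"high"}, ...}
    body, _ = thinking.ApplyThinking(body, "gpt-5(high)", "codex")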
@@ -182,18 +178,15 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 }

 func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
-	apiKey, baseURL := codexCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
+	apiKey, baseURL := codexCreds(auth)
 	if baseURL == "" {
 		baseURL = "https://chatgpt.com/backend-api/codex"
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
-	defer reporter.trackFailure(ctx, &err)
-
-	model := req.Model
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
-		model = override
-	}
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.trackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
@@ -203,20 +196,17 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalPayload = misc.InjectCodexUserAgent(originalPayload, userAgent)
-	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 	body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent)
-	body = sdktranslator.TranslateRequest(from, to, model, body, true)
+	body = sdktranslator.TranslateRequest(from, to, baseModel, body, true)
 	body = misc.StripCodexUserAgent(body)

-	body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
-	body = NormalizeThinkingConfig(body, model, false)
-	if errValidate := ValidateThinkingConfig(body, model); errValidate != nil {
-		return nil, errValidate
-	}
-	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
+	body, _ = thinking.ApplyThinking(body, req.Model, "codex")
+
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
-	body, _ = sjson.SetBytes(body, "model", model)
+	body, _ = sjson.SetBytes(body, "model", baseModel)

 	url := strings.TrimSuffix(baseURL, "/") + "/responses"
 	httpReq, err := e.cacheHelper(ctx, from, url, req, body)
@@ -303,25 +293,23 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 }

 func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
-	model := req.Model
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
-		model = override
-	}
+	baseModel := thinking.ParseSuffix(req.Model).ModelName

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
 	userAgent := codexUserAgent(ctx)
 	body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent)
-	body = sdktranslator.TranslateRequest(from, to, model, body, false)
+	body = sdktranslator.TranslateRequest(from, to, baseModel, body, false)
 	body = misc.StripCodexUserAgent(body)

-	body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
-	body, _ = sjson.SetBytes(body, "model", model)
+	body, _ = thinking.ApplyThinking(body, req.Model, "codex")
+
+	body, _ = sjson.SetBytes(body, "model", baseModel)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
 	body, _ = sjson.SetBytes(body, "stream", false)

-	enc, err := tokenizerForCodexModel(model)
+	enc, err := tokenizerForCodexModel(baseModel)
 	if err != nil {
 		return cliproxyexecutor.Response{}, fmt.Errorf("codex executor: tokenizer init failed: %w", err)
 	}
@@ -593,51 +581,6 @@ func codexCreds(a *cliproxyauth.Auth) (apiKey, baseURL string) {
 	return
 }

-func (e *CodexExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
-	trimmed := strings.TrimSpace(alias)
-	if trimmed == "" {
-		return ""
-	}
-
-	entry := e.resolveCodexConfig(auth)
-	if entry == nil {
-		return ""
-	}
-
-	normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)
-
-	// Candidate names to match against configured aliases/names.
-	candidates := []string{strings.TrimSpace(normalizedModel)}
-	if !strings.EqualFold(normalizedModel, trimmed) {
-		candidates = append(candidates, trimmed)
-	}
-	if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
-		candidates = append(candidates, original)
-	}
-
-	for i := range entry.Models {
-		model := entry.Models[i]
-		name := strings.TrimSpace(model.Name)
-		modelAlias := strings.TrimSpace(model.Alias)
-
-		for _, candidate := range candidates {
-			if candidate == "" {
-				continue
-			}
-			if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
-				if name != "" {
-					return name
-				}
-				return candidate
-			}
-			if name != "" && strings.EqualFold(name, candidate) {
-				return name
-			}
-		}
-	}
-	return ""
-}
-
 func (e *CodexExecutor) resolveCodexConfig(auth *cliproxyauth.Auth) *config.CodexKey {
 	if auth == nil || e.cfg == nil {
 		return nil
@@ -20,6 +20,7 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -102,28 +103,30 @@ func (e *GeminiCLIExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.

// Execute performs a non-streaming request to the Gemini CLI API.
func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
	baseModel := thinking.ParseSuffix(req.Model).ModelName

	tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
	if err != nil {
		return resp, err
	}
	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)

	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
	defer reporter.trackFailure(ctx, &err)

	from := opts.SourceFormat
	to := sdktranslator.FromString("gemini-cli")

	originalPayload := bytes.Clone(req.Payload)
	if len(opts.OriginalRequest) > 0 {
		originalPayload = bytes.Clone(opts.OriginalRequest)
	}
	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
	basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
	basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload)
	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated)
	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

	basePayload, _ = thinking.ApplyThinking(basePayload, req.Model, "gemini-cli")

	basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload)
	basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated)

	action := "generateContent"
	if req.Metadata != nil {
@@ -133,9 +136,9 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
	}

	projectID := resolveGeminiProjectID(auth)
	models := cliPreviewFallbackOrder(req.Model)
	if len(models) == 0 || models[0] != req.Model {
		models = append([]string{req.Model}, models...)
	models := cliPreviewFallbackOrder(baseModel)
	if len(models) == 0 || models[0] != baseModel {
		models = append([]string{baseModel}, models...)
	}

	httpClient := newHTTPClient(ctx, e.cfg, auth, 0)
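The guard around cliPreviewFallbackOrder guarantees the parsed base model is always the first attempt, with preview fallbacks queued behind it. The idiom in isolation (ensureFirst is a hypothetical stand-in, not a helper in this repository):

	// ensureFirst mirrors the prepend guard above: base is attempted first,
	// and is not duplicated when the variant list already starts with it.
	func ensureFirst(base string, variants []string) []string {
		if len(variants) == 0 || variants[0] != base {
			return append([]string{base}, variants...)
		}
		return variants
	}

For example, ensureFirst("gemini-2.5-pro", nil) yields just ["gemini-2.5-pro"], while a variant list already led by the base model passes through unchanged.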
@@ -246,34 +249,36 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth

// ExecuteStream performs a streaming request to the Gemini CLI API.
func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
	baseModel := thinking.ParseSuffix(req.Model).ModelName

	tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
	if err != nil {
		return nil, err
	}
	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)

	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
	defer reporter.trackFailure(ctx, &err)

	from := opts.SourceFormat
	to := sdktranslator.FromString("gemini-cli")

	originalPayload := bytes.Clone(req.Payload)
	if len(opts.OriginalRequest) > 0 {
		originalPayload = bytes.Clone(opts.OriginalRequest)
	}
	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
	basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
	basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload)
	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated)
	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)

	basePayload, _ = thinking.ApplyThinking(basePayload, req.Model, "gemini-cli")

	basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload)
	basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated)

	projectID := resolveGeminiProjectID(auth)

	models := cliPreviewFallbackOrder(req.Model)
	if len(models) == 0 || models[0] != req.Model {
		models = append([]string{req.Model}, models...)
	models := cliPreviewFallbackOrder(baseModel)
	if len(models) == 0 || models[0] != baseModel {
		models = append([]string{baseModel}, models...)
	}

	httpClient := newHTTPClient(ctx, e.cfg, auth, 0)
@@ -435,6 +440,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut

// CountTokens counts tokens for the given request using the Gemini CLI API.
func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
	baseModel := thinking.ParseSuffix(req.Model).ModelName

	tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
	if err != nil {
		return cliproxyexecutor.Response{}, err
@@ -443,9 +450,9 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
	from := opts.SourceFormat
	to := sdktranslator.FromString("gemini-cli")

	models := cliPreviewFallbackOrder(req.Model)
	if len(models) == 0 || models[0] != req.Model {
		models = append([]string{req.Model}, models...)
	models := cliPreviewFallbackOrder(baseModel)
	if len(models) == 0 || models[0] != baseModel {
		models = append([]string{baseModel}, models...)
	}

	httpClient := newHTTPClient(ctx, e.cfg, auth, 0)
@@ -463,15 +470,15 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.

	// The loop variable attemptModel is only used as the concrete model id sent to the upstream
	// Gemini CLI endpoint when iterating fallback variants.
	for _, attemptModel := range models {
		payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false)
		payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model)
		payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, payload)
	for range models {
		payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

		payload, _ = thinking.ApplyThinking(payload, req.Model, "gemini-cli")

		payload = deleteJSONField(payload, "project")
		payload = deleteJSONField(payload, "model")
		payload = deleteJSONField(payload, "request.safetySettings")
		payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
		payload = fixGeminiCLIImageAspectRatio(req.Model, payload)
		payload = fixGeminiCLIImageAspectRatio(baseModel, payload)

		tok, errTok := tokenSource.Token()
		if errTok != nil {

@@ -13,6 +13,7 @@ import (
	"strings"

	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -102,16 +103,13 @@ func (e *GeminiExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut
// - cliproxyexecutor.Response: The response from the API
// - error: An error if the request fails
func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
	baseModel := thinking.ParseSuffix(req.Model).ModelName

	apiKey, bearer := geminiCreds(auth)

	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
	defer reporter.trackFailure(ctx, &err)

	model := req.Model
	if override := e.resolveUpstreamModel(model, auth); override != "" {
		model = override
	}

	// Official Gemini API via API key or OAuth bearer
	from := opts.SourceFormat
	to := sdktranslator.FromString("gemini")
@@ -119,15 +117,14 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
	if len(opts.OriginalRequest) > 0 {
		originalPayload = bytes.Clone(opts.OriginalRequest)
	}
	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
	body = ApplyThinkingMetadata(body, req.Metadata, model)
	body = util.ApplyDefaultThinkingIfNeeded(model, body)
	body = util.NormalizeGeminiThinkingBudget(model, body)
	body = util.StripThinkingConfigIfUnsupported(model, body)
	body = fixGeminiImageAspectRatio(model, body)
	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
	body, _ = sjson.SetBytes(body, "model", model)
	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

	body, _ = thinking.ApplyThinking(body, req.Model, "gemini")

	body = fixGeminiImageAspectRatio(baseModel, body)
	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
	body, _ = sjson.SetBytes(body, "model", baseModel)

	action := "generateContent"
	if req.Metadata != nil {
@@ -136,7 +133,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
		}
	}
	baseURL := resolveGeminiBaseURL(auth)
	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, action)
	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, action)
	if opts.Alt != "" && action != "countTokens" {
		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
	}
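The five separate util.* thinking passes that used to run here are folded into the single thinking.ApplyThinking call. This diff does not show ApplyThinking's body, so the following is only a guess at its "gemini" branch, under the assumption (consistent with the suffix examples elsewhere in this commit) that a numeric suffix maps to a thinking budget and a word suffix to a thinking level; the JSON paths are assumptions too:

	// applyGeminiThinkingSketch is illustrative only, not the real implementation.
	// Assumes imports: strconv, strings, sjson, and the thinking package above.
	func applyGeminiThinkingSketch(body []byte, model string) []byte {
		res := thinking.ParseSuffix(model)
		if !res.HasSuffix {
			return body
		}
		if budget, err := strconv.Atoi(res.RawSuffix); err == nil { // e.g. "(8192)"
			body, _ = sjson.SetBytes(body, "generationConfig.thinkingConfig.thinkingBudget", budget)
			return body
		}
		// e.g. "(high)"
		body, _ = sjson.SetBytes(body, "generationConfig.thinkingConfig.thinkingLevel", strings.ToLower(res.RawSuffix))
		return body
	}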
@@ -206,34 +203,30 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r

// ExecuteStream performs a streaming request to the Gemini API.
func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
	baseModel := thinking.ParseSuffix(req.Model).ModelName

	apiKey, bearer := geminiCreds(auth)

	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
	defer reporter.trackFailure(ctx, &err)

	model := req.Model
	if override := e.resolveUpstreamModel(model, auth); override != "" {
		model = override
	}

	from := opts.SourceFormat
	to := sdktranslator.FromString("gemini")
	originalPayload := bytes.Clone(req.Payload)
	if len(opts.OriginalRequest) > 0 {
		originalPayload = bytes.Clone(opts.OriginalRequest)
	}
	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
	body = ApplyThinkingMetadata(body, req.Metadata, model)
	body = util.ApplyDefaultThinkingIfNeeded(model, body)
	body = util.NormalizeGeminiThinkingBudget(model, body)
	body = util.StripThinkingConfigIfUnsupported(model, body)
	body = fixGeminiImageAspectRatio(model, body)
	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
	body, _ = sjson.SetBytes(body, "model", model)
	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)

	body, _ = thinking.ApplyThinking(body, req.Model, "gemini")

	body = fixGeminiImageAspectRatio(baseModel, body)
	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
	body, _ = sjson.SetBytes(body, "model", baseModel)

	baseURL := resolveGeminiBaseURL(auth)
	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, "streamGenerateContent")
	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, "streamGenerateContent")
	if opts.Alt == "" {
		url = url + "?alt=sse"
	} else {
@@ -331,27 +324,25 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A

// CountTokens counts tokens for the given request using the Gemini API.
func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
	apiKey, bearer := geminiCreds(auth)
	baseModel := thinking.ParseSuffix(req.Model).ModelName

	model := req.Model
	if override := e.resolveUpstreamModel(model, auth); override != "" {
		model = override
	}
	apiKey, bearer := geminiCreds(auth)

	from := opts.SourceFormat
	to := sdktranslator.FromString("gemini")
	translatedReq := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
	translatedReq = ApplyThinkingMetadata(translatedReq, req.Metadata, model)
	translatedReq = util.StripThinkingConfigIfUnsupported(model, translatedReq)
	translatedReq = fixGeminiImageAspectRatio(model, translatedReq)
	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

	translatedReq, _ = thinking.ApplyThinking(translatedReq, req.Model, "gemini")

	translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq)
	respCtx := context.WithValue(ctx, "alt", opts.Alt)
	translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
	translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
	translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
	translatedReq, _ = sjson.SetBytes(translatedReq, "model", model)
	translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel)

	baseURL := resolveGeminiBaseURL(auth)
	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, "countTokens")
	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, "countTokens")

	requestBody := bytes.NewReader(translatedReq)

@@ -450,51 +441,6 @@ func resolveGeminiBaseURL(auth *cliproxyauth.Auth) string {
	return base
}

func (e *GeminiExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
	trimmed := strings.TrimSpace(alias)
	if trimmed == "" {
		return ""
	}

	entry := e.resolveGeminiConfig(auth)
	if entry == nil {
		return ""
	}

	normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)

	// Candidate names to match against configured aliases/names.
	candidates := []string{strings.TrimSpace(normalizedModel)}
	if !strings.EqualFold(normalizedModel, trimmed) {
		candidates = append(candidates, trimmed)
	}
	if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
		candidates = append(candidates, original)
	}

	for i := range entry.Models {
		model := entry.Models[i]
		name := strings.TrimSpace(model.Name)
		modelAlias := strings.TrimSpace(model.Alias)

		for _, candidate := range candidates {
			if candidate == "" {
				continue
			}
			if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
				if name != "" {
					return name
				}
				return candidate
			}
			if name != "" && strings.EqualFold(name, candidate) {
				return name
			}
		}
	}
	return ""
}

func (e *GeminiExecutor) resolveGeminiConfig(auth *cliproxyauth.Auth) *config.GeminiKey {
	if auth == nil || e.cfg == nil {
		return nil

@@ -15,7 +15,7 @@ import (

	vertexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/vertex"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
@@ -155,30 +155,26 @@ func (e *GeminiVertexExecutor) Refresh(_ context.Context, auth *cliproxyauth.Aut
// executeWithServiceAccount handles authentication using service account credentials.
// This method contains the original service account authentication logic.
func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (resp cliproxyexecutor.Response, err error) {
	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
	baseModel := thinking.ParseSuffix(req.Model).ModelName

	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
	defer reporter.trackFailure(ctx, &err)

	from := opts.SourceFormat
	to := sdktranslator.FromString("gemini")

	originalPayload := bytes.Clone(req.Payload)
	if len(opts.OriginalRequest) > 0 {
		originalPayload = bytes.Clone(opts.OriginalRequest)
	}
	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
		if budgetOverride != nil {
			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
			budgetOverride = &norm
		}
		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
	}
	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
	body = fixGeminiImageAspectRatio(req.Model, body)
	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
	body, _ = sjson.SetBytes(body, "model", req.Model)
	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

	body, _ = thinking.ApplyThinking(body, req.Model, "gemini")

	body = fixGeminiImageAspectRatio(baseModel, body)
	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
	body, _ = sjson.SetBytes(body, "model", baseModel)

	action := "generateContent"
	if req.Metadata != nil {
@@ -187,7 +183,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
		}
	}
	baseURL := vertexBaseURL(location)
	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, action)
	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, action)
	if opts.Alt != "" && action != "countTokens" {
		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
	}
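The url swap from req.Model to baseModel is load-bearing here: the model id is a path segment, so a suffixed name would leak "(8192)" into the request path. With illustrative values (host, API version, project id, and location below are placeholders, not values from this diff):

	base := thinking.ParseSuffix("gemini-2.5-pro(8192)").ModelName // "gemini-2.5-pro"
	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s",
		"https://us-central1-aiplatform.googleapis.com", "v1", "my-project", "us-central1", base, "generateContent")
	// .../publishers/google/models/gemini-2.5-pro:generateContent — no "(8192)" in the path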
@@ -258,35 +254,26 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au

// executeWithAPIKey handles authentication using API key credentials.
func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (resp cliproxyexecutor.Response, err error) {
	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
	defer reporter.trackFailure(ctx, &err)
	baseModel := thinking.ParseSuffix(req.Model).ModelName

	model := req.Model
	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
		model = override
	}
	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
	defer reporter.trackFailure(ctx, &err)

	from := opts.SourceFormat
	to := sdktranslator.FromString("gemini")

	originalPayload := bytes.Clone(req.Payload)
	if len(opts.OriginalRequest) > 0 {
		originalPayload = bytes.Clone(opts.OriginalRequest)
	}
	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
		if budgetOverride != nil {
			norm := util.NormalizeThinkingBudget(model, *budgetOverride)
			budgetOverride = &norm
		}
		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
	}
	body = util.ApplyDefaultThinkingIfNeeded(model, body)
	body = util.NormalizeGeminiThinkingBudget(model, body)
	body = util.StripThinkingConfigIfUnsupported(model, body)
	body = fixGeminiImageAspectRatio(model, body)
	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
	body, _ = sjson.SetBytes(body, "model", model)
	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

	body, _ = thinking.ApplyThinking(body, req.Model, "gemini")

	body = fixGeminiImageAspectRatio(baseModel, body)
	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
	body, _ = sjson.SetBytes(body, "model", baseModel)

	action := "generateContent"
	if req.Metadata != nil {
@@ -299,7 +286,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
	if baseURL == "" {
		baseURL = "https://generativelanguage.googleapis.com"
	}
	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, action)
	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, action)
	if opts.Alt != "" && action != "countTokens" {
		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
	}
@@ -367,33 +354,29 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip

// executeStreamWithServiceAccount handles streaming authentication using service account credentials.
func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
	baseModel := thinking.ParseSuffix(req.Model).ModelName

	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
	defer reporter.trackFailure(ctx, &err)

	from := opts.SourceFormat
	to := sdktranslator.FromString("gemini")

	originalPayload := bytes.Clone(req.Payload)
	if len(opts.OriginalRequest) > 0 {
		originalPayload = bytes.Clone(opts.OriginalRequest)
	}
	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
		if budgetOverride != nil {
			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
			budgetOverride = &norm
		}
		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
	}
	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
	body = fixGeminiImageAspectRatio(req.Model, body)
	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
	body, _ = sjson.SetBytes(body, "model", req.Model)
	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)

	body, _ = thinking.ApplyThinking(body, req.Model, "gemini")

	body = fixGeminiImageAspectRatio(baseModel, body)
	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
	body, _ = sjson.SetBytes(body, "model", baseModel)

	baseURL := vertexBaseURL(location)
	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "streamGenerateContent")
	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, "streamGenerateContent")
	if opts.Alt == "" {
		url = url + "?alt=sse"
	} else {
@@ -487,41 +470,32 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte

// executeStreamWithAPIKey handles streaming authentication using API key credentials.
func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
	defer reporter.trackFailure(ctx, &err)
	baseModel := thinking.ParseSuffix(req.Model).ModelName

	model := req.Model
	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
		model = override
	}
	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
	defer reporter.trackFailure(ctx, &err)

	from := opts.SourceFormat
	to := sdktranslator.FromString("gemini")

	originalPayload := bytes.Clone(req.Payload)
	if len(opts.OriginalRequest) > 0 {
		originalPayload = bytes.Clone(opts.OriginalRequest)
	}
	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
		if budgetOverride != nil {
			norm := util.NormalizeThinkingBudget(model, *budgetOverride)
			budgetOverride = &norm
		}
		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
	}
	body = util.ApplyDefaultThinkingIfNeeded(model, body)
	body = util.NormalizeGeminiThinkingBudget(model, body)
	body = util.StripThinkingConfigIfUnsupported(model, body)
	body = fixGeminiImageAspectRatio(model, body)
	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
	body, _ = sjson.SetBytes(body, "model", model)
	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)

	body, _ = thinking.ApplyThinking(body, req.Model, "gemini")

	body = fixGeminiImageAspectRatio(baseModel, body)
	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
	body, _ = sjson.SetBytes(body, "model", baseModel)

	// For API key auth, use simpler URL format without project/location
	if baseURL == "" {
		baseURL = "https://generativelanguage.googleapis.com"
	}
	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, "streamGenerateContent")
	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, "streamGenerateContent")
	if opts.Alt == "" {
		url = url + "?alt=sse"
	} else {
@@ -612,26 +586,24 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth

// countTokensWithServiceAccount counts tokens using service account credentials.
func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) {
	baseModel := thinking.ParseSuffix(req.Model).ModelName

	from := opts.SourceFormat
	to := sdktranslator.FromString("gemini")
	translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
		if budgetOverride != nil {
			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
			budgetOverride = &norm
		}
		translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
	}
	translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
	translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
	translatedReq, _ = sjson.SetBytes(translatedReq, "model", req.Model)

	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

	translatedReq, _ = thinking.ApplyThinking(translatedReq, req.Model, "gemini")

	translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq)
	translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel)
	respCtx := context.WithValue(ctx, "alt", opts.Alt)
	translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
	translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
	translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")

	baseURL := vertexBaseURL(location)
	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "countTokens")
	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, "countTokens")

	httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
	if errNewReq != nil {
@@ -688,10 +660,6 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
		return cliproxyexecutor.Response{}, errRead
	}
	appendAPIResponseChunk(ctx, e.cfg, data)
	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
		return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)}
	}
	count := gjson.GetBytes(data, "totalTokens").Int()
	out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
	return cliproxyexecutor.Response{Payload: []byte(out)}, nil
@@ -699,24 +667,17 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context

// countTokensWithAPIKey handles token counting using API key credentials.
func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) {
	model := req.Model
	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
		model = override
	}
	baseModel := thinking.ParseSuffix(req.Model).ModelName

	from := opts.SourceFormat
	to := sdktranslator.FromString("gemini")
	translatedReq := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
		if budgetOverride != nil {
			norm := util.NormalizeThinkingBudget(model, *budgetOverride)
			budgetOverride = &norm
		}
		translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
	}
	translatedReq = util.StripThinkingConfigIfUnsupported(model, translatedReq)
	translatedReq = fixGeminiImageAspectRatio(model, translatedReq)
	translatedReq, _ = sjson.SetBytes(translatedReq, "model", model)

	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

	translatedReq, _ = thinking.ApplyThinking(translatedReq, req.Model, "gemini")

	translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq)
	translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel)
	respCtx := context.WithValue(ctx, "alt", opts.Alt)
	translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
	translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
@@ -726,7 +687,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
	if baseURL == "" {
		baseURL = "https://generativelanguage.googleapis.com"
	}
	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, "countTokens")
	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, "countTokens")

	httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
	if errNewReq != nil {
@@ -780,10 +741,6 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
		return cliproxyexecutor.Response{}, errRead
	}
	appendAPIResponseChunk(ctx, e.cfg, data)
	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
		return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)}
	}
	count := gjson.GetBytes(data, "totalTokens").Int()
	out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
	return cliproxyexecutor.Response{Payload: []byte(out)}, nil
@@ -870,53 +827,6 @@ func vertexAccessToken(ctx context.Context, cfg *config.Config, auth *cliproxyau
	return tok.AccessToken, nil
}

// resolveUpstreamModel resolves the upstream model name from vertex-api-key configuration.
// It matches the requested model alias against configured models and returns the actual upstream name.
func (e *GeminiVertexExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
	trimmed := strings.TrimSpace(alias)
	if trimmed == "" {
		return ""
	}

	entry := e.resolveVertexConfig(auth)
	if entry == nil {
		return ""
	}

	normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)

	// Candidate names to match against configured aliases/names.
	candidates := []string{strings.TrimSpace(normalizedModel)}
	if !strings.EqualFold(normalizedModel, trimmed) {
		candidates = append(candidates, trimmed)
	}
	if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
		candidates = append(candidates, original)
	}

	for i := range entry.Models {
		model := entry.Models[i]
		name := strings.TrimSpace(model.Name)
		modelAlias := strings.TrimSpace(model.Alias)

		for _, candidate := range candidates {
			if candidate == "" {
				continue
			}
			if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
				if name != "" {
					return name
				}
				return candidate
			}
			if name != "" && strings.EqualFold(name, candidate) {
				return name
			}
		}
	}
	return ""
}

// resolveVertexConfig finds the matching vertex-api-key configuration entry for the given auth.
func (e *GeminiVertexExecutor) resolveVertexConfig(auth *cliproxyauth.Auth) *config.VertexCompatKey {
	if auth == nil || e.cfg == nil {

@@ -12,6 +12,7 @@ import (

	iflowauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/iflow"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -67,6 +68,8 @@ func (e *IFlowExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth

// Execute performs a non-streaming chat completion request.
func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
	baseModel := thinking.ParseSuffix(req.Model).ModelName

	apiKey, baseURL := iflowCreds(auth)
	if strings.TrimSpace(apiKey) == "" {
		err = fmt.Errorf("iflow executor: missing api key")
@@ -76,7 +79,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
		baseURL = iflowauth.DefaultAPIBaseURL
	}

	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
	defer reporter.trackFailure(ctx, &err)

	from := opts.SourceFormat
@@ -85,17 +88,14 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
	if len(opts.OriginalRequest) > 0 {
		originalPayload = bytes.Clone(opts.OriginalRequest)
	}
	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
	body, _ = sjson.SetBytes(body, "model", req.Model)
	body = NormalizeThinkingConfig(body, req.Model, false)
	if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
		return resp, errValidate
	}
	body = applyIFlowThinkingConfig(body)
	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
	body, _ = sjson.SetBytes(body, "model", baseModel)

	body, _ = thinking.ApplyThinking(body, req.Model, "iflow")

	body = preserveReasoningContentInMessages(body)
	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)

	endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint

@@ -154,6 +154,8 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
	reporter.ensurePublished(ctx)

	var param any
	// Note: TranslateNonStream uses req.Model (original with suffix) to preserve
	// the original model name in the response for client compatibility.
	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
	resp = cliproxyexecutor.Response{Payload: []byte(out)}
	return resp, nil
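The note on TranslateNonStream is the other half of the suffix contract: the wire request carries the base name, but the response is translated back under the client's original model string. With hypothetical values:

	reqModel := "glm-4(high)"                             // what the client asked for
	baseModel := thinking.ParseSuffix(reqModel).ModelName // "glm-4", what iFlow sees on the wire
	_ = baseModel
	// TranslateNonStream is handed reqModel, so the JSON returned to the client
	// echoes "glm-4(high)" even though the upstream only ever saw "glm-4".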
@@ -161,6 +163,8 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re

// ExecuteStream performs a streaming chat completion request.
func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
	baseModel := thinking.ParseSuffix(req.Model).ModelName

	apiKey, baseURL := iflowCreds(auth)
	if strings.TrimSpace(apiKey) == "" {
		err = fmt.Errorf("iflow executor: missing api key")
@@ -170,7 +174,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
		baseURL = iflowauth.DefaultAPIBaseURL
	}

	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
	defer reporter.trackFailure(ctx, &err)

	from := opts.SourceFormat
@@ -179,23 +183,19 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
	if len(opts.OriginalRequest) > 0 {
		originalPayload = bytes.Clone(opts.OriginalRequest)
	}
	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
	body, _ = sjson.SetBytes(body, "model", baseModel)

	body, _ = thinking.ApplyThinking(body, req.Model, "iflow")

	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
	body, _ = sjson.SetBytes(body, "model", req.Model)
	body = NormalizeThinkingConfig(body, req.Model, false)
	if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
		return nil, errValidate
	}
	body = applyIFlowThinkingConfig(body)
	body = preserveReasoningContentInMessages(body)
	// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
	toolsResult := gjson.GetBytes(body, "tools")
	if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
		body = ensureToolsArray(body)
	}
	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)

	endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint

@@ -278,11 +278,13 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
}

func (e *IFlowExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
	baseModel := thinking.ParseSuffix(req.Model).ModelName

	from := opts.SourceFormat
	to := sdktranslator.FromString("openai")
	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

	enc, err := tokenizerForModel(req.Model)
	enc, err := tokenizerForModel(baseModel)
	if err != nil {
		return cliproxyexecutor.Response{}, fmt.Errorf("iflow executor: tokenizer init failed: %w", err)
	}
@@ -520,41 +522,3 @@ func preserveReasoningContentInMessages(body []byte) []byte {

	return body
}

// applyIFlowThinkingConfig converts normalized reasoning_effort to model-specific thinking configurations.
// This should be called after NormalizeThinkingConfig has processed the payload.
//
// Model-specific handling:
// - GLM-4.6/4.7: Uses chat_template_kwargs.enable_thinking (boolean) and chat_template_kwargs.clear_thinking=false
// - MiniMax M2/M2.1: Uses reasoning_split=true for OpenAI-style reasoning separation
func applyIFlowThinkingConfig(body []byte) []byte {
	effort := gjson.GetBytes(body, "reasoning_effort")
	if !effort.Exists() {
		return body
	}

	model := strings.ToLower(gjson.GetBytes(body, "model").String())
	val := strings.ToLower(strings.TrimSpace(effort.String()))
	enableThinking := val != "none" && val != ""

	// Remove reasoning_effort as we'll convert to model-specific format
	body, _ = sjson.DeleteBytes(body, "reasoning_effort")
	body, _ = sjson.DeleteBytes(body, "thinking")

	// GLM-4.6/4.7: Use chat_template_kwargs
	if strings.HasPrefix(model, "glm-4") {
		body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
		if enableThinking {
			body, _ = sjson.SetBytes(body, "chat_template_kwargs.clear_thinking", false)
		}
		return body
	}

	// MiniMax M2/M2.1: Use reasoning_split
	if strings.HasPrefix(model, "minimax-m2") {
		body, _ = sjson.SetBytes(body, "reasoning_split", enableThinking)
		return body
	}

	return body
}
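applyIFlowThinkingConfig is deleted outright; its model-specific mapping presumably moves behind thinking.ApplyThinking's "iflow" target. For reference, the mapping the deleted function implemented, restated as a compact sketch (iflowThinkingSketch is not a function in this repository):

	// iflowThinkingSketch restates the deleted branches: GLM models toggle
	// chat_template_kwargs, MiniMax M2 models toggle reasoning_split.
	func iflowThinkingSketch(body []byte, enableThinking bool) []byte {
		model := strings.ToLower(gjson.GetBytes(body, "model").String())
		switch {
		case strings.HasPrefix(model, "glm-4"):
			body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
			if enableThinking {
				body, _ = sjson.SetBytes(body, "chat_template_kwargs.clear_thinking", false)
			}
		case strings.HasPrefix(model, "minimax-m2"):
			body, _ = sjson.SetBytes(body, "reasoning_split", enableThinking)
		}
		return body
	}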
internal/runtime/executor/iflow_executor_test.go (new file, 67 lines)
@@ -0,0 +1,67 @@
package executor

import (
	"testing"

	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
)

func TestIFlowExecutorParseSuffix(t *testing.T) {
	tests := []struct {
		name      string
		model     string
		wantBase  string
		wantLevel string
	}{
		{"no suffix", "glm-4", "glm-4", ""},
		{"glm with suffix", "glm-4.1-flash(high)", "glm-4.1-flash", "high"},
		{"minimax no suffix", "minimax-m2", "minimax-m2", ""},
		{"minimax with suffix", "minimax-m2.1(medium)", "minimax-m2.1", "medium"},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := thinking.ParseSuffix(tt.model)
			if result.ModelName != tt.wantBase {
				t.Errorf("ParseSuffix(%q).ModelName = %q, want %q", tt.model, result.ModelName, tt.wantBase)
			}
		})
	}
}

func TestPreserveReasoningContentInMessages(t *testing.T) {
	tests := []struct {
		name  string
		input []byte
		want  []byte // nil means output should equal input
	}{
		{
			"non-glm model passthrough",
			[]byte(`{"model":"gpt-4","messages":[]}`),
			nil,
		},
		{
			"glm model with empty messages",
			[]byte(`{"model":"glm-4","messages":[]}`),
			nil,
		},
		{
			"glm model preserves existing reasoning_content",
			[]byte(`{"model":"glm-4","messages":[{"role":"assistant","content":"hi","reasoning_content":"thinking..."}]}`),
			nil,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := preserveReasoningContentInMessages(tt.input)
			want := tt.want
			if want == nil {
				want = tt.input
			}
			if string(got) != string(want) {
				t.Errorf("preserveReasoningContentInMessages() = %s, want %s", got, want)
			}
		})
	}
}
@@ -11,6 +11,7 @@ import (
	"time"

	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -69,7 +70,9 @@ func (e *OpenAICompatExecutor) HttpRequest(ctx context.Context, auth *cliproxyau
}

func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
	baseModel := thinking.ParseSuffix(req.Model).ModelName

	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
	defer reporter.trackFailure(ctx, &err)

	baseURL, apiKey := e.resolveCredentials(auth)
@@ -85,19 +88,11 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
	if len(opts.OriginalRequest) > 0 {
		originalPayload = bytes.Clone(opts.OriginalRequest)
	}
	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, opts.Stream)
	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), opts.Stream)
	modelOverride := e.resolveUpstreamModel(req.Model, auth)
	if modelOverride != "" {
		translated = e.overrideModel(translated, modelOverride)
	}
	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated)
	allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
	translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
	translated = NormalizeThinkingConfig(translated, req.Model, allowCompat)
	if errValidate := ValidateThinkingConfig(translated, req.Model); errValidate != nil {
		return resp, errValidate
	}
	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, opts.Stream)
	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), opts.Stream)
	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated)

	translated, _ = thinking.ApplyThinking(translated, req.Model, "openai")

	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
@@ -168,7 +163,9 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
}

func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
	baseModel := thinking.ParseSuffix(req.Model).ModelName

	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
	defer reporter.trackFailure(ctx, &err)

	baseURL, apiKey := e.resolveCredentials(auth)
@@ -176,25 +173,18 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
		err = statusErr{code: http.StatusUnauthorized, msg: "missing provider baseURL"}
		return nil, err
	}

	from := opts.SourceFormat
	to := sdktranslator.FromString("openai")
	originalPayload := bytes.Clone(req.Payload)
	if len(opts.OriginalRequest) > 0 {
		originalPayload = bytes.Clone(opts.OriginalRequest)
	}
	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
	modelOverride := e.resolveUpstreamModel(req.Model, auth)
	if modelOverride != "" {
		translated = e.overrideModel(translated, modelOverride)
	}
	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated)
	allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
	translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
	translated = NormalizeThinkingConfig(translated, req.Model, allowCompat)
	if errValidate := ValidateThinkingConfig(translated, req.Model); errValidate != nil {
		return nil, errValidate
	}
	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated)

	translated, _ = thinking.ApplyThinking(translated, req.Model, "openai")

	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
@@ -293,15 +283,15 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
|
||||
}
|
||||
|
||||
func (e *OpenAICompatExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("openai")
|
||||
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
|
||||
|
||||
modelForCounting := req.Model
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
translated = e.overrideModel(translated, modelOverride)
|
||||
modelForCounting = modelOverride
|
||||
}
|
||||
modelForCounting := baseModel
|
||||
|
||||
translated, _ = thinking.ApplyThinking(translated, req.Model, "openai")
|
||||
|
||||
enc, err := tokenizerForModel(modelForCounting)
|
||||
if err != nil {
|
||||
@@ -336,53 +326,6 @@ func (e *OpenAICompatExecutor) resolveCredentials(auth *cliproxyauth.Auth) (base
|
||||
return
|
||||
}
|
||||
|
||||
func (e *OpenAICompatExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
|
||||
if alias == "" || auth == nil || e.cfg == nil {
|
||||
return ""
|
||||
}
|
||||
compat := e.resolveCompatConfig(auth)
|
||||
if compat == nil {
|
||||
return ""
|
||||
}
|
||||
for i := range compat.Models {
|
||||
model := compat.Models[i]
|
||||
if model.Alias != "" {
|
||||
if strings.EqualFold(model.Alias, alias) {
|
||||
if model.Name != "" {
|
||||
return model.Name
|
||||
}
|
||||
return alias
|
||||
}
|
||||
continue
|
||||
}
|
||||
if strings.EqualFold(model.Name, alias) {
|
||||
return model.Name
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (e *OpenAICompatExecutor) allowCompatReasoningEffort(model string, auth *cliproxyauth.Auth) bool {
|
||||
trimmed := strings.TrimSpace(model)
|
||||
if trimmed == "" || e == nil || e.cfg == nil {
|
||||
return false
|
||||
}
|
||||
compat := e.resolveCompatConfig(auth)
|
||||
if compat == nil || len(compat.Models) == 0 {
|
||||
return false
|
||||
}
|
||||
for i := range compat.Models {
|
||||
entry := compat.Models[i]
|
||||
if strings.EqualFold(strings.TrimSpace(entry.Alias), trimmed) {
|
||||
return true
|
||||
}
|
||||
if strings.EqualFold(strings.TrimSpace(entry.Name), trimmed) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (e *OpenAICompatExecutor) resolveCompatConfig(auth *cliproxyauth.Auth) *config.OpenAICompatibility {
|
||||
if auth == nil || e.cfg == nil {
|
||||
return nil
|
||||
|
||||
@@ -1,109 +1,13 @@
package executor

import (
"fmt"
"net/http"
"strings"

"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)

// ApplyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192))
// for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
// Use the alias from metadata if available, as it's registered in the global registry
// with thinking metadata; the upstream model name may not be registered.
lookupModel := util.ResolveOriginalModel(model, metadata)

// Determine which model to use for thinking support check.
// If the alias (lookupModel) is not in the registry, fall back to the upstream model.
thinkingModel := lookupModel
if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) {
thinkingModel = model
}

budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata)
if !ok || (budgetOverride == nil && includeOverride == nil) {
return payload
}
if !util.ModelSupportsThinking(thinkingModel) {
return payload
}
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride)
budgetOverride = &norm
}
return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
}

// ApplyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192))
// for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
func ApplyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
// Use the alias from metadata if available, as it's registered in the global registry
// with thinking metadata; the upstream model name may not be registered.
lookupModel := util.ResolveOriginalModel(model, metadata)

// Determine which model to use for thinking support check.
// If the alias (lookupModel) is not in the registry, fall back to the upstream model.
thinkingModel := lookupModel
if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) {
thinkingModel = model
}

budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata)
if !ok || (budgetOverride == nil && includeOverride == nil) {
return payload
}
if !util.ModelSupportsThinking(thinkingModel) {
return payload
}
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride)
budgetOverride = &norm
}
return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
}

// ApplyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
// Metadata values take precedence over any existing field when the model supports thinking, intentionally
// overwriting caller-provided values to honor suffix/default metadata priority.
func ApplyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string, allowCompat bool) []byte {
if len(metadata) == 0 {
return payload
}
if field == "" {
return payload
}
baseModel := util.ResolveOriginalModel(model, metadata)
if baseModel == "" {
baseModel = model
}
if !util.ModelSupportsThinking(baseModel) && !allowCompat {
return payload
}
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
if util.ModelUsesThinkingLevels(baseModel) || allowCompat {
if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
return updated
}
}
}
// Fallback: numeric thinking_budget suffix for level-based (OpenAI-style) models.
if util.ModelUsesThinkingLevels(baseModel) || allowCompat {
if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
if effort, ok := util.ThinkingBudgetToEffort(baseModel, *budget); ok && effort != "" {
if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
return updated
}
}
}
}
return payload
}
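A minimal sketch of the intended override, assuming a level-based model and a metadata map shaped the way util.ReasoningEffortFromMetadata expects; the concrete metadata keys are internal to util, so the map literal below is hypothetical:

// Sketch only; the metadata key is assumed, not confirmed by this commit.
payload := []byte(`{"reasoning_effort":"low"}`)
metadata := map[string]any{"reasoning_effort": "high"}
payload = ApplyReasoningEffortMetadata(payload, metadata, "gpt-5.2", "reasoning_effort", false)
// For a model where util.ModelUsesThinkingLevels returns true, payload now
// reads {"reasoning_effort":"high"}: suffix/default metadata wins over the caller's value.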

// applyPayloadConfigWithRoot behaves like applyPayloadConfig but treats all parameter
// paths as relative to the provided root path (for example, "request" for Gemini CLI)
// and restricts matches to the given protocol when supplied. Defaults are checked
@@ -256,102 +160,3 @@ func matchModelPattern(pattern, model string) bool {
}
return pi == len(pattern)
}

// NormalizeThinkingConfig normalizes thinking-related fields in the payload
// based on model capabilities. For models without thinking support, it strips
// reasoning fields. For models with level-based thinking, it validates and
// normalizes the reasoning effort level. For models with numeric budget thinking,
// it strips the effort string fields.
func NormalizeThinkingConfig(payload []byte, model string, allowCompat bool) []byte {
if len(payload) == 0 || model == "" {
return payload
}

if !util.ModelSupportsThinking(model) {
if allowCompat {
return payload
}
return StripThinkingFields(payload, false)
}

if util.ModelUsesThinkingLevels(model) {
return NormalizeReasoningEffortLevel(payload, model)
}

// Model supports thinking but uses numeric budgets, not levels.
// Strip effort string fields since they are not applicable.
return StripThinkingFields(payload, true)
}
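One call per branch, with placeholder model names whose registry capabilities match the branch being exercised:

// Placeholder model names; behavior follows each model's registry entry.
NormalizeThinkingConfig(body, "model-without-thinking", false) // strips reasoning/thinking fields
NormalizeThinkingConfig(body, "model-with-levels", false)      // normalizes reasoning_effort / reasoning.effort
NormalizeThinkingConfig(body, "model-with-budgets", false)     // strips only the effort string fields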

// StripThinkingFields removes thinking-related fields from the payload for
// models that do not support thinking. If effortOnly is true, only removes
// effort string fields (for models using numeric budgets).
func StripThinkingFields(payload []byte, effortOnly bool) []byte {
fieldsToRemove := []string{
"reasoning_effort",
"reasoning.effort",
}
if !effortOnly {
fieldsToRemove = append([]string{"reasoning", "thinking"}, fieldsToRemove...)
}
out := payload
for _, field := range fieldsToRemove {
if gjson.GetBytes(out, field).Exists() {
out, _ = sjson.DeleteBytes(out, field)
}
}
return out
}

// NormalizeReasoningEffortLevel validates and normalizes the reasoning_effort
// or reasoning.effort field for level-based thinking models.
func NormalizeReasoningEffortLevel(payload []byte, model string) []byte {
out := payload

if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() {
if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
out, _ = sjson.SetBytes(out, "reasoning_effort", normalized)
}
}

if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() {
if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
out, _ = sjson.SetBytes(out, "reasoning.effort", normalized)
}
}

return out
}

// ValidateThinkingConfig checks for unsupported reasoning levels on level-based models.
// Returns a statusErr with 400 when an unsupported level is supplied to avoid silently
// downgrading requests.
func ValidateThinkingConfig(payload []byte, model string) error {
if len(payload) == 0 || model == "" {
return nil
}
if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) {
return nil
}

levels := util.GetModelThinkingLevels(model)
checkField := func(path string) error {
if effort := gjson.GetBytes(payload, path); effort.Exists() {
if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok {
return statusErr{
code: http.StatusBadRequest,
msg: fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")),
}
}
}
return nil
}

if err := checkField("reasoning_effort"); err != nil {
return err
}
if err := checkField("reasoning.effort"); err != nil {
return err
}
return nil
}
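A sketch of the failure path, assuming a level-based model whose registry entry lists low, medium, and high:

body := []byte(`{"reasoning_effort":"ultra"}`)
if err := ValidateThinkingConfig(body, "gpt-5.2"); err != nil {
// err is a statusErr with code 400 and a message of the form:
// unsupported reasoning effort level "ultra" for model gpt-5.2 (supported: low, medium, high)
}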

@@ -12,6 +12,7 @@ import (

qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
@@ -65,12 +66,14 @@ func (e *QwenExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth,
}

func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
token, baseURL := qwenCreds(auth)
baseModel := thinking.ParseSuffix(req.Model).ModelName

token, baseURL := qwenCreds(auth)
if baseURL == "" {
baseURL = "https://portal.qwen.ai/v1"
}
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)

reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
defer reporter.trackFailure(ctx, &err)

from := opts.SourceFormat
@@ -79,15 +82,13 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
body, _ = sjson.SetBytes(body, "model", req.Model)
body = NormalizeThinkingConfig(body, req.Model, false)
if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
return resp, errValidate
}
body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
body, _ = sjson.SetBytes(body, "model", baseModel)

body, _ = thinking.ApplyThinking(body, req.Model, "openai")

body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)

url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -140,18 +141,22 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
appendAPIResponseChunk(ctx, e.cfg, data)
reporter.publish(ctx, parseOpenAIUsage(data))
var param any
// Note: TranslateNonStream uses req.Model (original with suffix) to preserve
// the original model name in the response for client compatibility.
out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
resp = cliproxyexecutor.Response{Payload: []byte(out)}
return resp, nil
}

func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
token, baseURL := qwenCreds(auth)
baseModel := thinking.ParseSuffix(req.Model).ModelName

token, baseURL := qwenCreds(auth)
if baseURL == "" {
baseURL = "https://portal.qwen.ai/v1"
}
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)

reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
defer reporter.trackFailure(ctx, &err)

from := opts.SourceFormat
@@ -160,15 +165,12 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
body, _ = sjson.SetBytes(body, "model", baseModel)

body, _ = thinking.ApplyThinking(body, req.Model, "openai")

body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
body, _ = sjson.SetBytes(body, "model", req.Model)
body = NormalizeThinkingConfig(body, req.Model, false)
if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
return nil, errValidate
}
toolsResult := gjson.GetBytes(body, "tools")
// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
// This will have no real consequences. It's just to scare Qwen3.
@@ -176,7 +178,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
}
body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)

url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -256,13 +258,15 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
}

func (e *QwenExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
baseModel := thinking.ParseSuffix(req.Model).ModelName

from := opts.SourceFormat
to := sdktranslator.FromString("openai")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

modelName := gjson.GetBytes(body, "model").String()
if strings.TrimSpace(modelName) == "" {
modelName = req.Model
modelName = baseModel
}

enc, err := tokenizerForModel(modelName)

30 internal/runtime/executor/qwen_executor_test.go Normal file
@@ -0,0 +1,30 @@
package executor

import (
"testing"

"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
)

func TestQwenExecutorParseSuffix(t *testing.T) {
tests := []struct {
name string
model string
wantBase string
wantLevel string
}{
{"no suffix", "qwen-max", "qwen-max", ""},
{"with level suffix", "qwen-max(high)", "qwen-max", "high"},
{"with budget suffix", "qwen-max(16384)", "qwen-max", "16384"},
{"complex model name", "qwen-plus-latest(medium)", "qwen-plus-latest", "medium"},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := thinking.ParseSuffix(tt.model)
if result.ModelName != tt.wantBase {
t.Errorf("ParseSuffix(%q).ModelName = %q, want %q", tt.model, result.ModelName, tt.wantBase)
}
// Assert the suffix column too; the table declared it but never checked it.
if result.RawSuffix != tt.wantLevel {
t.Errorf("ParseSuffix(%q).RawSuffix = %q, want %q", tt.model, result.RawSuffix, tt.wantLevel)
}
})
}
}

10 internal/runtime/executor/thinking_providers.go Normal file
@@ -0,0 +1,10 @@
package executor

import (
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/claude"
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/codex"
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini"
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli"
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow"
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai"
)
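The blank imports exist purely for their side effects: each provider package presumably registers its applier with the thinking package at initialization time. A plausible shape for one such package, assuming an init-time call to thinking.RegisterProvider (the provider sources are not part of this commit, so names below are illustrative):

// Hypothetical sketch of a provider package; names are assumed.
package openai

import "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"

type openaiApplier struct{}

// Apply would write the validated config into the OpenAI payload; body omitted here.

func init() {
thinking.RegisterProvider("openai", openaiApplier{})
}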

430 internal/thinking/apply.go Normal file
@@ -0,0 +1,430 @@
// Package thinking provides unified thinking configuration processing.
package thinking

import (
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
log "github.com/sirupsen/logrus"
"github.com/tidwall/gjson"
)

// providerAppliers maps provider names to their ProviderApplier implementations.
var providerAppliers = map[string]ProviderApplier{
"gemini": nil,
"gemini-cli": nil,
"claude": nil,
"openai": nil,
"codex": nil,
"iflow": nil,
"antigravity": nil,
}

// GetProviderApplier returns the ProviderApplier for the given provider name.
// Returns nil if the provider is not registered.
func GetProviderApplier(provider string) ProviderApplier {
return providerAppliers[provider]
}

// RegisterProvider registers a provider applier by name.
func RegisterProvider(name string, applier ProviderApplier) {
providerAppliers[name] = applier
}

// IsUserDefinedModel reports whether the model is a user-defined model that should
// have thinking configuration passed through without validation.
//
// User-defined models are configured via config file's models[] array
// (e.g., openai-compatibility.*.models[], *-api-key.models[]). These models
// are marked with UserDefined=true at registration time.
//
// User-defined models should have their thinking configuration applied directly,
// letting the upstream service validate the configuration.
func IsUserDefinedModel(modelInfo *registry.ModelInfo) bool {
if modelInfo == nil {
return false
}
return modelInfo.UserDefined
}

// ApplyThinking applies thinking configuration to a request body.
//
// This is the unified entry point for all providers. It follows the processing
// order defined in FR25: route check → model capability query → config extraction
// → validation → application.
//
// Suffix Priority: When the model name includes a thinking suffix (e.g., "gemini-2.5-pro(8192)"),
// the suffix configuration takes priority over any thinking parameters in the request body.
// This enables users to override thinking settings via the model name without modifying their
// request payload.
//
// Parameters:
// - body: Original request body JSON
// - model: Model name, optionally with thinking suffix (e.g., "claude-sonnet-4-5(16384)")
// - provider: Provider name (gemini, gemini-cli, antigravity, claude, openai, codex, iflow)
//
// Returns:
// - Modified request body JSON with thinking configuration applied
// - Error if validation fails (ThinkingError). On error, the original body
// is returned (not nil) to enable defensive programming patterns.
//
// Passthrough behavior (returns original body without error):
// - Unknown provider (not in providerAppliers map)
// - modelInfo is nil (model not found in registry)
// - modelInfo.Thinking is nil (model doesn't support thinking)
//
// Example:
//
// // With suffix - suffix config takes priority
// result, err := thinking.ApplyThinking(body, "gemini-2.5-pro(8192)", "gemini")
//
// // Without suffix - uses body config
// result, err := thinking.ApplyThinking(body, "gemini-2.5-pro", "gemini")
func ApplyThinking(body []byte, model string, provider string) ([]byte, error) {
// 1. Route check: Get provider applier
applier := GetProviderApplier(provider)
if applier == nil {
log.WithField("provider", provider).Debug("thinking: unknown provider, passthrough")
return body, nil
}

// 2. Parse suffix and get modelInfo
suffixResult := ParseSuffix(model)
baseModel := suffixResult.ModelName
modelInfo := registry.GetGlobalRegistry().GetModelInfo(baseModel)

// 3. Model capability check
if modelInfo == nil {
log.WithField("model", model).Debug("thinking: nil modelInfo, passthrough")
return body, nil
}
if modelInfo.Thinking == nil {
if IsUserDefinedModel(modelInfo) {
return applyUserDefinedModel(body, modelInfo, provider, suffixResult)
}
config := extractThinkingConfig(body, provider)
if hasThinkingConfig(config) {
log.WithFields(log.Fields{
"model": modelInfo.ID,
"provider": provider,
}).Debug("thinking: model does not support thinking, stripping config")
return StripThinkingConfig(body, provider), nil
}
log.WithField("model", modelInfo.ID).Debug("thinking: model does not support thinking, passthrough")
return body, nil
}

// 4. Get config: suffix priority over body
var config ThinkingConfig
if suffixResult.HasSuffix {
config = parseSuffixToConfig(suffixResult.RawSuffix)
log.WithFields(log.Fields{
"provider": provider,
"model": model,
"raw_suffix": suffixResult.RawSuffix,
"config": config,
}).Debug("thinking: using suffix config (priority)")
} else {
config = extractThinkingConfig(body, provider)
log.WithFields(log.Fields{
"provider": provider,
"model": modelInfo.ID,
"config": config,
}).Debug("thinking: extracted config from request body")
}

if !hasThinkingConfig(config) {
log.WithFields(log.Fields{
"provider": provider,
"model": modelInfo.ID,
}).Debug("thinking: no config found, passthrough")
return body, nil
}

// 5. Validate and normalize configuration
validated, err := ValidateConfig(config, modelInfo.Thinking)
if err != nil {
log.WithFields(log.Fields{
"provider": provider,
"model": modelInfo.ID,
"error": err.Error(),
}).Warn("thinking: validation failed, returning original body")
// Return original body on validation failure (defensive programming).
// This ensures callers who ignore the error won't receive nil body.
// The upstream service will decide how to handle the unmodified request.
return body, err
}

// Defensive check: ValidateConfig should never return (nil, nil)
if validated == nil {
log.WithFields(log.Fields{
"provider": provider,
"model": modelInfo.ID,
}).Warn("thinking: ValidateConfig returned nil config without error, passthrough")
return body, nil
}

log.WithFields(log.Fields{
"provider": provider,
"model": modelInfo.ID,
"validated": *validated,
}).Debug("thinking: applying validated config")

// 6. Apply configuration using provider-specific applier
return applier.Apply(body, *validated, modelInfo)
}
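Because the original body comes back on failure, callers that prefer the request to proceed can simply discard the error, which is what the executors above do:

body, err := thinking.ApplyThinking(body, "gemini-2.5-pro(8192)", "gemini")
if err != nil {
// body is still the unmodified original; a caller may forward it upstream
// as-is or surface the ThinkingError to the client instead.
}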

// parseSuffixToConfig converts a raw suffix string to ThinkingConfig.
//
// Parsing priority:
// 1. Special values: "none" → ModeNone, "auto"/"-1" → ModeAuto
// 2. Level names: "minimal", "low", "medium", "high", "xhigh" → ModeLevel
// 3. Numeric values: positive integers → ModeBudget, 0 → ModeNone
//
// If none of the above match, returns empty ThinkingConfig (treated as no config).
func parseSuffixToConfig(rawSuffix string) ThinkingConfig {
// 1. Try special values first (none, auto, -1)
if mode, ok := ParseSpecialSuffix(rawSuffix); ok {
switch mode {
case ModeNone:
return ThinkingConfig{Mode: ModeNone, Budget: 0}
case ModeAuto:
return ThinkingConfig{Mode: ModeAuto, Budget: -1}
}
}

// 2. Try level parsing (minimal, low, medium, high, xhigh)
if level, ok := ParseLevelSuffix(rawSuffix); ok {
return ThinkingConfig{Mode: ModeLevel, Level: level}
}

// 3. Try numeric parsing
if budget, ok := ParseNumericSuffix(rawSuffix); ok {
if budget == 0 {
return ThinkingConfig{Mode: ModeNone, Budget: 0}
}
return ThinkingConfig{Mode: ModeBudget, Budget: budget}
}

// Unknown suffix format - return empty config
log.WithField("raw_suffix", rawSuffix).Debug("thinking: unknown suffix format, treating as no config")
return ThinkingConfig{}
}
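For quick reference, the mapping the branches above imply:

parseSuffixToConfig("none") // ThinkingConfig{Mode: ModeNone, Budget: 0}
parseSuffixToConfig("auto") // ThinkingConfig{Mode: ModeAuto, Budget: -1}
parseSuffixToConfig("high") // ThinkingConfig{Mode: ModeLevel, Level: "high"}
parseSuffixToConfig("8192") // ThinkingConfig{Mode: ModeBudget, Budget: 8192}
parseSuffixToConfig("0")    // ThinkingConfig{Mode: ModeNone, Budget: 0}
parseSuffixToConfig("wat")  // ThinkingConfig{} (logged, treated as no config)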

// applyUserDefinedModel applies thinking configuration for user-defined models
// without ThinkingSupport validation.
func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, provider string, suffixResult SuffixResult) ([]byte, error) {
// Get config: suffix priority over body
var config ThinkingConfig
if suffixResult.HasSuffix {
config = parseSuffixToConfig(suffixResult.RawSuffix)
} else {
config = extractThinkingConfig(body, provider)
}

if !hasThinkingConfig(config) {
log.WithFields(log.Fields{
"model": modelInfo.ID,
"provider": provider,
"user_defined": true,
"passthrough": true,
}).Debug("thinking: user-defined model, no config, passthrough")
return body, nil
}

applier := GetProviderApplier(provider)
if applier == nil {
log.WithFields(log.Fields{
"model": modelInfo.ID,
"provider": provider,
"user_defined": true,
"passthrough": true,
}).Debug("thinking: user-defined model, unknown provider, passthrough")
return body, nil
}

log.WithFields(log.Fields{
"model": modelInfo.ID,
"provider": provider,
"user_defined": true,
"passthrough": false,
"config": config,
}).Debug("thinking: applying config for user-defined model (skip validation)")

return applier.Apply(body, config, modelInfo)
}

// extractThinkingConfig extracts provider-specific thinking config from request body.
func extractThinkingConfig(body []byte, provider string) ThinkingConfig {
if len(body) == 0 || !gjson.ValidBytes(body) {
return ThinkingConfig{}
}

switch provider {
case "claude":
return extractClaudeConfig(body)
case "gemini", "gemini-cli", "antigravity":
return extractGeminiConfig(body, provider)
case "openai":
return extractOpenAIConfig(body)
case "codex":
return extractCodexConfig(body)
case "iflow":
return extractIFlowConfig(body)
default:
return ThinkingConfig{}
}
}

func hasThinkingConfig(config ThinkingConfig) bool {
return config.Mode != ModeBudget || config.Budget != 0 || config.Level != ""
}

// extractClaudeConfig extracts thinking configuration from Claude format request body.
//
// Claude API format:
// - thinking.type: "enabled" or "disabled"
// - thinking.budget_tokens: integer (-1=auto, 0=disabled, >0=budget)
//
// Priority: thinking.type="disabled" takes precedence over budget_tokens.
// When type="enabled" without budget_tokens, returns ModeAuto to indicate
// the user wants thinking enabled but didn't specify a budget.
func extractClaudeConfig(body []byte) ThinkingConfig {
thinkingType := gjson.GetBytes(body, "thinking.type").String()
if thinkingType == "disabled" {
return ThinkingConfig{Mode: ModeNone, Budget: 0}
}

// Check budget_tokens
if budget := gjson.GetBytes(body, "thinking.budget_tokens"); budget.Exists() {
value := int(budget.Int())
switch value {
case 0:
return ThinkingConfig{Mode: ModeNone, Budget: 0}
case -1:
return ThinkingConfig{Mode: ModeAuto, Budget: -1}
default:
return ThinkingConfig{Mode: ModeBudget, Budget: value}
}
}

// If type="enabled" but no budget_tokens, treat as auto (user wants thinking but no budget specified)
if thinkingType == "enabled" {
return ThinkingConfig{Mode: ModeAuto, Budget: -1}
}

return ThinkingConfig{}
}
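The doc comment above pins down the precedence; in JSON terms:

extractClaudeConfig([]byte(`{"thinking":{"type":"disabled","budget_tokens":512}}`)) // ModeNone (type wins)
extractClaudeConfig([]byte(`{"thinking":{"budget_tokens":-1}}`))                    // ModeAuto
extractClaudeConfig([]byte(`{"thinking":{"budget_tokens":16384}}`))                 // ModeBudget, Budget=16384
extractClaudeConfig([]byte(`{"thinking":{"type":"enabled"}}`))                      // ModeAuto (enabled, no budget)
extractClaudeConfig([]byte(`{}`))                                                   // zero ThinkingConfig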

// extractGeminiConfig extracts thinking configuration from Gemini format request body.
//
// Gemini API format:
// - generationConfig.thinkingConfig.thinkingLevel: "none", "auto", or level name (Gemini 3)
// - generationConfig.thinkingConfig.thinkingBudget: integer (Gemini 2.5)
//
// For gemini-cli and antigravity providers, the path is prefixed with "request.".
//
// Priority: thinkingLevel is checked first (Gemini 3 format), then thinkingBudget (Gemini 2.5 format).
// This allows newer Gemini 3 level-based configs to take precedence.
func extractGeminiConfig(body []byte, provider string) ThinkingConfig {
prefix := "generationConfig.thinkingConfig"
if provider == "gemini-cli" || provider == "antigravity" {
prefix = "request.generationConfig.thinkingConfig"
}

// Check thinkingLevel first (Gemini 3 format takes precedence)
if level := gjson.GetBytes(body, prefix+".thinkingLevel"); level.Exists() {
value := level.String()
switch value {
case "none":
return ThinkingConfig{Mode: ModeNone, Budget: 0}
case "auto":
return ThinkingConfig{Mode: ModeAuto, Budget: -1}
default:
return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(value)}
}
}

// Check thinkingBudget (Gemini 2.5 format)
if budget := gjson.GetBytes(body, prefix+".thinkingBudget"); budget.Exists() {
value := int(budget.Int())
switch value {
case 0:
return ThinkingConfig{Mode: ModeNone, Budget: 0}
case -1:
return ThinkingConfig{Mode: ModeAuto, Budget: -1}
default:
return ThinkingConfig{Mode: ModeBudget, Budget: value}
}
}

return ThinkingConfig{}
}
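The only provider-specific wrinkle is the path prefix:

// Top-level path for plain Gemini:
extractGeminiConfig([]byte(`{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`), "gemini") // ModeBudget, 8192
// Same config nested under "request" for gemini-cli and antigravity:
extractGeminiConfig([]byte(`{"request":{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}}`), "gemini-cli") // ModeLevel, "high"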

// extractOpenAIConfig extracts thinking configuration from OpenAI format request body.
//
// OpenAI API format:
// - reasoning_effort: "none", "low", "medium", "high" (discrete levels)
//
// OpenAI uses level-based thinking configuration only, no numeric budget support.
// The "none" value is treated specially to return ModeNone.
func extractOpenAIConfig(body []byte) ThinkingConfig {
// Check reasoning_effort (OpenAI Chat Completions format)
if effort := gjson.GetBytes(body, "reasoning_effort"); effort.Exists() {
value := effort.String()
if value == "none" {
return ThinkingConfig{Mode: ModeNone, Budget: 0}
}
return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(value)}
}

return ThinkingConfig{}
}

// extractCodexConfig extracts thinking configuration from Codex format request body.
//
// Codex API format (OpenAI Responses API):
// - reasoning.effort: "none", "low", "medium", "high"
//
// This is similar to OpenAI but uses nested field "reasoning.effort" instead of "reasoning_effort".
func extractCodexConfig(body []byte) ThinkingConfig {
// Check reasoning.effort (Codex / OpenAI Responses API format)
if effort := gjson.GetBytes(body, "reasoning.effort"); effort.Exists() {
value := effort.String()
if value == "none" {
return ThinkingConfig{Mode: ModeNone, Budget: 0}
}
return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(value)}
}

return ThinkingConfig{}
}

// extractIFlowConfig extracts thinking configuration from iFlow format request body.
//
// iFlow API format (supports multiple model families):
// - GLM format: chat_template_kwargs.enable_thinking (boolean)
// - MiniMax format: reasoning_split (boolean)
//
// Returns ModeBudget with Budget=1 as a sentinel value indicating "enabled".
// The actual budget/configuration is determined by the iFlow applier based on model capabilities.
// Budget=1 is used because iFlow models don't use numeric budgets; they only support on/off.
func extractIFlowConfig(body []byte) ThinkingConfig {
// GLM format: chat_template_kwargs.enable_thinking
if enabled := gjson.GetBytes(body, "chat_template_kwargs.enable_thinking"); enabled.Exists() {
if enabled.Bool() {
// Budget=1 is a sentinel meaning "enabled" (iFlow doesn't use numeric budgets)
return ThinkingConfig{Mode: ModeBudget, Budget: 1}
}
return ThinkingConfig{Mode: ModeNone, Budget: 0}
}

// MiniMax format: reasoning_split
if split := gjson.GetBytes(body, "reasoning_split"); split.Exists() {
if split.Bool() {
// Budget=1 is a sentinel meaning "enabled" (iFlow doesn't use numeric budgets)
return ThinkingConfig{Mode: ModeBudget, Budget: 1}
}
return ThinkingConfig{Mode: ModeNone, Budget: 0}
}

return ThinkingConfig{}
}
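Both iFlow shapes collapse to the same on/off semantics:

extractIFlowConfig([]byte(`{"chat_template_kwargs":{"enable_thinking":true}}`)) // ModeBudget, Budget=1 (sentinel for "on")
extractIFlowConfig([]byte(`{"reasoning_split":false}`))                         // ModeNone
extractIFlowConfig([]byte(`{}`))                                                // zero ThinkingConfig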

144 internal/thinking/apply_main_test.go Normal file
@@ -0,0 +1,144 @@
// Package thinking provides unified thinking configuration processing logic.
package thinking

import (
"testing"

"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/tidwall/gjson"
)

// setupTestModels registers test models in the global registry for testing.
// This is required because ApplyThinking now looks up models by name.
func setupTestModels(t *testing.T) func() {
t.Helper()
reg := registry.GetGlobalRegistry()

// Register test models via RegisterClient (the correct API)
clientID := "test-thinking-client"
testModels := []*registry.ModelInfo{
{ID: "test-thinking-model", Thinking: &registry.ThinkingSupport{Min: 1, Max: 10}},
{ID: "test-no-thinking", Type: "gemini"},
{ID: "gpt-5.2-test", Thinking: &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "medium", "high"}}},
}

reg.RegisterClient(clientID, "test", testModels)

// Return cleanup function
return func() {
reg.UnregisterClient(clientID)
}
}

func TestApplyThinkingPassthrough(t *testing.T) {
cleanup := setupTestModels(t)
defer cleanup()

tests := []struct {
name string
body string
model string
provider string
}{
{"unknown provider", `{"a":1}`, "test-thinking-model", "unknown"},
{"unknown model", `{"a":1}`, "nonexistent-model", "gemini"},
{"nil thinking support", `{"a":1}`, "test-no-thinking", "gemini"},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := ApplyThinking([]byte(tt.body), tt.model, tt.provider)
if err != nil {
t.Fatalf("ApplyThinking() error = %v", err)
}
if string(got) != tt.body {
t.Fatalf("ApplyThinking() = %s, want %s", string(got), tt.body)
}
})
}
}

func TestApplyThinkingValidationError(t *testing.T) {
cleanup := setupTestModels(t)
defer cleanup()

tests := []struct {
name string
body string
model string
provider string
}{
{"unsupported level", `{"reasoning_effort":"ultra"}`, "gpt-5.2-test", "openai"},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := ApplyThinking([]byte(tt.body), tt.model, tt.provider)
if err == nil {
t.Fatalf("ApplyThinking() error = nil, want error")
}
// On validation error, ApplyThinking returns original body (defensive programming)
if string(got) != tt.body {
t.Fatalf("ApplyThinking() body = %s, want original body %s", string(got), tt.body)
}
})
}
}

func TestApplyThinkingSuffixPriority(t *testing.T) {
cleanup := setupTestModels(t)
defer cleanup()

// Register a model that supports thinking with budget
reg := registry.GetGlobalRegistry()
suffixClientID := "test-suffix-client"
testModels := []*registry.ModelInfo{
{
ID: "gemini-2.5-pro-suffix-test",
Thinking: &registry.ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: true},
},
}
reg.RegisterClient(suffixClientID, "gemini", testModels)
defer reg.UnregisterClient(suffixClientID)

tests := []struct {
name string
body string
model string
provider string
checkPath string
expectedValue int
}{
{
"suffix overrides body config",
`{"generationConfig":{"thinkingConfig":{"thinkingBudget":1000}}}`,
"gemini-2.5-pro-suffix-test(8192)",
"gemini",
"generationConfig.thinkingConfig.thinkingBudget",
8192,
},
{
"suffix none disables thinking",
`{"generationConfig":{"thinkingConfig":{"thinkingBudget":1000}}}`,
"gemini-2.5-pro-suffix-test(none)",
"gemini",
"generationConfig.thinkingConfig.thinkingBudget",
0,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := ApplyThinking([]byte(tt.body), tt.model, tt.provider)
if err != nil {
t.Fatalf("ApplyThinking() error = %v", err)
}

// Use gjson to check the value
result := int(gjson.GetBytes(got, tt.checkPath).Int())
if result != tt.expectedValue {
t.Fatalf("ApplyThinking() %s = %v, want %v", tt.checkPath, result, tt.expectedValue)
}
})
}
}
501
internal/thinking/apply_test.go
Normal file
501
internal/thinking/apply_test.go
Normal file
@@ -0,0 +1,501 @@
|
||||
// Package thinking_test provides external tests for the thinking package.
|
||||
package thinking_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
|
||||
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/claude"
|
||||
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini"
|
||||
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli"
|
||||
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow"
|
||||
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai"
|
||||
)
|
||||
|
||||
// registerTestModels sets up test models in the registry and returns a cleanup function.
|
||||
func registerTestModels(t *testing.T) func() {
|
||||
t.Helper()
|
||||
reg := registry.GetGlobalRegistry()
|
||||
|
||||
testModels := []*registry.ModelInfo{
|
||||
geminiBudgetModel(),
|
||||
geminiLevelModel(),
|
||||
claudeBudgetModel(),
|
||||
openAILevelModel(),
|
||||
iFlowModel(),
|
||||
{ID: "claude-3"},
|
||||
{ID: "gemini-2.5-pro-strip"},
|
||||
{ID: "glm-4.6-strip"},
|
||||
}
|
||||
|
||||
clientID := "test-thinking-models"
|
||||
reg.RegisterClient(clientID, "test", testModels)
|
||||
|
||||
return func() {
|
||||
reg.UnregisterClient(clientID)
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyThinking tests the main ApplyThinking entry point.
|
||||
//
|
||||
// ApplyThinking is the unified entry point for applying thinking configuration.
|
||||
// It routes to the appropriate provider-specific applier based on model.
|
||||
//
|
||||
// Depends on: Epic 10 Story 10-2 (apply-thinking main entry)
|
||||
func TestApplyThinking(t *testing.T) {
|
||||
cleanup := registerTestModels(t)
|
||||
defer cleanup()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
body string
|
||||
model string
|
||||
provider string
|
||||
check string
|
||||
}{
|
||||
{"gemini budget", `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, "gemini-2.5-pro-test", "gemini", "geminiBudget"},
|
||||
{"gemini level", `{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}`, "gemini-3-pro-preview-test", "gemini", "geminiLevel"},
|
||||
{"gemini-cli budget", `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`, "gemini-2.5-pro-test", "gemini-cli", "geminiCliBudget"},
|
||||
{"antigravity budget", `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`, "gemini-2.5-pro-test", "antigravity", "geminiCliBudget"},
|
||||
{"claude budget", `{"thinking":{"budget_tokens":16384}}`, "claude-sonnet-4-5-test", "claude", "claudeBudget"},
|
||||
{"claude enabled type auto", `{"thinking":{"type":"enabled"}}`, "claude-sonnet-4-5-test", "claude", "claudeAuto"},
|
||||
{"openai level", `{"reasoning_effort":"high"}`, "gpt-5.2-test", "openai", "openaiLevel"},
|
||||
{"iflow enable", `{"chat_template_kwargs":{"enable_thinking":true}}`, "glm-4.6-test", "iflow", "iflowEnable"},
|
||||
{"unknown provider passthrough", `{"a":1}`, "gemini-2.5-pro-test", "unknown", "passthrough"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got, err := thinking.ApplyThinking([]byte(tt.body), tt.model, tt.provider)
|
||||
if err != nil {
|
||||
t.Fatalf("ApplyThinking() error = %v", err)
|
||||
}
|
||||
assertApplyThinkingCheck(t, tt.check, tt.body, got)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyThinkingErrors(t *testing.T) {
|
||||
cleanup := registerTestModels(t)
|
||||
defer cleanup()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
body string
|
||||
model string
|
||||
provider string
|
||||
}{
|
||||
{"unsupported level openai", `{"reasoning_effort":"ultra"}`, "gpt-5.2-test", "openai"},
|
||||
{"unsupported level gemini", `{"generationConfig":{"thinkingConfig":{"thinkingLevel":"ultra"}}}`, "gemini-3-pro-preview-test", "gemini"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got, err := thinking.ApplyThinking([]byte(tt.body), tt.model, tt.provider)
|
||||
if err == nil {
|
||||
t.Fatalf("ApplyThinking() error = nil, want error")
|
||||
}
|
||||
// On validation error, ApplyThinking returns original body (defensive programming)
|
||||
if string(got) != tt.body {
|
||||
t.Fatalf("ApplyThinking() body = %s, want original body %s", string(got), tt.body)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyThinkingStripOnUnsupportedModel(t *testing.T) {
|
||||
cleanup := registerTestModels(t)
|
||||
defer cleanup()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
body string
|
||||
model string
|
||||
provider string
|
||||
stripped []string
|
||||
preserved []string
|
||||
}{
|
||||
{"claude strip", `{"thinking":{"budget_tokens":8192},"model":"claude-3"}`, "claude-3", "claude", []string{"thinking"}, []string{"model"}},
|
||||
{"gemini strip", `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192},"temperature":0.7}}`, "gemini-2.5-pro-strip", "gemini", []string{"generationConfig.thinkingConfig"}, []string{"generationConfig.temperature"}},
|
||||
{"iflow strip", `{"chat_template_kwargs":{"enable_thinking":true,"clear_thinking":false,"other":"value"}}`, "glm-4.6-strip", "iflow", []string{"chat_template_kwargs.enable_thinking", "chat_template_kwargs.clear_thinking"}, []string{"chat_template_kwargs.other"}},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got, err := thinking.ApplyThinking([]byte(tt.body), tt.model, tt.provider)
|
||||
if err != nil {
|
||||
t.Fatalf("ApplyThinking() error = %v", err)
|
||||
}
|
||||
|
||||
for _, path := range tt.stripped {
|
||||
if gjson.GetBytes(got, path).Exists() {
|
||||
t.Fatalf("expected %s to be stripped, got %s", path, string(got))
|
||||
}
|
||||
}
|
||||
for _, path := range tt.preserved {
|
||||
if !gjson.GetBytes(got, path).Exists() {
|
||||
t.Fatalf("expected %s to be preserved, got %s", path, string(got))
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsUserDefinedModel(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
modelInfo *registry.ModelInfo
|
||||
want bool
|
||||
}{
|
||||
{"nil modelInfo", nil, false},
|
||||
{"not user-defined no flag", ®istry.ModelInfo{ID: "test"}, false},
|
||||
{"not user-defined with type", ®istry.ModelInfo{ID: "test", Type: "openai"}, false},
|
||||
{"user-defined with flag", ®istry.ModelInfo{ID: "test", Type: "openai", UserDefined: true}, true},
|
||||
{"user-defined flag only", ®istry.ModelInfo{ID: "test", UserDefined: true}, true},
|
||||
{"has thinking not user-defined", ®istry.ModelInfo{ID: "test", Type: "openai", Thinking: ®istry.ThinkingSupport{Min: 1024}}, false},
|
||||
{"has thinking with user-defined flag", ®istry.ModelInfo{ID: "test", Type: "openai", Thinking: ®istry.ThinkingSupport{Min: 1024}, UserDefined: true}, true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := thinking.IsUserDefinedModel(tt.modelInfo); got != tt.want {
|
||||
t.Fatalf("IsUserDefinedModel() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyThinking_UserDefinedModel(t *testing.T) {
|
||||
// Register user-defined test models
|
||||
reg := registry.GetGlobalRegistry()
|
||||
userDefinedModels := []*registry.ModelInfo{
|
||||
{ID: "custom-gpt", Type: "openai", UserDefined: true},
|
||||
{ID: "or-claude", Type: "openai", UserDefined: true},
|
||||
{ID: "custom-gemini", Type: "gemini", UserDefined: true},
|
||||
{ID: "vertex-flash", Type: "gemini", UserDefined: true},
|
||||
{ID: "cli-gemini", Type: "gemini", UserDefined: true},
|
||||
{ID: "ag-gemini", Type: "gemini", UserDefined: true},
|
||||
{ID: "custom-claude", Type: "claude", UserDefined: true},
|
||||
{ID: "unknown"},
|
||||
}
|
||||
clientID := "test-user-defined-models"
|
||||
reg.RegisterClient(clientID, "test", userDefinedModels)
|
||||
defer reg.UnregisterClient(clientID)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
body string
|
||||
model string
|
||||
provider string
|
||||
check string
|
||||
}{
|
||||
{
|
||||
"openai user-defined with reasoning_effort",
|
||||
`{"model":"custom-gpt","reasoning_effort":"high"}`,
|
||||
"custom-gpt",
|
||||
"openai",
|
||||
"openaiCompatible",
|
||||
},
|
||||
{
|
||||
"openai-compatibility model with reasoning_effort",
|
||||
`{"model":"or-claude","reasoning_effort":"high"}`,
|
||||
"or-claude",
|
||||
"openai",
|
||||
"openaiCompatible",
|
||||
},
|
||||
{
|
||||
"gemini user-defined with thinkingBudget",
|
||||
`{"model":"custom-gemini","generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`,
|
||||
"custom-gemini",
|
||||
"gemini",
|
||||
"geminiCompatibleBudget",
|
||||
},
|
||||
{
|
||||
"vertex user-defined with thinkingBudget",
|
||||
`{"model":"vertex-flash","generationConfig":{"thinkingConfig":{"thinkingBudget":16384}}}`,
|
||||
"vertex-flash",
|
||||
"gemini",
|
||||
"geminiCompatibleBudget16384",
|
||||
},
|
||||
{
|
||||
"gemini-cli user-defined with thinkingBudget",
|
||||
`{"model":"cli-gemini","request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`,
|
||||
"cli-gemini",
|
||||
"gemini-cli",
|
||||
"geminiCliCompatibleBudget",
|
||||
},
|
||||
{
|
||||
"antigravity user-defined with thinkingBudget",
|
||||
`{"model":"ag-gemini","request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`,
|
||||
"ag-gemini",
|
||||
"antigravity",
|
||||
"geminiCliCompatibleBudget",
|
||||
},
|
||||
{
|
||||
"claude user-defined with thinking",
|
||||
`{"model":"custom-claude","thinking":{"type":"enabled","budget_tokens":8192}}`,
|
||||
"custom-claude",
|
||||
"claude",
|
||||
"claudeCompatibleBudget",
|
||||
},
|
||||
{
|
||||
"user-defined model no config",
|
||||
`{"model":"custom-gpt","messages":[]}`,
|
||||
"custom-gpt",
|
||||
"openai",
|
||||
"passthrough",
|
||||
},
|
||||
{
|
||||
"non-user-defined model strips config",
|
||||
`{"model":"unknown","reasoning_effort":"high"}`,
|
||||
"unknown",
|
||||
"openai",
|
||||
"stripReasoning",
|
||||
},
|
||||
{
|
||||
"user-defined model unknown provider",
|
||||
`{"model":"custom-gpt","reasoning_effort":"high"}`,
|
||||
"custom-gpt",
|
||||
"unknown",
|
||||
"passthrough",
|
||||
},
|
||||
{
|
||||
"unknown model passthrough",
|
||||
`{"model":"nonexistent","reasoning_effort":"high"}`,
|
||||
"nonexistent",
|
||||
"openai",
|
||||
"passthrough",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got, err := thinking.ApplyThinking([]byte(tt.body), tt.model, tt.provider)
|
||||
if err != nil {
|
||||
t.Fatalf("ApplyThinking() error = %v", err)
|
||||
}
|
||||
assertCompatibleModelCheck(t, tt.check, tt.body, got)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyThinkingSuffixPriority tests suffix priority over body config.
|
||||
func TestApplyThinkingSuffixPriority(t *testing.T) {
|
||||
// Register test model
|
||||
reg := registry.GetGlobalRegistry()
|
||||
testModels := []*registry.ModelInfo{
|
||||
{
|
||||
ID: "gemini-suffix-test",
|
||||
Thinking: ®istry.ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: true},
|
||||
},
|
||||
}
|
||||
clientID := "test-suffix-priority"
|
||||
reg.RegisterClient(clientID, "gemini", testModels)
|
||||
defer reg.UnregisterClient(clientID)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
body string
|
||||
model string
|
||||
provider string
|
||||
checkPath string
|
||||
expectedValue int
|
||||
}{
|
||||
{
|
||||
"suffix overrides body budget",
|
||||
`{"generationConfig":{"thinkingConfig":{"thinkingBudget":1000}}}`,
|
||||
"gemini-suffix-test(8192)",
|
||||
"gemini",
|
||||
"generationConfig.thinkingConfig.thinkingBudget",
|
||||
8192,
|
||||
},
|
||||
{
|
||||
"suffix none sets budget to 0",
|
||||
`{"generationConfig":{"thinkingConfig":{"thinkingBudget":1000}}}`,
|
||||
"gemini-suffix-test(none)",
|
||||
"gemini",
|
||||
"generationConfig.thinkingConfig.thinkingBudget",
|
||||
0,
|
||||
},
|
||||
{
|
||||
"no suffix uses body config",
|
||||
`{"generationConfig":{"thinkingConfig":{"thinkingBudget":5000}}}`,
|
||||
"gemini-suffix-test",
|
||||
"gemini",
|
||||
"generationConfig.thinkingConfig.thinkingBudget",
|
||||
5000,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got, err := thinking.ApplyThinking([]byte(tt.body), tt.model, tt.provider)
|
||||
if err != nil {
|
||||
t.Fatalf("ApplyThinking() error = %v", err)
|
||||
}
|
||||
|
||||
result := int(gjson.GetBytes(got, tt.checkPath).Int())
|
||||
if result != tt.expectedValue {
|
||||
t.Fatalf("ApplyThinking() %s = %v, want %v\nbody: %s", tt.checkPath, result, tt.expectedValue, string(got))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func assertApplyThinkingCheck(t *testing.T, checkName, input string, body []byte) {
    t.Helper()

    switch checkName {
    case "geminiBudget":
        assertJSONInt(t, body, "generationConfig.thinkingConfig.thinkingBudget", 8192)
        assertJSONBool(t, body, "generationConfig.thinkingConfig.includeThoughts", true)
    case "geminiLevel":
        assertJSONString(t, body, "generationConfig.thinkingConfig.thinkingLevel", "high")
        assertJSONBool(t, body, "generationConfig.thinkingConfig.includeThoughts", true)
    case "geminiCliBudget":
        assertJSONInt(t, body, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
        assertJSONBool(t, body, "request.generationConfig.thinkingConfig.includeThoughts", true)
    case "claudeBudget":
        assertJSONString(t, body, "thinking.type", "enabled")
        assertJSONInt(t, body, "thinking.budget_tokens", 16384)
    case "claudeAuto":
        // When type=enabled without budget, auto mode is applied using mid-range budget
        assertJSONString(t, body, "thinking.type", "enabled")
        // Budget should be mid-range: (1024 + 128000) / 2 = 64512
        assertJSONInt(t, body, "thinking.budget_tokens", 64512)
    case "openaiLevel":
        assertJSONString(t, body, "reasoning_effort", "high")
    case "iflowEnable":
        assertJSONBool(t, body, "chat_template_kwargs.enable_thinking", true)
        assertJSONBool(t, body, "chat_template_kwargs.clear_thinking", false)
    case "passthrough":
        if string(body) != input {
            t.Fatalf("ApplyThinking() = %s, want %s", string(body), input)
        }
    default:
        t.Fatalf("unknown check: %s", checkName)
    }
}

func assertCompatibleModelCheck(t *testing.T, checkName, input string, body []byte) {
    t.Helper()

    switch checkName {
    case "openaiCompatible":
        assertJSONString(t, body, "reasoning_effort", "high")
    case "geminiCompatibleBudget":
        assertJSONInt(t, body, "generationConfig.thinkingConfig.thinkingBudget", 8192)
        assertJSONBool(t, body, "generationConfig.thinkingConfig.includeThoughts", true)
    case "geminiCompatibleBudget16384":
        assertJSONInt(t, body, "generationConfig.thinkingConfig.thinkingBudget", 16384)
        assertJSONBool(t, body, "generationConfig.thinkingConfig.includeThoughts", true)
    case "geminiCliCompatibleBudget":
        assertJSONInt(t, body, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
        assertJSONBool(t, body, "request.generationConfig.thinkingConfig.includeThoughts", true)
    case "claudeCompatibleBudget":
        assertJSONString(t, body, "thinking.type", "enabled")
        assertJSONInt(t, body, "thinking.budget_tokens", 8192)
    case "stripReasoning":
        if gjson.GetBytes(body, "reasoning_effort").Exists() {
            t.Fatalf("expected reasoning_effort to be stripped, got %s", string(body))
        }
    case "passthrough":
        if string(body) != input {
            t.Fatalf("ApplyThinking() = %s, want %s", string(body), input)
        }
    default:
        t.Fatalf("unknown check: %s", checkName)
    }
}

func assertJSONString(t *testing.T, body []byte, path, want string) {
    t.Helper()
    value := gjson.GetBytes(body, path)
    if !value.Exists() {
        t.Fatalf("expected %s to exist", path)
    }
    if value.String() != want {
        t.Fatalf("value at %s = %s, want %s", path, value.String(), want)
    }
}

func assertJSONInt(t *testing.T, body []byte, path string, want int) {
    t.Helper()
    value := gjson.GetBytes(body, path)
    if !value.Exists() {
        t.Fatalf("expected %s to exist", path)
    }
    if int(value.Int()) != want {
        t.Fatalf("value at %s = %d, want %d", path, value.Int(), want)
    }
}

func assertJSONBool(t *testing.T, body []byte, path string, want bool) {
    t.Helper()
    value := gjson.GetBytes(body, path)
    if !value.Exists() {
        t.Fatalf("expected %s to exist", path)
    }
    if value.Bool() != want {
        t.Fatalf("value at %s = %t, want %t", path, value.Bool(), want)
    }
}

func geminiBudgetModel() *registry.ModelInfo {
    return &registry.ModelInfo{
        ID: "gemini-2.5-pro-test",
        Thinking: &registry.ThinkingSupport{
            Min:         128,
            Max:         32768,
            ZeroAllowed: true,
        },
    }
}

func geminiLevelModel() *registry.ModelInfo {
    return &registry.ModelInfo{
        ID: "gemini-3-pro-preview-test",
        Thinking: &registry.ThinkingSupport{
            Min:    128,
            Max:    32768,
            Levels: []string{"minimal", "low", "medium", "high"},
        },
    }
}

func claudeBudgetModel() *registry.ModelInfo {
    return &registry.ModelInfo{
        ID: "claude-sonnet-4-5-test",
        Thinking: &registry.ThinkingSupport{
            Min:         1024,
            Max:         128000,
            ZeroAllowed: true,
        },
    }
}

func openAILevelModel() *registry.ModelInfo {
    return &registry.ModelInfo{
        ID: "gpt-5.2-test",
        Thinking: &registry.ThinkingSupport{
            Min:         128,
            Max:         32768,
            ZeroAllowed: true,
            Levels:      []string{"low", "medium", "high"},
        },
    }
}

func iFlowModel() *registry.ModelInfo {
    return &registry.ModelInfo{
        ID: "glm-4.6-test",
        Thinking: &registry.ThinkingSupport{
            Min:         1,
            Max:         10,
            ZeroAllowed: true,
        },
    }
}
233
internal/thinking/convert.go
Normal file
@@ -0,0 +1,233 @@
package thinking

import (
    "fmt"
    "strings"

    "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
)

// levelToBudgetMap defines the standard Level → Budget mapping.
// All keys are lowercase; lookups should use strings.ToLower.
var levelToBudgetMap = map[string]int{
    "none":    0,
    "auto":    -1,
    "minimal": 512,
    "low":     1024,
    "medium":  8192,
    "high":    24576,
    "xhigh":   32768,
}

// ConvertLevelToBudget converts a thinking level to a budget value.
//
// This is a semantic conversion that maps discrete levels to numeric budgets.
// Level matching is case-insensitive.
//
// Level → Budget mapping:
//   - none → 0
//   - auto → -1
//   - minimal → 512
//   - low → 1024
//   - medium → 8192
//   - high → 24576
//   - xhigh → 32768
//
// Returns:
//   - budget: The converted budget value
//   - ok: true if level is valid, false otherwise
func ConvertLevelToBudget(level string) (int, bool) {
    budget, ok := levelToBudgetMap[strings.ToLower(level)]
    return budget, ok
}

// BudgetThreshold constants define the upper bounds for each thinking level.
// These are used by ConvertBudgetToLevel for range-based mapping.
const (
    // ThresholdMinimal is the upper bound for "minimal" level (1-512)
    ThresholdMinimal = 512
    // ThresholdLow is the upper bound for "low" level (513-1024)
    ThresholdLow = 1024
    // ThresholdMedium is the upper bound for "medium" level (1025-8192)
    ThresholdMedium = 8192
    // ThresholdHigh is the upper bound for "high" level (8193-24576)
    ThresholdHigh = 24576
)

// ConvertBudgetToLevel converts a budget value to the nearest thinking level.
//
// This is a semantic conversion that maps numeric budgets to discrete levels.
// Uses threshold-based mapping for range conversion.
//
// Budget → Level thresholds:
//   - -1 → auto
//   - 0 → none
//   - 1-512 → minimal
//   - 513-1024 → low
//   - 1025-8192 → medium
//   - 8193-24576 → high
//   - 24577+ → xhigh
//
// Returns:
//   - level: The converted thinking level string
//   - ok: true if budget is valid, false for invalid negatives (< -1)
func ConvertBudgetToLevel(budget int) (string, bool) {
    switch {
    case budget < -1:
        // Invalid negative values
        return "", false
    case budget == -1:
        return string(LevelAuto), true
    case budget == 0:
        return string(LevelNone), true
    case budget <= ThresholdMinimal:
        return string(LevelMinimal), true
    case budget <= ThresholdLow:
        return string(LevelLow), true
    case budget <= ThresholdMedium:
        return string(LevelMedium), true
    case budget <= ThresholdHigh:
        return string(LevelHigh), true
    default:
        return string(LevelXHigh), true
    }
}
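// Illustrative sketch, not part of this commit: the two conversions above are
// deliberately asymmetric. Canonical levels round-trip exactly, while an
// arbitrary budget snaps to the canonical budget of its range. The example
// below would live in a _test.go file of this package and assumes only the
// helpers above plus the fmt import.

func ExampleConvertBudgetToLevel() {
    b, _ := ConvertLevelToBudget("high") // 24576
    l, _ := ConvertBudgetToLevel(b)      // "high": canonical values round-trip exactly

    l2, _ := ConvertBudgetToLevel(5000) // "medium": 5000 falls in the 1025-8192 range
    b2, _ := ConvertLevelToBudget(l2)   // 8192: snaps to the canonical budget, not back to 5000
    fmt.Println(b, l, l2, b2)
    // Output: 24576 high medium 8192
}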
// ModelCapability describes the thinking format support of a model.
type ModelCapability int

const (
    // CapabilityUnknown indicates modelInfo is nil (passthrough behavior, internal use).
    CapabilityUnknown ModelCapability = iota - 1
    // CapabilityNone indicates the model doesn't support thinking (Thinking is nil).
    CapabilityNone
    // CapabilityBudgetOnly indicates the model supports numeric budgets only.
    CapabilityBudgetOnly
    // CapabilityLevelOnly indicates the model supports discrete levels only.
    CapabilityLevelOnly
    // CapabilityHybrid indicates the model supports both budgets and levels.
    CapabilityHybrid
)

// detectModelCapability determines the thinking format capability of a model.
//
// This is an internal function used by NormalizeForModel to decide the conversion strategy.
// It analyzes the model's ThinkingSupport configuration to classify the model:
//   - CapabilityNone: modelInfo.Thinking is nil (model doesn't support thinking)
//   - CapabilityBudgetOnly: has Min/Max but no Levels (Claude, Gemini 2.5)
//   - CapabilityLevelOnly: has Levels but no Min/Max (OpenAI, iFlow)
//   - CapabilityHybrid: has both Min/Max and Levels (Gemini 3)
//
// Note: returns a special sentinel value when modelInfo itself is nil (unknown model).
func detectModelCapability(modelInfo *registry.ModelInfo) ModelCapability {
    if modelInfo == nil {
        return CapabilityUnknown // sentinel for "passthrough" behavior
    }
    if modelInfo.Thinking == nil {
        return CapabilityNone
    }
    support := modelInfo.Thinking
    hasBudget := support.Min > 0 || support.Max > 0
    hasLevels := len(support.Levels) > 0

    switch {
    case hasBudget && hasLevels:
        return CapabilityHybrid
    case hasBudget:
        return CapabilityBudgetOnly
    case hasLevels:
        return CapabilityLevelOnly
    default:
        return CapabilityNone
    }
}

// normalizeMixedConfig resolves a thinking configuration when both budget and level
// might be present, applying priority rules.
//
// Priority rules (Level takes precedence over Budget):
//   - If level is non-empty: use level (special handling for "auto" and "none")
//   - If level is empty and budget is set: use budget
//   - If neither is set (budget=0, level=""): return ModeNone
//
// This function is used internally to handle ambiguous input configurations.
func normalizeMixedConfig(budget int, level string) ThinkingConfig {
    normalizedLevel := strings.ToLower(strings.TrimSpace(level))
    if normalizedLevel != "" {
        switch normalizedLevel {
        case string(LevelAuto):
            return ThinkingConfig{Mode: ModeAuto, Budget: -1, Level: ThinkingLevel(normalizedLevel)}
        case string(LevelNone):
            return ThinkingConfig{Mode: ModeNone, Budget: 0, Level: ThinkingLevel(normalizedLevel)}
        default:
            return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(normalizedLevel)}
        }
    }
    switch budget {
    case -1:
        return ThinkingConfig{Mode: ModeAuto, Budget: -1}
    case 0:
        return ThinkingConfig{Mode: ModeNone, Budget: 0}
    default:
        return ThinkingConfig{Mode: ModeBudget, Budget: budget}
    }
}

// NormalizeForModel normalizes a thinking configuration for a specific model.
//
// This function converts the configuration format based on model capabilities:
//   - Budget-only models (Claude, Gemini 2.5): Level → Budget conversion
//   - Level-only models (OpenAI, iFlow): Budget → Level conversion
//   - Hybrid models (Gemini 3): preserve the original format
//   - No thinking support (Thinking is nil): degrade to ModeNone
//   - Unknown model (modelInfo is nil): passthrough (preserve original format)
//
// Parameters:
//   - config: The thinking configuration to normalize (must not be nil)
//   - modelInfo: Model registry information containing ThinkingSupport properties
//
// Returns:
//   - Normalized ThinkingConfig suitable for the model
//   - Error if conversion fails (e.g., unsupported level or invalid budget)
func NormalizeForModel(config *ThinkingConfig, modelInfo *registry.ModelInfo) (*ThinkingConfig, error) {
    if config == nil {
        return nil, fmt.Errorf("thinking config is nil")
    }

    normalized := *config
    capability := detectModelCapability(modelInfo)

    // If the model doesn't support thinking, degrade to ModeNone
    if capability == CapabilityNone && config.Mode != ModeNone && config.Mode != ModeAuto {
        return &ThinkingConfig{Mode: ModeNone, Budget: 0}, nil
    }

    switch config.Mode {
    case ModeAuto, ModeNone:
        return &normalized, nil
    case ModeBudget:
        if capability == CapabilityLevelOnly {
            level, ok := ConvertBudgetToLevel(config.Budget)
            if !ok {
                return nil, fmt.Errorf("invalid budget: %d", config.Budget)
            }
            normalized.Mode = ModeLevel
            normalized.Level = ThinkingLevel(level)
            normalized.Budget = 0
        }
        return &normalized, nil
    case ModeLevel:
        if capability == CapabilityBudgetOnly {
            budget, ok := ConvertLevelToBudget(string(config.Level))
            if !ok {
                return nil, fmt.Errorf("unknown level: %s", config.Level)
            }
            normalized.Mode = ModeBudget
            normalized.Budget = budget
            normalized.Level = ""
        }
        return &normalized, nil
    default:
        return &normalized, nil
    }
}
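// Illustrative sketch, not part of this commit: a concrete run of the
// NormalizeForModel strategy, written as an in-package example. The
// budget-only model shape here is invented for the illustration, and the
// registry and fmt imports are assumed.

func ExampleNormalizeForModel() {
    // A budget-only model (Min/Max set, no Levels), so a ModeLevel input
    // is converted to ModeBudget via ConvertLevelToBudget.
    budgetOnly := &registry.ModelInfo{
        Thinking: &registry.ThinkingSupport{Min: 1024, Max: 128000},
    }
    cfg := ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}

    got, err := NormalizeForModel(&cfg, budgetOnly)
    if err != nil {
        panic(err)
    }
    fmt.Println(got.Mode == ModeBudget, got.Budget, got.Level == "")
    // Output: true 24576 true
}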
277
internal/thinking/convert_test.go
Normal file
@@ -0,0 +1,277 @@
// Package thinking provides unified thinking configuration processing logic.
package thinking

import (
    "testing"

    "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
)

// TestConvertLevelToBudget tests the ConvertLevelToBudget function.
//
// ConvertLevelToBudget converts a thinking level to a budget value.
// This is a semantic conversion - it does NOT apply clamping.
//
// Level → Budget mapping:
//   - none → 0
//   - auto → -1
//   - minimal → 512
//   - low → 1024
//   - medium → 8192
//   - high → 24576
//   - xhigh → 32768
func TestConvertLevelToBudget(t *testing.T) {
    tests := []struct {
        name   string
        level  string
        want   int
        wantOK bool
    }{
        // Standard levels
        {"none", "none", 0, true},
        {"auto", "auto", -1, true},
        {"minimal", "minimal", 512, true},
        {"low", "low", 1024, true},
        {"medium", "medium", 8192, true},
        {"high", "high", 24576, true},
        {"xhigh", "xhigh", 32768, true},

        // Case insensitive
        {"case insensitive HIGH", "HIGH", 24576, true},
        {"case insensitive High", "High", 24576, true},
        {"case insensitive NONE", "NONE", 0, true},
        {"case insensitive Auto", "Auto", -1, true},

        // Invalid levels
        {"invalid ultra", "ultra", 0, false},
        {"invalid maximum", "maximum", 0, false},
        {"empty string", "", 0, false},
        {"whitespace", " ", 0, false},
        {"numeric string", "1000", 0, false},
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            budget, ok := ConvertLevelToBudget(tt.level)
            if ok != tt.wantOK {
                t.Errorf("ConvertLevelToBudget(%q) ok = %v, want %v", tt.level, ok, tt.wantOK)
            }
            if budget != tt.want {
                t.Errorf("ConvertLevelToBudget(%q) = %d, want %d", tt.level, budget, tt.want)
            }
        })
    }
}

// TestConvertBudgetToLevel tests the ConvertBudgetToLevel function.
//
// ConvertBudgetToLevel converts a budget value to the nearest level.
// Uses threshold-based mapping for range conversion.
//
// Budget → Level thresholds:
//   - -1 → auto
//   - 0 → none
//   - 1-512 → minimal
//   - 513-1024 → low
//   - 1025-8192 → medium
//   - 8193-24576 → high
//   - 24577+ → xhigh
//
// Depends on: Epic 4 Story 4-2 (budget to level conversion)
func TestConvertBudgetToLevel(t *testing.T) {
    tests := []struct {
        name   string
        budget int
        want   string
        wantOK bool
    }{
        // Special values
        {"auto", -1, "auto", true},
        {"none", 0, "none", true},

        // Invalid negative values
        {"invalid negative -2", -2, "", false},
        {"invalid negative -100", -100, "", false},
        {"invalid negative extreme", -999999, "", false},

        // Minimal range (1-512)
        {"minimal min", 1, "minimal", true},
        {"minimal mid", 256, "minimal", true},
        {"minimal max", 512, "minimal", true},

        // Low range (513-1024)
        {"low start", 513, "low", true},
        {"low boundary", 1024, "low", true},

        // Medium range (1025-8192)
        {"medium start", 1025, "medium", true},
        {"medium mid", 4096, "medium", true},
        {"medium boundary", 8192, "medium", true},

        // High range (8193-24576)
        {"high start", 8193, "high", true},
        {"high mid", 16384, "high", true},
        {"high boundary", 24576, "high", true},

        // XHigh range (24577+)
        {"xhigh start", 24577, "xhigh", true},
        {"xhigh mid", 32768, "xhigh", true},
        {"xhigh large", 100000, "xhigh", true},
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            level, ok := ConvertBudgetToLevel(tt.budget)
            if ok != tt.wantOK {
                t.Errorf("ConvertBudgetToLevel(%d) ok = %v, want %v", tt.budget, ok, tt.wantOK)
            }
            if level != tt.want {
                t.Errorf("ConvertBudgetToLevel(%d) = %q, want %q", tt.budget, level, tt.want)
            }
        })
    }
}

// TestConvertMixedFormat tests mixed format handling.
//
// Tests scenarios where both level and budget might be present,
// or where format conversion requires special handling.
//
// Depends on: Epic 4 Story 4-3 (mixed format handling)
func TestConvertMixedFormat(t *testing.T) {
    tests := []struct {
        name        string
        inputBudget int
        inputLevel  string
        wantMode    ThinkingMode
        wantBudget  int
        wantLevel   ThinkingLevel
    }{
        // Level takes precedence when both present
        {"level and budget - level wins", 8192, "high", ModeLevel, 0, LevelHigh},
        {"level and zero budget", 0, "high", ModeLevel, 0, LevelHigh},

        // Budget only
        {"budget only", 16384, "", ModeBudget, 16384, ""},

        // Level only
        {"level only", 0, "medium", ModeLevel, 0, LevelMedium},

        // Neither (default)
        {"neither", 0, "", ModeNone, 0, ""},

        // Special values
        {"auto level", 0, "auto", ModeAuto, -1, LevelAuto},
        {"none level", 0, "none", ModeNone, 0, LevelNone},
        {"auto budget", -1, "", ModeAuto, -1, ""},
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            got := normalizeMixedConfig(tt.inputBudget, tt.inputLevel)
            if got.Mode != tt.wantMode {
                t.Errorf("normalizeMixedConfig(%d, %q) Mode = %v, want %v", tt.inputBudget, tt.inputLevel, got.Mode, tt.wantMode)
            }
            if got.Budget != tt.wantBudget {
                t.Errorf("normalizeMixedConfig(%d, %q) Budget = %d, want %d", tt.inputBudget, tt.inputLevel, got.Budget, tt.wantBudget)
            }
            if got.Level != tt.wantLevel {
                t.Errorf("normalizeMixedConfig(%d, %q) Level = %q, want %q", tt.inputBudget, tt.inputLevel, got.Level, tt.wantLevel)
            }
        })
    }
}

// TestNormalizeForModel tests model-aware format normalization.
func TestNormalizeForModel(t *testing.T) {
    budgetOnlyModel := &registry.ModelInfo{
        Thinking: &registry.ThinkingSupport{
            Min: 1024,
            Max: 128000,
        },
    }
    levelOnlyModel := &registry.ModelInfo{
        Thinking: &registry.ThinkingSupport{
            Levels: []string{"low", "medium", "high"},
        },
    }
    hybridModel := &registry.ModelInfo{
        Thinking: &registry.ThinkingSupport{
            Min:    128,
            Max:    32768,
            Levels: []string{"minimal", "low", "medium", "high"},
        },
    }

    tests := []struct {
        name    string
        config  ThinkingConfig
        model   *registry.ModelInfo
        want    ThinkingConfig
        wantErr bool
    }{
        {"budget-only keeps budget", ThinkingConfig{Mode: ModeBudget, Budget: 8192}, budgetOnlyModel, ThinkingConfig{Mode: ModeBudget, Budget: 8192}, false},
        {"budget-only converts level", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}, budgetOnlyModel, ThinkingConfig{Mode: ModeBudget, Budget: 24576}, false},
        {"level-only converts budget", ThinkingConfig{Mode: ModeBudget, Budget: 8192}, levelOnlyModel, ThinkingConfig{Mode: ModeLevel, Level: LevelMedium}, false},
        {"level-only keeps level", ThinkingConfig{Mode: ModeLevel, Level: LevelLow}, levelOnlyModel, ThinkingConfig{Mode: ModeLevel, Level: LevelLow}, false},
        {"hybrid keeps budget", ThinkingConfig{Mode: ModeBudget, Budget: 16384}, hybridModel, ThinkingConfig{Mode: ModeBudget, Budget: 16384}, false},
        {"hybrid keeps level", ThinkingConfig{Mode: ModeLevel, Level: LevelMinimal}, hybridModel, ThinkingConfig{Mode: ModeLevel, Level: LevelMinimal}, false},
        {"auto passthrough", ThinkingConfig{Mode: ModeAuto, Budget: -1}, levelOnlyModel, ThinkingConfig{Mode: ModeAuto, Budget: -1}, false},
        {"none passthrough", ThinkingConfig{Mode: ModeNone, Budget: 0}, budgetOnlyModel, ThinkingConfig{Mode: ModeNone, Budget: 0}, false},
        {"invalid level", ThinkingConfig{Mode: ModeLevel, Level: "ultra"}, budgetOnlyModel, ThinkingConfig{}, true},
        {"invalid budget", ThinkingConfig{Mode: ModeBudget, Budget: -2}, levelOnlyModel, ThinkingConfig{}, true},
        {"nil modelInfo passthrough budget", ThinkingConfig{Mode: ModeBudget, Budget: 8192}, nil, ThinkingConfig{Mode: ModeBudget, Budget: 8192}, false},
        {"nil modelInfo passthrough level", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}, nil, ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}, false},
        {"nil thinking degrades to none", ThinkingConfig{Mode: ModeBudget, Budget: 4096}, &registry.ModelInfo{}, ThinkingConfig{Mode: ModeNone, Budget: 0}, false},
        {"nil thinking level degrades to none", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}, &registry.ModelInfo{}, ThinkingConfig{Mode: ModeNone, Budget: 0}, false},
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            got, err := NormalizeForModel(&tt.config, tt.model)
            if (err != nil) != tt.wantErr {
                t.Fatalf("NormalizeForModel(%+v) error = %v, wantErr %v", tt.config, err, tt.wantErr)
            }
            if tt.wantErr {
                return
            }
            if got == nil {
                t.Fatalf("NormalizeForModel(%+v) returned nil config", tt.config)
            }
            if got.Mode != tt.want.Mode {
                t.Errorf("NormalizeForModel(%+v) Mode = %v, want %v", tt.config, got.Mode, tt.want.Mode)
            }
            if got.Budget != tt.want.Budget {
                t.Errorf("NormalizeForModel(%+v) Budget = %d, want %d", tt.config, got.Budget, tt.want.Budget)
            }
            if got.Level != tt.want.Level {
                t.Errorf("NormalizeForModel(%+v) Level = %q, want %q", tt.config, got.Level, tt.want.Level)
            }
        })
    }
}

// TestLevelToBudgetRoundTrip tests the level → budget → level round trip.
//
// Verifies that converting a level to a budget and back produces consistent results.
//
// Depends on: Epic 4 Story 4-1, 4-2
func TestLevelToBudgetRoundTrip(t *testing.T) {
    levels := []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"}

    for _, level := range levels {
        t.Run(level, func(t *testing.T) {
            budget, ok := ConvertLevelToBudget(level)
            if !ok {
                t.Fatalf("ConvertLevelToBudget(%q) returned ok=false", level)
            }
            resultLevel, ok := ConvertBudgetToLevel(budget)
            if !ok {
                t.Fatalf("ConvertBudgetToLevel(%d) returned ok=false", budget)
            }
            if resultLevel != level {
                t.Errorf("round trip: %q → %d → %q, want %q", level, budget, resultLevel, level)
            }
        })
    }
}
71
internal/thinking/errors.go
Normal file
@@ -0,0 +1,71 @@
// Package thinking provides unified thinking configuration processing logic.
package thinking

// ErrorCode represents the type of thinking configuration error.
type ErrorCode string

// Error codes for thinking configuration processing.
const (
    // ErrInvalidSuffix indicates the suffix format cannot be parsed.
    // Example: "model(abc" (missing closing parenthesis)
    ErrInvalidSuffix ErrorCode = "INVALID_SUFFIX"

    // ErrUnknownLevel indicates the level value is not in the valid list.
    // Example: "model(ultra)" where "ultra" is not a valid level
    ErrUnknownLevel ErrorCode = "UNKNOWN_LEVEL"

    // ErrThinkingNotSupported indicates the model does not support thinking.
    // Example: claude-haiku-4-5 does not have thinking capability
    ErrThinkingNotSupported ErrorCode = "THINKING_NOT_SUPPORTED"

    // ErrLevelNotSupported indicates the model does not support level mode.
    // Example: using a level with a budget-only model
    ErrLevelNotSupported ErrorCode = "LEVEL_NOT_SUPPORTED"

    // ErrProviderMismatch indicates the provider does not match the model.
    // Example: applying Claude format to a Gemini model
    ErrProviderMismatch ErrorCode = "PROVIDER_MISMATCH"
)

// ThinkingError represents an error that occurred during thinking configuration processing.
//
// This error type provides structured information about the error, including:
//   - Code: A machine-readable error code for programmatic handling
//   - Message: A human-readable description of the error
//   - Model: The model name related to the error (optional)
//   - Details: Additional context information (optional)
type ThinkingError struct {
    // Code is the machine-readable error code
    Code ErrorCode
    // Message is the human-readable error description.
    // Should be lowercase, no trailing period, with context if applicable.
    Message string
    // Model is the model name related to this error (optional)
    Model string
    // Details contains additional context information (optional)
    Details map[string]interface{}
}

// Error implements the error interface.
// Returns the message directly without a code prefix.
// Use the Code field for programmatic error handling.
func (e *ThinkingError) Error() string {
    return e.Message
}

// NewThinkingError creates a new ThinkingError with the given code and message.
func NewThinkingError(code ErrorCode, message string) *ThinkingError {
    return &ThinkingError{
        Code:    code,
        Message: message,
    }
}

// NewThinkingErrorWithModel creates a new ThinkingError with model context.
func NewThinkingErrorWithModel(code ErrorCode, message, model string) *ThinkingError {
    return &ThinkingError{
        Code:    code,
        Message: message,
        Model:   model,
    }
}
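// Illustrative sketch, not part of this commit: callers can branch on the Code
// field instead of string-matching Message. The helper name below is invented
// for the illustration; it assumes the standard errors package and that the
// error came back through an error-typed return.

func describeThinkingError(err error) string {
    var te *ThinkingError
    if errors.As(err, &te) {
        switch te.Code {
        case ErrUnknownLevel:
            return "client sent a bad level: " + te.Message
        case ErrThinkingNotSupported:
            return "model " + te.Model + " has no thinking support"
        default:
            return string(te.Code) + ": " + te.Message
        }
    }
    return err.Error()
}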
34
internal/thinking/errors_test.go
Normal file
@@ -0,0 +1,34 @@
// Package thinking provides unified thinking configuration processing logic.
package thinking

import "testing"

// TestThinkingErrorError tests the Error() method of ThinkingError.
//
// Error() returns the message directly without a code prefix.
// Use the Code field for programmatic error handling.
func TestThinkingErrorError(t *testing.T) {
    tests := []struct {
        name     string
        err      *ThinkingError
        wantMsg  string
        wantCode ErrorCode
    }{
        {"invalid suffix format", NewThinkingError(ErrInvalidSuffix, "invalid suffix format: model(abc"), "invalid suffix format: model(abc", ErrInvalidSuffix},
        {"unknown level", NewThinkingError(ErrUnknownLevel, "unknown level: ultra"), "unknown level: ultra", ErrUnknownLevel},
        {"level not supported", NewThinkingError(ErrLevelNotSupported, "level \"xhigh\" not supported, valid levels: low, medium, high"), "level \"xhigh\" not supported, valid levels: low, medium, high", ErrLevelNotSupported},
        {"thinking not supported", NewThinkingErrorWithModel(ErrThinkingNotSupported, "thinking not supported for this model", "claude-haiku"), "thinking not supported for this model", ErrThinkingNotSupported},
        {"provider mismatch", NewThinkingError(ErrProviderMismatch, "provider mismatch: expected claude, got gemini"), "provider mismatch: expected claude, got gemini", ErrProviderMismatch},
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            if got := tt.err.Error(); got != tt.wantMsg {
                t.Errorf("Error() = %q, want %q", got, tt.wantMsg)
            }
            if tt.err.Code != tt.wantCode {
                t.Errorf("Code = %q, want %q", tt.err.Code, tt.wantCode)
            }
        })
    }
}
42
internal/thinking/extract_test.go
Normal file
@@ -0,0 +1,42 @@
// Package thinking provides unified thinking configuration processing logic.
package thinking

import "testing"

func TestExtractThinkingConfig(t *testing.T) {
    tests := []struct {
        name     string
        body     string
        provider string
        want     ThinkingConfig
    }{
        {"claude budget", `{"thinking":{"budget_tokens":16384}}`, "claude", ThinkingConfig{Mode: ModeBudget, Budget: 16384}},
        {"claude disabled type", `{"thinking":{"type":"disabled"}}`, "claude", ThinkingConfig{Mode: ModeNone, Budget: 0}},
        {"claude auto budget", `{"thinking":{"budget_tokens":-1}}`, "claude", ThinkingConfig{Mode: ModeAuto, Budget: -1}},
        {"claude enabled type without budget", `{"thinking":{"type":"enabled"}}`, "claude", ThinkingConfig{Mode: ModeAuto, Budget: -1}},
        {"claude enabled type with budget", `{"thinking":{"type":"enabled","budget_tokens":8192}}`, "claude", ThinkingConfig{Mode: ModeBudget, Budget: 8192}},
        {"claude disabled type overrides budget", `{"thinking":{"type":"disabled","budget_tokens":8192}}`, "claude", ThinkingConfig{Mode: ModeNone, Budget: 0}},
        {"gemini budget", `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, "gemini", ThinkingConfig{Mode: ModeBudget, Budget: 8192}},
        {"gemini level", `{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}`, "gemini", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}},
        {"gemini cli auto", `{"request":{"generationConfig":{"thinkingConfig":{"thinkingLevel":"auto"}}}}`, "gemini-cli", ThinkingConfig{Mode: ModeAuto, Budget: -1}},
        {"openai level", `{"reasoning_effort":"medium"}`, "openai", ThinkingConfig{Mode: ModeLevel, Level: LevelMedium}},
        {"openai none", `{"reasoning_effort":"none"}`, "openai", ThinkingConfig{Mode: ModeNone, Budget: 0}},
        {"codex effort high", `{"reasoning":{"effort":"high"}}`, "codex", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}},
        {"codex effort none", `{"reasoning":{"effort":"none"}}`, "codex", ThinkingConfig{Mode: ModeNone, Budget: 0}},
        {"iflow enable", `{"chat_template_kwargs":{"enable_thinking":true}}`, "iflow", ThinkingConfig{Mode: ModeBudget, Budget: 1}},
        {"iflow disable", `{"reasoning_split":false}`, "iflow", ThinkingConfig{Mode: ModeNone, Budget: 0}},
        {"unknown provider", `{"thinking":{"budget_tokens":123}}`, "unknown", ThinkingConfig{}},
        {"invalid json", `{"thinking":`, "claude", ThinkingConfig{}},
        {"empty body", "", "claude", ThinkingConfig{}},
        {"no config", `{}`, "gemini", ThinkingConfig{}},
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            got := extractThinkingConfig([]byte(tt.body), tt.provider)
            if got != tt.want {
                t.Fatalf("extractThinkingConfig() = %+v, want %+v", got, tt.want)
            }
        })
    }
}
116
internal/thinking/provider/claude/apply.go
Normal file
@@ -0,0 +1,116 @@
// Package claude implements thinking configuration scaffolding for Claude models.
//
// Claude models use the thinking.budget_tokens format with values in the range
// 1024-128000. Some Claude models support ZeroAllowed (sonnet-4-5, opus-4-5),
// while older models do not.
// See: _bmad-output/planning-artifacts/architecture.md#Epic-6
package claude

import (
    "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
    "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
    "github.com/tidwall/gjson"
    "github.com/tidwall/sjson"
)

// Applier implements thinking.ProviderApplier for Claude models.
// This applier is stateless and holds no configuration.
type Applier struct{}

// NewApplier creates a new Claude thinking applier.
func NewApplier() *Applier {
    return &Applier{}
}

func init() {
    thinking.RegisterProvider("claude", NewApplier())
}

// Apply applies thinking configuration to a Claude request body.
//
// IMPORTANT: This method expects config to be pre-validated by thinking.ValidateConfig.
// ValidateConfig handles:
//   - Mode conversion (Level→Budget, Auto→Budget)
//   - Budget clamping to the model range
//   - ZeroAllowed constraint enforcement
//
// Apply only processes ModeBudget and ModeNone; other modes are passed through unchanged.
//
// Expected output format when enabled:
//
//    {
//      "thinking": {
//        "type": "enabled",
//        "budget_tokens": 16384
//      }
//    }
//
// Expected output format when disabled:
//
//    {
//      "thinking": {
//        "type": "disabled"
//      }
//    }
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
    if modelInfo == nil {
        return body, nil
    }
    if modelInfo.Thinking == nil {
        if modelInfo.Type == "" {
            modelID := modelInfo.ID
            if modelID == "" {
                modelID = "unknown"
            }
            return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID)
        }
        return applyCompatibleClaude(body, config)
    }

    // Only process ModeBudget and ModeNone; other modes pass through
    // (the caller should use ValidateConfig first to normalize modes).
    if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone {
        return body, nil
    }

    if len(body) == 0 || !gjson.ValidBytes(body) {
        body = []byte(`{}`)
    }

    // Budget is expected to be pre-validated by ValidateConfig (clamped, ZeroAllowed enforced).
    // Decide enabled/disabled based on the budget value.
    if config.Budget == 0 {
        result, _ := sjson.SetBytes(body, "thinking.type", "disabled")
        result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens")
        return result, nil
    }

    result, _ := sjson.SetBytes(body, "thinking.type", "enabled")
    result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget)
    return result, nil
}

func applyCompatibleClaude(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
    if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
        return body, nil
    }

    if len(body) == 0 || !gjson.ValidBytes(body) {
        body = []byte(`{}`)
    }

    switch config.Mode {
    case thinking.ModeNone:
        result, _ := sjson.SetBytes(body, "thinking.type", "disabled")
        result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens")
        return result, nil
    case thinking.ModeAuto:
        result, _ := sjson.SetBytes(body, "thinking.type", "enabled")
        result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens")
        return result, nil
    default:
        result, _ := sjson.SetBytes(body, "thinking.type", "enabled")
        result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget)
        return result, nil
    }
}
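// Illustrative sketch, not part of this commit: end to end, the applier is a
// pure transform over the request body. Calling it directly looks like the
// example below (in production the thinking package dispatches to it through
// the provider registry; the model shape here is invented, and the fmt import
// is assumed).

func ExampleApplier_Apply() {
    applier := NewApplier()
    modelInfo := &registry.ModelInfo{
        ID:       "claude-sonnet-4-5",
        Thinking: &registry.ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true},
    }
    // Pre-validated config: ValidateConfig is assumed to have clamped the budget already.
    cfg := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}

    out, err := applier.Apply([]byte(`{"model":"claude-sonnet-4-5"}`), cfg, modelInfo)
    if err != nil {
        panic(err)
    }
    fmt.Println(string(out))
    // Output: {"model":"claude-sonnet-4-5","thinking":{"type":"enabled","budget_tokens":16384}}
}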
288
internal/thinking/provider/claude/apply_test.go
Normal file
@@ -0,0 +1,288 @@
// Package claude implements thinking configuration for Claude models.
package claude

import (
    "testing"

    "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
    "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
    "github.com/tidwall/gjson"
)

// =============================================================================
// Unit Tests: Applier Creation and Interface
// =============================================================================

func TestNewApplier(t *testing.T) {
    applier := NewApplier()
    if applier == nil {
        t.Fatal("NewApplier() returned nil")
    }
}

func TestApplierImplementsInterface(t *testing.T) {
    var _ thinking.ProviderApplier = (*Applier)(nil)
}

// =============================================================================
// Unit Tests: Budget and Disable Logic (Pre-validated Config)
// =============================================================================

// TestClaudeApplyBudgetAndNone tests budget values and disable modes.
// NOTE: These tests assume config has been pre-validated by ValidateConfig.
// Apply trusts the input and does not perform clamping.
func TestClaudeApplyBudgetAndNone(t *testing.T) {
    applier := NewApplier()
    modelInfo := buildClaudeModelInfo()

    tests := []struct {
        name         string
        config       thinking.ThinkingConfig
        wantType     string
        wantBudget   int
        wantBudgetOK bool
    }{
        // Valid pre-validated budget values
        {"budget 16k", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}, "enabled", 16384, true},
        {"budget min", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1024}, "enabled", 1024, true},
        {"budget max", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 128000}, "enabled", 128000, true},
        {"budget mid", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 50000}, "enabled", 50000, true},
        // Disable cases
        {"budget zero disables", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, "disabled", 0, false},
        {"mode none disables", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "disabled", 0, false},
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            result, err := applier.Apply([]byte(`{}`), tt.config, modelInfo)
            if err != nil {
                t.Fatalf("Apply() error = %v", err)
            }

            thinkingType := gjson.GetBytes(result, "thinking.type").String()
            if thinkingType != tt.wantType {
                t.Fatalf("thinking.type = %q, want %q", thinkingType, tt.wantType)
            }

            budgetValue := gjson.GetBytes(result, "thinking.budget_tokens")
            if budgetValue.Exists() != tt.wantBudgetOK {
                t.Fatalf("thinking.budget_tokens exists = %v, want %v", budgetValue.Exists(), tt.wantBudgetOK)
            }
            if tt.wantBudgetOK {
                if got := int(budgetValue.Int()); got != tt.wantBudget {
                    t.Fatalf("thinking.budget_tokens = %d, want %d", got, tt.wantBudget)
                }
            }
        })
    }
}

// TestClaudeApplyPassthroughBudget tests that Apply trusts pre-validated budget values.
// It does NOT perform clamping - that is ValidateConfig's responsibility.
func TestClaudeApplyPassthroughBudget(t *testing.T) {
    applier := NewApplier()
    modelInfo := buildClaudeModelInfo()

    tests := []struct {
        name       string
        config     thinking.ThinkingConfig
        wantBudget int
    }{
        // Apply should pass through the budget value as-is
        // (ValidateConfig would have clamped these, but Apply trusts the input).
        {"passes through any budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 500}, 500},
        {"passes through large budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 200000}, 200000},
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            result, err := applier.Apply([]byte(`{}`), tt.config, modelInfo)
            if err != nil {
                t.Fatalf("Apply() error = %v", err)
            }

            if got := int(gjson.GetBytes(result, "thinking.budget_tokens").Int()); got != tt.wantBudget {
                t.Fatalf("thinking.budget_tokens = %d, want %d (passthrough)", got, tt.wantBudget)
            }
        })
    }
}

// =============================================================================
// Unit Tests: Mode Passthrough (Strict Layering)
// =============================================================================

// TestClaudeApplyModePassthrough tests that non-Budget/None modes pass through unchanged.
// Apply expects ValidateConfig to have already converted Level/Auto to Budget.
func TestClaudeApplyModePassthrough(t *testing.T) {
    applier := NewApplier()
    modelInfo := buildClaudeModelInfo()

    tests := []struct {
        name   string
        config thinking.ThinkingConfig
        body   string
    }{
        {"ModeLevel passes through", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: "high"}, `{"model":"test"}`},
        {"ModeAuto passes through", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, `{"model":"test"}`},
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            result, err := applier.Apply([]byte(tt.body), tt.config, modelInfo)
            if err != nil {
                t.Fatalf("Apply() error = %v", err)
            }

            // Should return the body unchanged
            if string(result) != tt.body {
                t.Fatalf("Apply() = %s, want %s (passthrough)", string(result), tt.body)
            }
        })
    }
}

// =============================================================================
// Unit Tests: Output Format
// =============================================================================

// TestClaudeApplyOutputFormat tests the exact JSON output format.
//
// Claude expects:
//
//    {
//      "thinking": {
//        "type": "enabled",
//        "budget_tokens": 16384
//      }
//    }
func TestClaudeApplyOutputFormat(t *testing.T) {
    tests := []struct {
        name     string
        config   thinking.ThinkingConfig
        wantJSON string
    }{
        {
            "enabled with budget",
            thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384},
            `{"thinking":{"type":"enabled","budget_tokens":16384}}`,
        },
        {
            "disabled",
            thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0},
            `{"thinking":{"type":"disabled"}}`,
        },
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            applier := NewApplier()
            modelInfo := buildClaudeModelInfo()

            result, err := applier.Apply([]byte(`{}`), tt.config, modelInfo)
            if err != nil {
                t.Fatalf("Apply() error = %v", err)
            }
            if string(result) != tt.wantJSON {
                t.Fatalf("Apply() = %s, want %s", result, tt.wantJSON)
            }
        })
    }
}

// =============================================================================
// Unit Tests: Body Merging
// =============================================================================

// TestClaudeApplyWithExistingBody tests applying config to an existing request body.
func TestClaudeApplyWithExistingBody(t *testing.T) {
    tests := []struct {
        name     string
        body     string
        config   thinking.ThinkingConfig
        wantBody string
    }{
        {
            "add to empty body",
            `{}`,
            thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384},
            `{"thinking":{"type":"enabled","budget_tokens":16384}}`,
        },
        {
            "preserve existing fields",
            `{"model":"claude-sonnet-4-5","messages":[]}`,
            thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192},
            `{"model":"claude-sonnet-4-5","messages":[],"thinking":{"type":"enabled","budget_tokens":8192}}`,
        },
        {
            "override existing thinking",
            `{"thinking":{"type":"enabled","budget_tokens":1000}}`,
            thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384},
            `{"thinking":{"type":"enabled","budget_tokens":16384}}`,
        },
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            applier := NewApplier()
            modelInfo := buildClaudeModelInfo()

            result, err := applier.Apply([]byte(tt.body), tt.config, modelInfo)
            if err != nil {
                t.Fatalf("Apply() error = %v", err)
            }
            if string(result) != tt.wantBody {
                t.Fatalf("Apply() = %s, want %s", result, tt.wantBody)
            }
        })
    }
}

// TestClaudeApplyWithNilBody tests handling of nil/empty body.
func TestClaudeApplyWithNilBody(t *testing.T) {
    applier := NewApplier()
    modelInfo := buildClaudeModelInfo()

    tests := []struct {
        name       string
        body       []byte
        wantBudget int
    }{
        {"nil body", nil, 16384},
        {"empty body", []byte{}, 16384},
        {"empty object", []byte(`{}`), 16384},
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}
            result, err := applier.Apply(tt.body, config, modelInfo)
            if err != nil {
                t.Fatalf("Apply() error = %v", err)
            }

            if got := gjson.GetBytes(result, "thinking.type").String(); got != "enabled" {
                t.Fatalf("thinking.type = %q, want %q", got, "enabled")
            }
            if got := int(gjson.GetBytes(result, "thinking.budget_tokens").Int()); got != tt.wantBudget {
                t.Fatalf("thinking.budget_tokens = %d, want %d", got, tt.wantBudget)
            }
        })
    }
}

// =============================================================================
// Helper Functions
// =============================================================================

func buildClaudeModelInfo() *registry.ModelInfo {
    return &registry.ModelInfo{
        ID: "claude-sonnet-4-5",
        Thinking: &registry.ThinkingSupport{
            Min:            1024,
            Max:            128000,
            ZeroAllowed:    true,
            DynamicAllowed: false,
        },
    }
}
138
internal/thinking/provider/codex/apply.go
Normal file
@@ -0,0 +1,138 @@
// Package codex implements thinking configuration for Codex (OpenAI Responses API) models.
//
// Codex models use the reasoning.effort format with discrete levels
// (low/medium/high). This is similar to OpenAI but uses the nested field
// "reasoning.effort" instead of "reasoning_effort".
// See: _bmad-output/planning-artifacts/architecture.md#Epic-8
package codex

import (
    "strings"

    "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
    "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
    "github.com/tidwall/gjson"
    "github.com/tidwall/sjson"
)

// Applier implements thinking.ProviderApplier for Codex models.
//
// Codex-specific behavior:
//   - Output format: reasoning.effort (string: low/medium/high/xhigh)
//   - Level-only mode: no numeric budget support
//   - Some models support ZeroAllowed (gpt-5.1, gpt-5.2)
type Applier struct{}

var _ thinking.ProviderApplier = (*Applier)(nil)

// NewApplier creates a new Codex thinking applier.
func NewApplier() *Applier {
    return &Applier{}
}

func init() {
    thinking.RegisterProvider("codex", NewApplier())
}

// Apply applies thinking configuration to a Codex request body.
//
// Expected output format:
//
//    {
//      "reasoning": {
//        "effort": "high"
//      }
//    }
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
    if modelInfo == nil {
        return body, nil
    }
    if modelInfo.Thinking == nil {
        if modelInfo.Type == "" {
            modelID := modelInfo.ID
            if modelID == "" {
                modelID = "unknown"
            }
            return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID)
        }
        return applyCompatibleCodex(body, config)
    }

    // Only handle ModeLevel and ModeNone; other modes pass through unchanged.
    if config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone {
        return body, nil
    }

    if len(body) == 0 || !gjson.ValidBytes(body) {
        body = []byte(`{}`)
    }

    if config.Mode == thinking.ModeLevel {
        result, _ := sjson.SetBytes(body, "reasoning.effort", string(config.Level))
        return result, nil
    }

    effort := ""
    support := modelInfo.Thinking
    if config.Budget == 0 {
        if support.ZeroAllowed || hasLevel(support.Levels, string(thinking.LevelNone)) {
            effort = string(thinking.LevelNone)
        }
    }
    if effort == "" && config.Level != "" {
        effort = string(config.Level)
    }
    if effort == "" && len(support.Levels) > 0 {
        effort = support.Levels[0]
    }
    if effort == "" {
        return body, nil
    }

    result, _ := sjson.SetBytes(body, "reasoning.effort", effort)
    return result, nil
}

func applyCompatibleCodex(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
    if len(body) == 0 || !gjson.ValidBytes(body) {
        body = []byte(`{}`)
    }

    var effort string
    switch config.Mode {
    case thinking.ModeLevel:
        if config.Level == "" {
            return body, nil
        }
        effort = string(config.Level)
    case thinking.ModeNone:
        effort = string(thinking.LevelNone)
        if config.Level != "" {
            effort = string(config.Level)
        }
    case thinking.ModeAuto:
        // Auto mode for user-defined models: pass through as "auto"
        effort = string(thinking.LevelAuto)
    case thinking.ModeBudget:
        // Budget mode: convert the budget to a level using threshold mapping
        level, ok := thinking.ConvertBudgetToLevel(config.Budget)
        if !ok {
            return body, nil
        }
        effort = level
    default:
        return body, nil
    }

    result, _ := sjson.SetBytes(body, "reasoning.effort", effort)
    return result, nil
}

func hasLevel(levels []string, target string) bool {
    for _, level := range levels {
        if strings.EqualFold(strings.TrimSpace(level), target) {
            return true
        }
    }
    return false
}
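// Illustrative sketch, not part of this commit: the happy path mirrors the
// Claude applier but emits the nested reasoning.effort field. The model shape
// below is invented for the illustration, and the fmt import is assumed.

func ExampleApplier_Apply() {
    applier := NewApplier()
    modelInfo := &registry.ModelInfo{
        ID:       "gpt-5.2",
        Thinking: &registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}},
    }
    cfg := thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}

    out, err := applier.Apply([]byte(`{}`), cfg, modelInfo)
    if err != nil {
        panic(err)
    }
    fmt.Println(string(out))
    // Output: {"reasoning":{"effort":"high"}}
}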
172
internal/thinking/provider/gemini/apply.go
Normal file
@@ -0,0 +1,172 @@
// Package gemini implements thinking configuration for Gemini models.
//
// Gemini models have two formats:
//   - Gemini 2.5: Uses thinkingBudget (numeric)
//   - Gemini 3.x: Uses thinkingLevel (string: minimal/low/medium/high)
//     or thinkingBudget=-1 for auto/dynamic mode
//
// Output format is determined by ThinkingConfig.Mode and ThinkingSupport.Levels:
//   - ModeAuto: Always uses thinkingBudget=-1 (both Gemini 2.5 and 3.x)
//   - len(Levels) > 0: Uses thinkingLevel (Gemini 3.x discrete levels)
//   - len(Levels) == 0: Uses thinkingBudget (Gemini 2.5)
package gemini

import (
	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

// Applier applies thinking configuration for Gemini models.
//
// Gemini-specific behavior:
//   - Gemini 2.5: thinkingBudget format, flash series supports ZeroAllowed
//   - Gemini 3.x: thinkingLevel format, cannot be disabled
//   - Use ThinkingSupport.Levels to decide output format
type Applier struct{}

// NewApplier creates a new Gemini thinking applier.
func NewApplier() *Applier {
	return &Applier{}
}

func init() {
	thinking.RegisterProvider("gemini", NewApplier())
}

// Apply applies thinking configuration to Gemini request body.
//
// Expected output format (Gemini 2.5):
//
//	{
//	  "generationConfig": {
//	    "thinkingConfig": {
//	      "thinkingBudget": 8192,
//	      "includeThoughts": true
//	    }
//	  }
//	}
//
// Expected output format (Gemini 3.x):
//
//	{
//	  "generationConfig": {
//	    "thinkingConfig": {
//	      "thinkingLevel": "high",
//	      "includeThoughts": true
//	    }
//	  }
//	}
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
	if modelInfo == nil {
		return body, nil
	}
	if modelInfo.Thinking == nil {
		if modelInfo.Type == "" {
			modelID := modelInfo.ID
			if modelID == "" {
				modelID = "unknown"
			}
			return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID)
		}
		return a.applyCompatible(body, config)
	}

	if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
		return body, nil
	}

	if len(body) == 0 || !gjson.ValidBytes(body) {
		body = []byte(`{}`)
	}

	// Choose format based on config.Mode and model capabilities:
	//   - ModeLevel: use Level format (validation will reject unsupported levels)
	//   - ModeNone: use Level format if model has Levels, else Budget format
	//   - ModeBudget/ModeAuto: use Budget format
	switch config.Mode {
	case thinking.ModeLevel:
		return a.applyLevelFormat(body, config)
	case thinking.ModeNone:
		// ModeNone: route based on model capability (has Levels or not)
		if len(modelInfo.Thinking.Levels) > 0 {
			return a.applyLevelFormat(body, config)
		}
		return a.applyBudgetFormat(body, config)
	default:
		return a.applyBudgetFormat(body, config)
	}
}

func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
	if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
		return body, nil
	}

	if len(body) == 0 || !gjson.ValidBytes(body) {
		body = []byte(`{}`)
	}

	if config.Mode == thinking.ModeAuto {
		return a.applyBudgetFormat(body, config)
	}

	if config.Mode == thinking.ModeLevel || (config.Mode == thinking.ModeNone && config.Level != "") {
		return a.applyLevelFormat(body, config)
	}

	return a.applyBudgetFormat(body, config)
}

func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
	// ModeNone semantics:
	//   - ModeNone + Budget=0: completely disable thinking (not possible for Level-only models)
	//   - ModeNone + Budget>0: forced to think but hide output (includeThoughts=false)
	// ValidateConfig sets config.Level to the lowest level when ModeNone + Budget > 0.

	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
	result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingBudget")

	if config.Mode == thinking.ModeNone {
		result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", false)
		if config.Level != "" {
			result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingLevel", string(config.Level))
		}
		return result, nil
	}

	// Only handle ModeLevel - budget conversion should be done by upper layer
	if config.Mode != thinking.ModeLevel {
		return body, nil
	}

	level := string(config.Level)
	result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingLevel", level)
	result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", true)
	return result, nil
}

func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
	result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingLevel")

	budget := config.Budget
	// ModeNone semantics:
	//   - ModeNone + Budget=0: completely disable thinking
	//   - ModeNone + Budget>0: forced to think but hide output (includeThoughts=false)
	// When ZeroAllowed=false, ValidateConfig clamps Budget to Min while preserving ModeNone.
	includeThoughts := false
	switch config.Mode {
	case thinking.ModeNone:
		includeThoughts = false
	case thinking.ModeAuto:
		includeThoughts = true
	default:
		includeThoughts = budget > 0
	}

	result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingBudget", budget)
	result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", includeThoughts)
	return result, nil
}
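For orientation, a minimal sketch of the call sequence this applier expects, assembled only from helpers that appear in this diff (ParseSuffix, ValidateConfig, Apply); the inline ThinkingSupport values are illustrative assumptions, not real registry data:

	// Sketch (assumed values): parse suffix, validate against model limits, then apply.
	suffix := thinking.ParseSuffix("gemini-2.5-pro(8192)") // ModelName "gemini-2.5-pro", RawSuffix "8192"
	modelInfo := &registry.ModelInfo{
		ID:       suffix.ModelName,
		Thinking: &registry.ThinkingSupport{Min: 128, Max: 32768}, // assumed limits for illustration
	}
	cfg := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
	normalized, err := thinking.ValidateConfig(cfg, modelInfo.Thinking)
	if err != nil {
		// handle validation error
	}
	out, _ := NewApplier().Apply([]byte(`{}`), *normalized, modelInfo)
	// out: {"generationConfig":{"thinkingConfig":{"thinkingBudget":8192,"includeThoughts":true}}}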
internal/thinking/provider/gemini/apply_test.go (new file, 526 lines)
@@ -0,0 +1,526 @@
// Package gemini implements thinking configuration for Gemini models.
package gemini

import (
	"testing"

	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
	"github.com/tidwall/gjson"
)

func TestNewApplier(t *testing.T) {
	applier := NewApplier()
	if applier == nil {
		t.Fatal("NewApplier() returned nil")
	}
}

// parseConfigFromSuffix parses a raw suffix into a ThinkingConfig.
// This helper reduces code duplication in end-to-end tests (L1 fix).
func parseConfigFromSuffix(rawSuffix string) (thinking.ThinkingConfig, bool) {
	if budget, ok := thinking.ParseNumericSuffix(rawSuffix); ok {
		return thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: budget}, true
	}
	if level, ok := thinking.ParseLevelSuffix(rawSuffix); ok {
		return thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: level}, true
	}
	if mode, ok := thinking.ParseSpecialSuffix(rawSuffix); ok {
		config := thinking.ThinkingConfig{Mode: mode}
		if mode == thinking.ModeAuto {
			config.Budget = -1
		}
		return config, true
	}
	return thinking.ThinkingConfig{}, false
}

func TestApplierImplementsInterface(t *testing.T) {
	// Compile-time check: if Applier doesn't implement the interface, this won't compile
	var _ thinking.ProviderApplier = (*Applier)(nil)
}

// TestGeminiApply tests the Gemini thinking applier.
//
// Gemini-specific behavior:
//   - Gemini 2.5: thinkingBudget format (numeric)
//   - Gemini 3.x: thinkingLevel format (string)
//   - Flash series: ZeroAllowed=true
//   - Pro series: ZeroAllowed=false, Min=128
//   - CRITICAL: When budget=0/none, set includeThoughts=false
//
// Depends on: Epic 7 Story 7-2, 7-3
func TestGeminiApply(t *testing.T) {
	applier := NewApplier()
	tests := []struct {
		name                string
		model               string
		config              thinking.ThinkingConfig
		wantField           string
		wantValue           interface{}
		wantIncludeThoughts bool // CRITICAL: includeThoughts field
	}{
		// Gemini 2.5 Flash (ZeroAllowed=true)
		{"flash budget 8k", "gemini-2.5-flash", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, "thinkingBudget", 8192, true},
		{"flash zero", "gemini-2.5-flash", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, "thinkingBudget", 0, false},
		{"flash none", "gemini-2.5-flash", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "thinkingBudget", 0, false},

		// Gemini 2.5 Pro (ZeroAllowed=false, Min=128)
		{"pro budget 8k", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, "thinkingBudget", 8192, true},
		{"pro zero - clamp", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, "thinkingBudget", 128, false},
		{"pro none - clamp", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "thinkingBudget", 128, false},
		{"pro below min", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 50}, "thinkingBudget", 128, true},
		{"pro above max", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 50000}, "thinkingBudget", 32768, true},
		{"pro auto", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, "thinkingBudget", -1, true},

		// Gemini 3 Pro (Level mode, ZeroAllowed=false)
		{"g3-pro high", "gemini-3-pro-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "thinkingLevel", "high", true},
		{"g3-pro low", "gemini-3-pro-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, "thinkingLevel", "low", true},
		{"g3-pro auto", "gemini-3-pro-preview", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, "thinkingBudget", -1, true},

		// Gemini 3 Flash (Level mode, minimal is lowest)
		{"g3-flash high", "gemini-3-flash-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "thinkingLevel", "high", true},
		{"g3-flash medium", "gemini-3-flash-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, "thinkingLevel", "medium", true},
		{"g3-flash minimal", "gemini-3-flash-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, "thinkingLevel", "minimal", true},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			modelInfo := buildGeminiModelInfo(tt.model)
			normalized, err := thinking.ValidateConfig(tt.config, modelInfo.Thinking)
			if err != nil {
				t.Fatalf("ValidateConfig() error = %v", err)
			}

			result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}

			gotField := gjson.GetBytes(result, "generationConfig.thinkingConfig."+tt.wantField)
			switch want := tt.wantValue.(type) {
			case int:
				if int(gotField.Int()) != want {
					t.Fatalf("%s = %d, want %d", tt.wantField, gotField.Int(), want)
				}
			case string:
				if gotField.String() != want {
					t.Fatalf("%s = %q, want %q", tt.wantField, gotField.String(), want)
				}
			case bool:
				if gotField.Bool() != want {
					t.Fatalf("%s = %v, want %v", tt.wantField, gotField.Bool(), want)
				}
			default:
				t.Fatalf("unsupported wantValue type %T", tt.wantValue)
			}

			gotIncludeThoughts := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts").Bool()
			if gotIncludeThoughts != tt.wantIncludeThoughts {
				t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, tt.wantIncludeThoughts)
			}
		})
	}
}

// TestGeminiApplyEndToEndBudgetZero tests suffix parsing + validation + apply for budget=0.
//
// This test covers the complete flow from suffix parsing to Apply output:
//   - AC#1: ModeBudget+Budget=0 → ModeNone conversion
//   - AC#3: Gemini 3 ModeNone+Budget>0 → includeThoughts=false + thinkingLevel=low
//   - AC#4: Gemini 2.5 Pro (0) → clamped to 128 + includeThoughts=false
func TestGeminiApplyEndToEndBudgetZero(t *testing.T) {
	tests := []struct {
		name                string
		model               string
		wantModel           string
		wantField           string // "thinkingBudget" or "thinkingLevel"
		wantValue           interface{}
		wantIncludeThoughts bool
	}{
		// AC#4: Gemini 2.5 Pro - Budget format
		{"gemini-25-pro zero", "gemini-2.5-pro(0)", "gemini-2.5-pro", "thinkingBudget", 128, false},
		// AC#3: Gemini 3 Pro - Level format, ModeNone clamped to Budget=128, uses lowest level
		{"gemini-3-pro zero", "gemini-3-pro-preview(0)", "gemini-3-pro-preview", "thinkingLevel", "low", false},
		{"gemini-3-pro none", "gemini-3-pro-preview(none)", "gemini-3-pro-preview", "thinkingLevel", "low", false},
		// Gemini 3 Flash - Level format, lowest level is "minimal"
		{"gemini-3-flash zero", "gemini-3-flash-preview(0)", "gemini-3-flash-preview", "thinkingLevel", "minimal", false},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			suffix := thinking.ParseSuffix(tt.model)
			if !suffix.HasSuffix {
				t.Fatalf("ParseSuffix(%q) HasSuffix = false, want true", tt.model)
			}
			if suffix.ModelName != tt.wantModel {
				t.Fatalf("ParseSuffix(%q) ModelName = %q, want %q", tt.model, suffix.ModelName, tt.wantModel)
			}

			// Parse suffix value using helper function (L1 fix)
			config, ok := parseConfigFromSuffix(suffix.RawSuffix)
			if !ok {
				t.Fatalf("ParseSuffix(%q) RawSuffix = %q is not a valid suffix", tt.model, suffix.RawSuffix)
			}

			modelInfo := buildGeminiModelInfo(suffix.ModelName)
			normalized, err := thinking.ValidateConfig(config, modelInfo.Thinking)
			if err != nil {
				t.Fatalf("ValidateConfig() error = %v", err)
			}

			applier := NewApplier()
			result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}

			// Verify the output field value
			gotField := gjson.GetBytes(result, "generationConfig.thinkingConfig."+tt.wantField)
			switch want := tt.wantValue.(type) {
			case int:
				if int(gotField.Int()) != want {
					t.Fatalf("%s = %d, want %d", tt.wantField, gotField.Int(), want)
				}
			case string:
				if gotField.String() != want {
					t.Fatalf("%s = %q, want %q", tt.wantField, gotField.String(), want)
				}
			}

			gotIncludeThoughts := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts").Bool()
			if gotIncludeThoughts != tt.wantIncludeThoughts {
				t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, tt.wantIncludeThoughts)
			}
		})
	}
}

// TestGeminiApplyEndToEndAuto tests auto mode through both suffix parsing and direct config.
//
// This test covers:
//   - AC#2: Gemini 2.5 auto uses thinkingBudget=-1
//   - AC#3: Gemini 3 auto uses thinkingBudget=-1 (not thinkingLevel)
//   - Suffix parsing path: (auto) and (-1) suffixes
//   - Direct config path: ModeLevel + Level=auto → ModeAuto conversion
func TestGeminiApplyEndToEndAuto(t *testing.T) {
	tests := []struct {
		name                string
		model               string                   // model name (with suffix for parsing, or plain for direct config)
		directConfig        *thinking.ThinkingConfig // if not nil, use direct config instead of suffix parsing
		wantField           string
		wantValue           int
		wantIncludeThoughts bool
	}{
		// Suffix parsing path - Budget-only model (Gemini 2.5)
		{"suffix auto g25", "gemini-2.5-pro(auto)", nil, "thinkingBudget", -1, true},
		{"suffix -1 g25", "gemini-2.5-pro(-1)", nil, "thinkingBudget", -1, true},
		// Suffix parsing path - Hybrid model (Gemini 3)
		{"suffix auto g3", "gemini-3-pro-preview(auto)", nil, "thinkingBudget", -1, true},
		{"suffix -1 g3", "gemini-3-pro-preview(-1)", nil, "thinkingBudget", -1, true},
		// Direct config path - Level=auto → ModeAuto conversion
		{"direct level=auto g25", "gemini-2.5-pro", &thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelAuto}, "thinkingBudget", -1, true},
		{"direct level=auto g3", "gemini-3-pro-preview", &thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelAuto}, "thinkingBudget", -1, true},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			var config thinking.ThinkingConfig
			var modelName string

			if tt.directConfig != nil {
				// Direct config path
				config = *tt.directConfig
				modelName = tt.model
			} else {
				// Suffix parsing path
				suffix := thinking.ParseSuffix(tt.model)
				if !suffix.HasSuffix {
					t.Fatalf("ParseSuffix(%q) HasSuffix = false", tt.model)
				}
				modelName = suffix.ModelName
				var ok bool
				config, ok = parseConfigFromSuffix(suffix.RawSuffix)
				if !ok {
					t.Fatalf("parseConfigFromSuffix(%q) failed", suffix.RawSuffix)
				}
			}

			modelInfo := buildGeminiModelInfo(modelName)
			normalized, err := thinking.ValidateConfig(config, modelInfo.Thinking)
			if err != nil {
				t.Fatalf("ValidateConfig() error = %v", err)
			}

			// Verify ModeAuto after validation
			if normalized.Mode != thinking.ModeAuto {
				t.Fatalf("ValidateConfig() Mode = %v, want ModeAuto", normalized.Mode)
			}

			applier := NewApplier()
			result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}

			gotField := gjson.GetBytes(result, "generationConfig.thinkingConfig."+tt.wantField)
			if int(gotField.Int()) != tt.wantValue {
				t.Fatalf("%s = %d, want %d", tt.wantField, gotField.Int(), tt.wantValue)
			}

			gotIncludeThoughts := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts").Bool()
			if gotIncludeThoughts != tt.wantIncludeThoughts {
				t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, tt.wantIncludeThoughts)
			}
		})
	}
}

func TestGeminiApplyInvalidBody(t *testing.T) {
	applier := NewApplier()
	modelInfo := buildGeminiModelInfo("gemini-2.5-flash")
	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
	normalized, err := thinking.ValidateConfig(config, modelInfo.Thinking)
	if err != nil {
		t.Fatalf("ValidateConfig() error = %v", err)
	}

	tests := []struct {
		name string
		body []byte
	}{
		{"nil body", nil},
		{"empty body", []byte{}},
		{"invalid json", []byte("{\"not json\"")},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := applier.Apply(tt.body, *normalized, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}

			gotBudget := int(gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget").Int())
			if gotBudget != 8192 {
				t.Fatalf("thinkingBudget = %d, want %d", gotBudget, 8192)
			}

			gotIncludeThoughts := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts").Bool()
			if !gotIncludeThoughts {
				t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, true)
			}
		})
	}
}

// TestGeminiApplyConflictingFields tests that conflicting fields are removed.
//
// When applying Budget format, any existing thinkingLevel should be removed.
// When applying Level format, any existing thinkingBudget should be removed.
func TestGeminiApplyConflictingFields(t *testing.T) {
	applier := NewApplier()

	tests := []struct {
		name         string
		model        string
		config       thinking.ThinkingConfig
		existingBody string
		wantField    string // expected field to exist
		wantNoField  string // expected field to NOT exist
	}{
		// Budget format should remove existing thinkingLevel
		{
			"budget removes level",
			"gemini-2.5-pro",
			thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192},
			`{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}`,
			"thinkingBudget",
			"thinkingLevel",
		},
		// Level format should remove existing thinkingBudget
		{
			"level removes budget",
			"gemini-3-pro-preview",
			thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh},
			`{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`,
			"thinkingLevel",
			"thinkingBudget",
		},
		// ModeAuto uses budget format, should remove thinkingLevel
		{
			"auto removes level",
			"gemini-3-pro-preview",
			thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1},
			`{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}`,
			"thinkingBudget",
			"thinkingLevel",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			modelInfo := buildGeminiModelInfo(tt.model)
			result, err := applier.Apply([]byte(tt.existingBody), tt.config, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}

			// Verify expected field exists
			wantPath := "generationConfig.thinkingConfig." + tt.wantField
			if !gjson.GetBytes(result, wantPath).Exists() {
				t.Fatalf("%s should exist in result: %s", tt.wantField, string(result))
			}

			// Verify conflicting field was removed
			noPath := "generationConfig.thinkingConfig." + tt.wantNoField
			if gjson.GetBytes(result, noPath).Exists() {
				t.Fatalf("%s should NOT exist in result: %s", tt.wantNoField, string(result))
			}
		})
	}
}

// TestGeminiApplyThinkingNotSupported tests error handling when modelInfo.Thinking is nil.
func TestGeminiApplyThinkingNotSupported(t *testing.T) {
	applier := NewApplier()
	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}

	// Model with nil Thinking support
	modelInfo := &registry.ModelInfo{ID: "gemini-unknown", Thinking: nil}

	_, err := applier.Apply([]byte(`{}`), config, modelInfo)
	if err == nil {
		t.Fatal("Apply() expected error for nil Thinking, got nil")
	}

	// Verify it's the correct error type
	thinkErr, ok := err.(*thinking.ThinkingError)
	if !ok {
		t.Fatalf("Apply() error type = %T, want *thinking.ThinkingError", err)
	}
	if thinkErr.Code != thinking.ErrThinkingNotSupported {
		t.Fatalf("Apply() error code = %v, want %v", thinkErr.Code, thinking.ErrThinkingNotSupported)
	}
}

func buildGeminiModelInfo(modelID string) *registry.ModelInfo {
	support := &registry.ThinkingSupport{}
	switch modelID {
	case "gemini-2.5-pro":
		support.Min = 128
		support.Max = 32768
		support.ZeroAllowed = false
		support.DynamicAllowed = true
	case "gemini-2.5-flash", "gemini-2.5-flash-lite":
		support.Min = 0
		support.Max = 24576
		support.ZeroAllowed = true
		support.DynamicAllowed = true
	case "gemini-3-pro-preview":
		support.Min = 128
		support.Max = 32768
		support.ZeroAllowed = false
		support.DynamicAllowed = true
		support.Levels = []string{"low", "high"}
	case "gemini-3-flash-preview":
		support.Min = 128
		support.Max = 32768
		support.ZeroAllowed = false
		support.DynamicAllowed = true
		support.Levels = []string{"minimal", "low", "medium", "high"}
	default:
		// Unknown model - return nil Thinking to trigger error path
		return &registry.ModelInfo{ID: modelID, Thinking: nil}
	}
	return &registry.ModelInfo{
		ID:       modelID,
		Thinking: support,
	}
}

// TestGeminiApplyNilModelInfo tests Apply behavior when modelInfo is nil.
// Coverage: apply.go:56-58 (H1)
func TestGeminiApplyNilModelInfo(t *testing.T) {
	applier := NewApplier()
	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
	body := []byte(`{"existing": "data"}`)

	result, err := applier.Apply(body, config, nil)
	if err != nil {
		t.Fatalf("Apply() with nil modelInfo should not error, got: %v", err)
	}
	if string(result) != string(body) {
		t.Fatalf("Apply() with nil modelInfo should return original body, got: %s", result)
	}
}

// TestGeminiApplyEmptyModelID tests Apply when modelID is empty.
// Coverage: apply.go:61-63 (H2)
func TestGeminiApplyEmptyModelID(t *testing.T) {
	applier := NewApplier()
	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
	modelInfo := &registry.ModelInfo{ID: "", Thinking: nil}

	_, err := applier.Apply([]byte(`{}`), config, modelInfo)
	if err == nil {
		t.Fatal("Apply() with empty modelID and nil Thinking should error")
	}
	thinkErr, ok := err.(*thinking.ThinkingError)
	if !ok {
		t.Fatalf("Apply() error type = %T, want *thinking.ThinkingError", err)
	}
	if thinkErr.Model != "unknown" {
		t.Fatalf("Apply() error model = %q, want %q", thinkErr.Model, "unknown")
	}
}

// TestGeminiApplyModeBudgetWithLevels tests that ModeBudget is applied with budget format
// even for models with Levels. The Apply layer handles ModeBudget by applying thinkingBudget.
// Coverage: apply.go:88-90
func TestGeminiApplyModeBudgetWithLevels(t *testing.T) {
	applier := NewApplier()
	modelInfo := buildGeminiModelInfo("gemini-3-flash-preview")
	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
	body := []byte(`{"existing": "data"}`)

	result, err := applier.Apply(body, config, modelInfo)
	if err != nil {
		t.Fatalf("Apply() error = %v", err)
	}
	// ModeBudget applies budget format
	budget := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget").Int()
	if budget != 8192 {
		t.Fatalf("Apply() expected thinkingBudget=8192, got: %d", budget)
	}
}

// TestGeminiApplyUnsupportedMode tests behavior with unsupported Mode types.
// Coverage: apply.go:67-69 and 97-98 (H5, L2)
func TestGeminiApplyUnsupportedMode(t *testing.T) {
	applier := NewApplier()
	body := []byte(`{"existing": "data"}`)

	tests := []struct {
		name   string
		model  string
		config thinking.ThinkingConfig
	}{
		{"unknown mode with budget model", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ThinkingMode(99), Budget: 8192}},
		{"unknown mode with level model", "gemini-3-pro-preview", thinking.ThinkingConfig{Mode: thinking.ThinkingMode(99), Level: thinking.LevelHigh}},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			modelInfo := buildGeminiModelInfo(tt.model)
			result, err := applier.Apply(body, tt.config, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}
			// Unsupported modes return original body unchanged
			if string(result) != string(body) {
				t.Fatalf("Apply() with unsupported mode should return original body, got: %s", result)
			}
		})
	}
}
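One behavior worth restating outside the table: the Pro-series clamp in the "(0)" cases above is performed by ValidateConfig, not by the applier. A compressed trace under the same ThinkingSupport values the tests assume:

	// Per the tests above: ModeBudget+Budget=0 is converted to ModeNone and the
	// budget is clamped to Min=128 because ZeroAllowed=false (assumed values).
	support := &registry.ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}
	cfg := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}
	normalized, _ := thinking.ValidateConfig(cfg, support)
	// Apply then emits thinkingBudget=128 with includeThoughts=false.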
internal/thinking/provider/geminicli/apply.go (new file, 128 lines)
@@ -0,0 +1,128 @@
// Package geminicli implements thinking configuration for Gemini CLI API format.
//
// Gemini CLI uses the request.generationConfig.thinkingConfig.* path instead of
// the generationConfig.thinkingConfig.* path used by the standard Gemini API.
package geminicli

import (
	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

// Applier applies thinking configuration for Gemini CLI API format.
type Applier struct{}

var _ thinking.ProviderApplier = (*Applier)(nil)

// NewApplier creates a new Gemini CLI thinking applier.
func NewApplier() *Applier {
	return &Applier{}
}

func init() {
	applier := NewApplier()
	thinking.RegisterProvider("gemini-cli", applier)
	thinking.RegisterProvider("antigravity", applier)
}

// Apply applies thinking configuration to Gemini CLI request body.
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
	if modelInfo == nil {
		return body, nil
	}
	if modelInfo.Thinking == nil {
		if modelInfo.Type == "" {
			modelID := modelInfo.ID
			if modelID == "" {
				modelID = "unknown"
			}
			return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID)
		}
		return a.applyCompatible(body, config)
	}

	if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
		return body, nil
	}

	if len(body) == 0 || !gjson.ValidBytes(body) {
		body = []byte(`{}`)
	}

	// ModeAuto: Always use Budget format with thinkingBudget=-1
	if config.Mode == thinking.ModeAuto {
		return a.applyBudgetFormat(body, config)
	}

	// For non-auto modes, choose format based on model capabilities
	support := modelInfo.Thinking
	if len(support.Levels) > 0 {
		return a.applyLevelFormat(body, config)
	}
	return a.applyBudgetFormat(body, config)
}

func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
	if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
		return body, nil
	}

	if len(body) == 0 || !gjson.ValidBytes(body) {
		body = []byte(`{}`)
	}

	if config.Mode == thinking.ModeAuto {
		return a.applyBudgetFormat(body, config)
	}

	if config.Mode == thinking.ModeLevel || (config.Mode == thinking.ModeNone && config.Level != "") {
		return a.applyLevelFormat(body, config)
	}

	return a.applyBudgetFormat(body, config)
}

func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
	result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget")

	if config.Mode == thinking.ModeNone {
		result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", false)
		if config.Level != "" {
			result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", string(config.Level))
		}
		return result, nil
	}

	// Only handle ModeLevel - budget conversion should be done by upper layer
	if config.Mode != thinking.ModeLevel {
		return body, nil
	}

	level := string(config.Level)
	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", level)
	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", true)
	return result, nil
}

func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
	result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingLevel")

	budget := config.Budget
	includeThoughts := false
	switch config.Mode {
	case thinking.ModeNone:
		includeThoughts = false
	case thinking.ModeAuto:
		includeThoughts = true
	default:
		includeThoughts = budget > 0
	}

	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", includeThoughts)
	return result, nil
}
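Since the only observable difference from the standard Gemini applier is the request.* path prefix, a quick sketch of the output shape (model limits assumed for illustration):

	// Assumed model limits; the point is the "request." prefix on the output path.
	modelInfo := &registry.ModelInfo{ID: "gemini-2.5-pro", Thinking: &registry.ThinkingSupport{Min: 128, Max: 32768}}
	cfg := thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}
	out, _ := NewApplier().Apply([]byte(`{}`), cfg, modelInfo)
	// out: {"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":-1,"includeThoughts":true}}}}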
internal/thinking/provider/geminicli/apply_test.go (new file, 382 lines)
@@ -0,0 +1,382 @@
// Package geminicli implements thinking configuration for Gemini CLI API format.
package geminicli

import (
	"testing"

	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
	"github.com/tidwall/gjson"
)

func TestNewApplier(t *testing.T) {
	applier := NewApplier()
	if applier == nil {
		t.Fatal("NewApplier() returned nil")
	}
}

func TestApplierImplementsInterface(t *testing.T) {
	// Compile-time check: if Applier doesn't implement the interface, this won't compile
	var _ thinking.ProviderApplier = (*Applier)(nil)
}

// TestGeminiCLIApply tests the Gemini CLI thinking applier.
//
// Gemini CLI uses request.generationConfig.thinkingConfig.* path.
// Behavior mirrors the Gemini applier but with a different JSON path prefix.
func TestGeminiCLIApply(t *testing.T) {
	applier := NewApplier()
	tests := []struct {
		name                string
		model               string
		config              thinking.ThinkingConfig
		wantField           string
		wantValue           interface{}
		wantIncludeThoughts bool
	}{
		// Budget mode (no Levels)
		{"budget 8k", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, "thinkingBudget", 8192, true},
		{"budget zero", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, "thinkingBudget", 0, false},
		{"none mode", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "thinkingBudget", 0, false},
		{"auto mode", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, "thinkingBudget", -1, true},

		// Level mode (has Levels)
		{"level high", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "thinkingLevel", "high", true},
		{"level low", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, "thinkingLevel", "low", true},
		{"level minimal", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, "thinkingLevel", "minimal", true},
		// ModeAuto with Levels model still uses thinkingBudget=-1
		{"auto with levels", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, "thinkingBudget", -1, true},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			modelInfo := buildGeminiCLIModelInfo(tt.model)
			result, err := applier.Apply([]byte(`{}`), tt.config, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}

			gotField := gjson.GetBytes(result, "request.generationConfig.thinkingConfig."+tt.wantField)
			switch want := tt.wantValue.(type) {
			case int:
				if int(gotField.Int()) != want {
					t.Fatalf("%s = %d, want %d", tt.wantField, gotField.Int(), want)
				}
			case string:
				if gotField.String() != want {
					t.Fatalf("%s = %q, want %q", tt.wantField, gotField.String(), want)
				}
			case bool:
				if gotField.Bool() != want {
					t.Fatalf("%s = %v, want %v", tt.wantField, gotField.Bool(), want)
				}
			default:
				t.Fatalf("unsupported wantValue type %T", tt.wantValue)
			}

			gotIncludeThoughts := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts").Bool()
			if gotIncludeThoughts != tt.wantIncludeThoughts {
				t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, tt.wantIncludeThoughts)
			}
		})
	}
}

// TestGeminiCLIApplyModeNoneWithLevel tests ModeNone with a Level model.
// When ModeNone is used with a model that has Levels, includeThoughts should be false.
func TestGeminiCLIApplyModeNoneWithLevel(t *testing.T) {
	applier := NewApplier()
	modelInfo := buildGeminiCLIModelInfo("gemini-cli-level")
	config := thinking.ThinkingConfig{Mode: thinking.ModeNone, Level: thinking.LevelLow}

	result, err := applier.Apply([]byte(`{}`), config, modelInfo)
	if err != nil {
		t.Fatalf("Apply() error = %v", err)
	}

	gotIncludeThoughts := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts").Bool()
	if gotIncludeThoughts != false {
		t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, false)
	}

	gotLevel := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel").String()
	if gotLevel != "low" {
		t.Fatalf("thinkingLevel = %q, want %q", gotLevel, "low")
	}
}

// TestGeminiCLIApplyInvalidBody tests Apply behavior with invalid body inputs.
func TestGeminiCLIApplyInvalidBody(t *testing.T) {
	applier := NewApplier()
	modelInfo := buildGeminiCLIModelInfo("gemini-cli-budget")
	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}

	tests := []struct {
		name string
		body []byte
	}{
		{"nil body", nil},
		{"empty body", []byte{}},
		{"invalid json", []byte("{\"not json\"")},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := applier.Apply(tt.body, config, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}

			gotBudget := int(gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget").Int())
			if gotBudget != 8192 {
				t.Fatalf("thinkingBudget = %d, want %d", gotBudget, 8192)
			}

			gotIncludeThoughts := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts").Bool()
			if !gotIncludeThoughts {
				t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, true)
			}
		})
	}
}

// TestGeminiCLIApplyConflictingFields tests that conflicting fields are removed.
//
// When applying Budget format, any existing thinkingLevel should be removed.
// When applying Level format, any existing thinkingBudget should be removed.
func TestGeminiCLIApplyConflictingFields(t *testing.T) {
	applier := NewApplier()

	tests := []struct {
		name         string
		model        string
		config       thinking.ThinkingConfig
		existingBody string
		wantField    string // expected field to exist
		wantNoField  string // expected field to NOT exist
	}{
		// Budget format should remove existing thinkingLevel
		{
			"budget removes level",
			"gemini-cli-budget",
			thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192},
			`{"request":{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}}`,
			"thinkingBudget",
			"thinkingLevel",
		},
		// Level format should remove existing thinkingBudget
		{
			"level removes budget",
			"gemini-cli-level",
			thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh},
			`{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`,
			"thinkingLevel",
			"thinkingBudget",
		},
		// ModeAuto uses budget format, should remove thinkingLevel
		{
			"auto removes level",
			"gemini-cli-level",
			thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1},
			`{"request":{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}}`,
			"thinkingBudget",
			"thinkingLevel",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			modelInfo := buildGeminiCLIModelInfo(tt.model)
			result, err := applier.Apply([]byte(tt.existingBody), tt.config, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}

			// Verify expected field exists
			wantPath := "request.generationConfig.thinkingConfig." + tt.wantField
			if !gjson.GetBytes(result, wantPath).Exists() {
				t.Fatalf("%s should exist in result: %s", tt.wantField, string(result))
			}

			// Verify conflicting field was removed
			noPath := "request.generationConfig.thinkingConfig." + tt.wantNoField
			if gjson.GetBytes(result, noPath).Exists() {
				t.Fatalf("%s should NOT exist in result: %s", tt.wantNoField, string(result))
			}
		})
	}
}

// TestGeminiCLIApplyThinkingNotSupported tests error handling when modelInfo.Thinking is nil.
func TestGeminiCLIApplyThinkingNotSupported(t *testing.T) {
	applier := NewApplier()
	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}

	// Model with nil Thinking support
	modelInfo := &registry.ModelInfo{ID: "gemini-cli-unknown", Thinking: nil}

	_, err := applier.Apply([]byte(`{}`), config, modelInfo)
	if err == nil {
		t.Fatal("Apply() expected error for nil Thinking, got nil")
	}

	// Verify it's the correct error type
	thinkErr, ok := err.(*thinking.ThinkingError)
	if !ok {
		t.Fatalf("Apply() error type = %T, want *thinking.ThinkingError", err)
	}
	if thinkErr.Code != thinking.ErrThinkingNotSupported {
		t.Fatalf("Apply() error code = %v, want %v", thinkErr.Code, thinking.ErrThinkingNotSupported)
	}
}

// TestGeminiCLIApplyNilModelInfo tests Apply behavior when modelInfo is nil.
func TestGeminiCLIApplyNilModelInfo(t *testing.T) {
	applier := NewApplier()
	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
	body := []byte(`{"existing": "data"}`)

	result, err := applier.Apply(body, config, nil)
	if err != nil {
		t.Fatalf("Apply() with nil modelInfo should not error, got: %v", err)
	}
	if string(result) != string(body) {
		t.Fatalf("Apply() with nil modelInfo should return original body, got: %s", result)
	}
}

// TestGeminiCLIApplyEmptyModelID tests Apply when modelID is empty.
func TestGeminiCLIApplyEmptyModelID(t *testing.T) {
	applier := NewApplier()
	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
	modelInfo := &registry.ModelInfo{ID: "", Thinking: nil}

	_, err := applier.Apply([]byte(`{}`), config, modelInfo)
	if err == nil {
		t.Fatal("Apply() with empty modelID and nil Thinking should error")
	}
	thinkErr, ok := err.(*thinking.ThinkingError)
	if !ok {
		t.Fatalf("Apply() error type = %T, want *thinking.ThinkingError", err)
	}
	if thinkErr.Model != "unknown" {
		t.Fatalf("Apply() error model = %q, want %q", thinkErr.Model, "unknown")
	}
}

// TestGeminiCLIApplyModeBudgetWithLevels tests that ModeBudget with a Levels model passes through.
// The Apply layer doesn't convert - the upper layer should handle Budget→Level conversion.
func TestGeminiCLIApplyModeBudgetWithLevels(t *testing.T) {
	applier := NewApplier()
	modelInfo := buildGeminiCLIModelInfo("gemini-cli-level")
	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
	body := []byte(`{"existing": "data"}`)

	result, err := applier.Apply(body, config, modelInfo)
	if err != nil {
		t.Fatalf("Apply() error = %v", err)
	}
	// ModeBudget with Levels model: Apply returns body unchanged (conversion is upper layer's job)
	if string(result) != string(body) {
		t.Fatalf("Apply() ModeBudget with Levels should return original body, got: %s", result)
	}
}

// TestGeminiCLIApplyUnsupportedMode tests behavior with unsupported Mode types.
func TestGeminiCLIApplyUnsupportedMode(t *testing.T) {
	applier := NewApplier()
	body := []byte(`{"existing": "data"}`)

	tests := []struct {
		name   string
		model  string
		config thinking.ThinkingConfig
	}{
		{"unknown mode with budget model", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ThinkingMode(99), Budget: 8192}},
		{"unknown mode with level model", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ThinkingMode(99), Level: thinking.LevelHigh}},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			modelInfo := buildGeminiCLIModelInfo(tt.model)
			result, err := applier.Apply(body, tt.config, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}
			// Unsupported modes return original body unchanged
			if string(result) != string(body) {
				t.Fatalf("Apply() with unsupported mode should return original body, got: %s", result)
			}
		})
	}
}

// TestAntigravityUsesGeminiCLIFormat tests that the antigravity provider uses the gemini-cli format.
// Antigravity is registered with the same applier as gemini-cli.
func TestAntigravityUsesGeminiCLIFormat(t *testing.T) {
	applier := NewApplier()

	tests := []struct {
		name      string
		config    thinking.ThinkingConfig
		modelInfo *registry.ModelInfo
		wantField string
	}{
		{
			"claude model budget",
			thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384},
			&registry.ModelInfo{ID: "gemini-claude-sonnet-4-5-thinking", Thinking: &registry.ThinkingSupport{Min: 1024, Max: 200000}},
			"request.generationConfig.thinkingConfig.thinkingBudget",
		},
		{
			"opus model budget",
			thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 32768},
			&registry.ModelInfo{ID: "gemini-claude-opus-4-5-thinking", Thinking: &registry.ThinkingSupport{Min: 1024, Max: 200000}},
			"request.generationConfig.thinkingConfig.thinkingBudget",
		},
		{
			"model with levels",
			thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh},
			&registry.ModelInfo{ID: "some-model-with-levels", Thinking: &registry.ThinkingSupport{Min: 1024, Max: 200000, Levels: []string{"low", "high"}}},
			"request.generationConfig.thinkingConfig.thinkingLevel",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := applier.Apply([]byte(`{}`), tt.config, tt.modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}

			if !gjson.GetBytes(got, tt.wantField).Exists() {
				t.Fatalf("expected field %s in output: %s", tt.wantField, string(got))
			}
		})
	}
}

func buildGeminiCLIModelInfo(modelID string) *registry.ModelInfo {
	support := &registry.ThinkingSupport{}
	switch modelID {
	case "gemini-cli-budget":
		support.Min = 0
		support.Max = 32768
		support.ZeroAllowed = true
		support.DynamicAllowed = true
	case "gemini-cli-level":
		support.Min = 128
		support.Max = 32768
		support.ZeroAllowed = false
		support.DynamicAllowed = true
		support.Levels = []string{"minimal", "low", "medium", "high"}
	default:
		// Unknown model - return nil Thinking to trigger error path
		return &registry.ModelInfo{ID: modelID, Thinking: nil}
	}
	return &registry.ModelInfo{
		ID:       modelID,
		Thinking: support,
	}
}
internal/thinking/provider/iflow/apply.go (new file, 160 lines)
@@ -0,0 +1,160 @@
// Package iflow implements thinking configuration for iFlow models (GLM, MiniMax).
//
// iFlow models use boolean toggle semantics:
//   - GLM models: chat_template_kwargs.enable_thinking (boolean)
//   - MiniMax models: reasoning_split (boolean)
//
// Level values are converted to boolean: none=false, all others=true.
// See: _bmad-output/planning-artifacts/architecture.md#Epic-9
package iflow

import (
	"strings"

	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

// Applier implements thinking.ProviderApplier for iFlow models.
//
// iFlow-specific behavior:
//   - GLM models: enable_thinking boolean + clear_thinking=false
//   - MiniMax models: reasoning_split boolean
//   - Level to boolean: none=false, others=true
//   - No quantized support (only on/off)
type Applier struct{}

var _ thinking.ProviderApplier = (*Applier)(nil)

// NewApplier creates a new iFlow thinking applier.
func NewApplier() *Applier {
	return &Applier{}
}

func init() {
	thinking.RegisterProvider("iflow", NewApplier())
}

// Apply applies thinking configuration to iFlow request body.
//
// Expected output format (GLM):
//
//	{
//	  "chat_template_kwargs": {
//	    "enable_thinking": true,
//	    "clear_thinking": false
//	  }
//	}
//
// Expected output format (MiniMax):
//
//	{
//	  "reasoning_split": true
//	}
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
	if modelInfo == nil {
		return body, nil
	}
	if modelInfo.Thinking == nil {
		modelID := modelInfo.ID
		if modelID == "" {
			modelID = "unknown"
		}
		return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID)
	}

	if isGLMModel(modelInfo.ID) {
		return applyGLM(body, config), nil
	}

	if isMiniMaxModel(modelInfo.ID) {
		return applyMiniMax(body, config), nil
	}

	return body, nil
}

// configToBoolean converts ThinkingConfig to a boolean for iFlow models.
//
// Conversion rules:
//   - ModeNone: false
//   - ModeAuto: true
//   - ModeBudget + Budget=0: false
//   - ModeBudget + Budget>0: true
//   - ModeLevel + Level="none": false
//   - ModeLevel + any other level: true
//   - Default (unknown mode): true
func configToBoolean(config thinking.ThinkingConfig) bool {
	switch config.Mode {
	case thinking.ModeNone:
		return false
	case thinking.ModeAuto:
		return true
	case thinking.ModeBudget:
		return config.Budget > 0
	case thinking.ModeLevel:
		return config.Level != thinking.LevelNone
	default:
		return true
	}
}

// applyGLM applies thinking configuration for GLM models.
//
// Output format when enabled:
//
//	{"chat_template_kwargs": {"enable_thinking": true, "clear_thinking": false}}
//
// Output format when disabled:
//
//	{"chat_template_kwargs": {"enable_thinking": false}}
//
// Note: clear_thinking is only set when thinking is enabled, to preserve
// thinking output in the response.
func applyGLM(body []byte, config thinking.ThinkingConfig) []byte {
	enableThinking := configToBoolean(config)

	if len(body) == 0 || !gjson.ValidBytes(body) {
		body = []byte(`{}`)
	}

	result, _ := sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)

	// clear_thinking only needed when thinking is enabled
	if enableThinking {
		result, _ = sjson.SetBytes(result, "chat_template_kwargs.clear_thinking", false)
	}

	return result
}

// applyMiniMax applies thinking configuration for MiniMax models.
//
// Output format:
//
//	{"reasoning_split": true/false}
func applyMiniMax(body []byte, config thinking.ThinkingConfig) []byte {
	reasoningSplit := configToBoolean(config)

	if len(body) == 0 || !gjson.ValidBytes(body) {
		body = []byte(`{}`)
	}

	result, _ := sjson.SetBytes(body, "reasoning_split", reasoningSplit)

	return result
}

// isGLMModel determines if the model is a GLM series model.
// GLM models use the chat_template_kwargs.enable_thinking format.
func isGLMModel(modelID string) bool {
	return strings.HasPrefix(strings.ToLower(modelID), "glm")
}

// isMiniMaxModel determines if the model is a MiniMax series model.
// MiniMax models use the reasoning_split format.
func isMiniMaxModel(modelID string) bool {
	return strings.HasPrefix(strings.ToLower(modelID), "minimax")
}
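Because every config collapses to a boolean here, two examples pin down the full behavior (model IDs chosen only to match the prefix checks above; the MiniMax ID is an assumption):

	// GLM: any positive budget enables thinking and pins clear_thinking=false.
	glm := &registry.ModelInfo{ID: "glm-4.6", Thinking: &registry.ThinkingSupport{}}
	out, _ := NewApplier().Apply([]byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1024}, glm)
	// out: {"chat_template_kwargs":{"enable_thinking":true,"clear_thinking":false}}

	// MiniMax: ModeNone maps to reasoning_split=false.
	mm := &registry.ModelInfo{ID: "minimax-m2", Thinking: &registry.ThinkingSupport{}} // assumed model ID
	out, _ = NewApplier().Apply([]byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeNone}, mm)
	// out: {"reasoning_split":false}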
internal/thinking/provider/iflow/apply_test.go (new file, 328 lines)
@@ -0,0 +1,328 @@
// Package iflow implements thinking configuration for iFlow models (GLM, MiniMax).
package iflow

import (
	"bytes"
	"testing"

	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
	"github.com/tidwall/gjson"
)

func TestNewApplier(t *testing.T) {
	tests := []struct {
		name string
	}{
		{"default"},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			applier := NewApplier()
			if applier == nil {
				t.Fatalf("expected non-nil applier")
			}
		})
	}
}

func TestApplierImplementsInterface(t *testing.T) {
	tests := []struct {
		name    string
		applier thinking.ProviderApplier
	}{
		{"default", NewApplier()},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if tt.applier == nil {
				t.Fatalf("expected thinking.ProviderApplier implementation")
			}
		})
	}
}

func TestApplyNilModelInfo(t *testing.T) {
	applier := NewApplier()

	tests := []struct {
		name string
		body []byte
	}{
		{"nil body", nil},
		{"empty body", []byte{}},
		{"json body", []byte(`{"model":"glm-4.6"}`)},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := applier.Apply(tt.body, thinking.ThinkingConfig{}, nil)
			if err != nil {
				t.Fatalf("expected nil error, got %v", err)
			}
			if !bytes.Equal(got, tt.body) {
				t.Fatalf("expected body unchanged, got %s", string(got))
			}
		})
	}
}

func TestApplyMissingThinkingSupport(t *testing.T) {
	applier := NewApplier()

	tests := []struct {
		name      string
		modelID   string
		wantModel string
	}{
		{"model id", "glm-4.6", "glm-4.6"},
		{"empty model id", "", "unknown"},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			modelInfo := &registry.ModelInfo{ID: tt.modelID}
			got, err := applier.Apply([]byte(`{"model":"`+tt.modelID+`"}`), thinking.ThinkingConfig{}, modelInfo)
			if err == nil {
				t.Fatalf("expected error, got nil")
			}
			if got != nil {
				t.Fatalf("expected nil body on error, got %s", string(got))
			}
			thinkingErr, ok := err.(*thinking.ThinkingError)
			if !ok {
				t.Fatalf("expected ThinkingError, got %T", err)
			}
			if thinkingErr.Code != thinking.ErrThinkingNotSupported {
				t.Fatalf("expected code %s, got %s", thinking.ErrThinkingNotSupported, thinkingErr.Code)
			}
			if thinkingErr.Model != tt.wantModel {
				t.Fatalf("expected model %s, got %s", tt.wantModel, thinkingErr.Model)
			}
		})
	}
}

func TestConfigToBoolean(t *testing.T) {
	tests := []struct {
		name   string
		config thinking.ThinkingConfig
		want   bool
	}{
		{"mode none", thinking.ThinkingConfig{Mode: thinking.ModeNone}, false},
		{"mode auto", thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true},
		{"budget zero", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, false},
		{"budget positive", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1000}, true},
		{"level none", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelNone}, false},
		{"level minimal", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, true},
		{"level low", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, true},
		{"level medium", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, true},
		{"level high", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, true},
		{"level xhigh", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelXHigh}, true},
		{"zero value config", thinking.ThinkingConfig{}, false},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := configToBoolean(tt.config); got != tt.want {
				t.Fatalf("configToBoolean(%+v) = %v, want %v", tt.config, got, tt.want)
			}
		})
	}
}

func TestApplyGLM(t *testing.T) {
	applier := NewApplier()

	tests := []struct {
		name         string
		modelID      string
		body         []byte
		config       thinking.ThinkingConfig
		wantEnable   bool
		wantPreserve string
	}{
		{"mode none", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeNone}, false, ""},
		{"level none", "glm-4.7", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelNone}, false, ""},
		{"mode auto", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, ""},
		{"level minimal", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, true, ""},
		{"level low", "glm-4.7", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, true, ""},
		{"level medium", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, true, ""},
		{"level high", "GLM-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, true, ""},
		{"level xhigh", "glm-z1-preview", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelXHigh}, true, ""},
		{"budget zero", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, false, ""},
		{"budget 1000", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1000}, true, ""},
		{"preserve fields", "glm-4.6", []byte(`{"model":"glm-4.6","extra":{"keep":true}}`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "glm-4.6"},
		{"empty body", "glm-4.6", nil, thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, ""},
		{"malformed json", "glm-4.6", []byte(`{invalid`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, ""},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			modelInfo := &registry.ModelInfo{
				ID:       tt.modelID,
				Thinking: &registry.ThinkingSupport{},
			}
			got, err := applier.Apply(tt.body, tt.config, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}
			if !gjson.ValidBytes(got) {
				t.Fatalf("expected valid JSON, got %s", string(got))
			}

			enableResult := gjson.GetBytes(got, "chat_template_kwargs.enable_thinking")
			if !enableResult.Exists() {
				t.Fatalf("enable_thinking missing")
			}
			gotEnable := enableResult.Bool()
			if gotEnable != tt.wantEnable {
				t.Fatalf("enable_thinking = %v, want %v", gotEnable, tt.wantEnable)
			}

			// clear_thinking only set when enable_thinking=true
			clearResult := gjson.GetBytes(got, "chat_template_kwargs.clear_thinking")
			if tt.wantEnable {
				if !clearResult.Exists() {
					t.Fatalf("clear_thinking missing when enable_thinking=true")
				}
				if clearResult.Bool() {
					t.Fatalf("clear_thinking = %v, want false", clearResult.Bool())
|
||||
}
|
||||
} else {
|
||||
if clearResult.Exists() {
|
||||
t.Fatalf("clear_thinking should not exist when enable_thinking=false")
|
||||
}
|
||||
}
|
||||
|
||||
if tt.wantPreserve != "" {
|
||||
gotModel := gjson.GetBytes(got, "model").String()
|
||||
if gotModel != tt.wantPreserve {
|
||||
t.Fatalf("model = %q, want %q", gotModel, tt.wantPreserve)
|
||||
}
|
||||
if !gjson.GetBytes(got, "extra.keep").Bool() {
|
||||
t.Fatalf("expected extra.keep preserved")
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyMiniMax(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
modelID string
|
||||
body []byte
|
||||
config thinking.ThinkingConfig
|
||||
wantSplit bool
|
||||
wantModel string
|
||||
wantKeep bool
|
||||
}{
|
||||
{"mode none", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeNone}, false, "", false},
|
||||
{"level none", "minimax-m2.1", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelNone}, false, "", false},
|
||||
{"mode auto", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "", false},
|
||||
{"level high", "MINIMAX-M2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, true, "", false},
|
||||
{"level low", "minimax-m2.1", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, true, "", false},
|
||||
{"level minimal", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, true, "", false},
|
||||
{"level medium", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, true, "", false},
|
||||
{"level xhigh", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelXHigh}, true, "", false},
|
||||
{"budget zero", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, false, "", false},
|
||||
{"budget 1000", "minimax-m2.1", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1000}, true, "", false},
|
||||
{"unknown level", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: "unknown"}, true, "", false},
|
||||
{"preserve fields", "minimax-m2", []byte(`{"model":"minimax-m2","extra":{"keep":true}}`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "minimax-m2", true},
|
||||
{"empty body", "minimax-m2", nil, thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "", false},
|
||||
{"malformed json", "minimax-m2", []byte(`{invalid`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
modelInfo := ®istry.ModelInfo{
|
||||
ID: tt.modelID,
|
||||
Thinking: ®istry.ThinkingSupport{},
|
||||
}
|
||||
got, err := applier.Apply(tt.body, tt.config, modelInfo)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() error = %v", err)
|
||||
}
|
||||
if !gjson.ValidBytes(got) {
|
||||
t.Fatalf("expected valid JSON, got %s", string(got))
|
||||
}
|
||||
|
||||
splitResult := gjson.GetBytes(got, "reasoning_split")
|
||||
if !splitResult.Exists() {
|
||||
t.Fatalf("reasoning_split missing")
|
||||
}
|
||||
// Verify JSON type is boolean, not string
|
||||
if splitResult.Type != gjson.True && splitResult.Type != gjson.False {
|
||||
t.Fatalf("reasoning_split should be boolean, got type %v", splitResult.Type)
|
||||
}
|
||||
gotSplit := splitResult.Bool()
|
||||
if gotSplit != tt.wantSplit {
|
||||
t.Fatalf("reasoning_split = %v, want %v", gotSplit, tt.wantSplit)
|
||||
}
|
||||
|
||||
if tt.wantModel != "" {
|
||||
gotModel := gjson.GetBytes(got, "model").String()
|
||||
if gotModel != tt.wantModel {
|
||||
t.Fatalf("model = %q, want %q", gotModel, tt.wantModel)
|
||||
}
|
||||
if tt.wantKeep && !gjson.GetBytes(got, "extra.keep").Bool() {
|
||||
t.Fatalf("expected extra.keep preserved")
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestIsGLMModel tests the GLM model detection.
|
||||
//
|
||||
// Depends on: Epic 9 Story 9-1
|
||||
func TestIsGLMModel(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
model string
|
||||
wantGLM bool
|
||||
}{
|
||||
{"glm-4.6", "glm-4.6", true},
|
||||
{"glm-z1-preview", "glm-z1-preview", true},
|
||||
{"glm uppercase", "GLM-4.7", true},
|
||||
{"minimax-01", "minimax-01", false},
|
||||
{"gpt-5.2", "gpt-5.2", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := isGLMModel(tt.model); got != tt.wantGLM {
|
||||
t.Fatalf("isGLMModel(%q) = %v, want %v", tt.model, got, tt.wantGLM)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestIsMiniMaxModel tests the MiniMax model detection.
|
||||
//
|
||||
// Depends on: Epic 9 Story 9-1
|
||||
func TestIsMiniMaxModel(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
model string
|
||||
wantMiniMax bool
|
||||
}{
|
||||
{"minimax-01", "minimax-01", true},
|
||||
{"minimax uppercase", "MINIMAX-M2", true},
|
||||
{"glm-4.6", "glm-4.6", false},
|
||||
{"gpt-5.2", "gpt-5.2", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := isMiniMaxModel(tt.model); got != tt.wantMiniMax {
|
||||
t.Fatalf("isMiniMaxModel(%q) = %v, want %v", tt.model, got, tt.wantMiniMax)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
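For reference, these are roughly the two payload shapes the iflow tests above assert; a sketch only, built with the same sjson calls the applier uses, where everything except the field names is illustrative:

package main

import (
	"fmt"

	"github.com/tidwall/sjson"
)

func main() {
	// GLM-style body: thinking is toggled via chat_template_kwargs.
	glm, _ := sjson.SetBytes([]byte(`{"model":"glm-4.6"}`), "chat_template_kwargs.enable_thinking", true)
	glm, _ = sjson.SetBytes(glm, "chat_template_kwargs.clear_thinking", false)
	fmt.Println(string(glm)) // roughly {"model":"glm-4.6","chat_template_kwargs":{"enable_thinking":true,"clear_thinking":false}}

	// MiniMax-style body: thinking is toggled via a top-level boolean.
	mm, _ := sjson.SetBytes([]byte(`{"model":"minimax-m2"}`), "reasoning_split", true)
	fmt.Println(string(mm)) // roughly {"model":"minimax-m2","reasoning_split":true}
}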
internal/thinking/provider/openai/apply.go (new file, 135 lines)
@@ -0,0 +1,135 @@
// Package openai implements thinking configuration for OpenAI/Codex models.
//
// OpenAI models use the reasoning_effort format with discrete levels
// (low/medium/high). Some models support xhigh and none levels.
// See: _bmad-output/planning-artifacts/architecture.md#Epic-8
package openai

import (
	"strings"

	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

// Applier implements thinking.ProviderApplier for OpenAI models.
//
// OpenAI-specific behavior:
// - Output format: reasoning_effort (string: low/medium/high/xhigh)
// - Level-only mode: no numeric budget support
// - Some models support ZeroAllowed (gpt-5.1, gpt-5.2)
type Applier struct{}

var _ thinking.ProviderApplier = (*Applier)(nil)

// NewApplier creates a new OpenAI thinking applier.
func NewApplier() *Applier {
	return &Applier{}
}

func init() {
	thinking.RegisterProvider("openai", NewApplier())
}

// Apply applies the thinking configuration to an OpenAI request body.
//
// Expected output format:
//
//	{
//	  "reasoning_effort": "high"
//	}
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
	if modelInfo == nil {
		return body, nil
	}
	if modelInfo.Thinking == nil {
		if modelInfo.Type == "" {
			modelID := modelInfo.ID
			if modelID == "" {
				modelID = "unknown"
			}
			return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID)
		}
		return applyCompatibleOpenAI(body, config)
	}

	// Only handle ModeLevel and ModeNone; other modes pass through unchanged.
	if config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone {
		return body, nil
	}

	if len(body) == 0 || !gjson.ValidBytes(body) {
		body = []byte(`{}`)
	}

	if config.Mode == thinking.ModeLevel {
		result, _ := sjson.SetBytes(body, "reasoning_effort", string(config.Level))
		return result, nil
	}

	effort := ""
	support := modelInfo.Thinking
	if config.Budget == 0 {
		if support.ZeroAllowed || hasLevel(support.Levels, string(thinking.LevelNone)) {
			effort = string(thinking.LevelNone)
		}
	}
	if effort == "" && config.Level != "" {
		effort = string(config.Level)
	}
	if effort == "" && len(support.Levels) > 0 {
		effort = support.Levels[0]
	}
	if effort == "" {
		return body, nil
	}

	result, _ := sjson.SetBytes(body, "reasoning_effort", effort)
	return result, nil
}

func applyCompatibleOpenAI(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
	if len(body) == 0 || !gjson.ValidBytes(body) {
		body = []byte(`{}`)
	}

	var effort string
	switch config.Mode {
	case thinking.ModeLevel:
		if config.Level == "" {
			return body, nil
		}
		effort = string(config.Level)
	case thinking.ModeNone:
		effort = string(thinking.LevelNone)
		if config.Level != "" {
			effort = string(config.Level)
		}
	case thinking.ModeAuto:
		// Auto mode for user-defined models: pass through as "auto"
		effort = string(thinking.LevelAuto)
	case thinking.ModeBudget:
		// Budget mode: convert budget to level using threshold mapping
		level, ok := thinking.ConvertBudgetToLevel(config.Budget)
		if !ok {
			return body, nil
		}
		effort = level
	default:
		return body, nil
	}

	result, _ := sjson.SetBytes(body, "reasoning_effort", effort)
	return result, nil
}

func hasLevel(levels []string, target string) bool {
	for _, level := range levels {
		if strings.EqualFold(strings.TrimSpace(level), target) {
			return true
		}
	}
	return false
}
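A minimal usage sketch of the applier above. The ModelInfo literal is illustrative; the calls themselves match the code in this diff (note these are internal packages, so this only compiles inside the module):

package main

import (
	"fmt"

	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
	openai "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai"
)

func main() {
	applier := openai.NewApplier()
	modelInfo := &registry.ModelInfo{
		ID:       "gpt-5.2",
		Thinking: &registry.ThinkingSupport{ZeroAllowed: true, Levels: []string{"none", "low", "medium", "high"}},
	}
	body, err := applier.Apply(
		[]byte(`{"model":"gpt-5.2"}`),
		thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh},
		modelInfo,
	)
	// Expected: {"model":"gpt-5.2","reasoning_effort":"high"} <nil>
	fmt.Println(string(body), err)
}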
internal/thinking/provider/openai/apply_test.go (new file, 343 lines)
@@ -0,0 +1,343 @@
// Package openai implements thinking configuration for OpenAI/Codex models.
package openai

import (
	"testing"

	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
	"github.com/tidwall/gjson"
)

func buildOpenAIModelInfo(modelID string) *registry.ModelInfo {
	info := registry.LookupStaticModelInfo(modelID)
	if info != nil {
		return info
	}
	// Fallback with complete ThinkingSupport matching real OpenAI model capabilities
	return &registry.ModelInfo{
		ID: modelID,
		Thinking: &registry.ThinkingSupport{
			Min:         1024,
			Max:         32768,
			ZeroAllowed: true,
			Levels:      []string{"none", "low", "medium", "high", "xhigh"},
		},
	}
}

func TestNewApplier(t *testing.T) {
	applier := NewApplier()
	if applier == nil {
		t.Fatalf("expected non-nil applier")
	}
}

func TestApplierImplementsInterface(t *testing.T) {
	_, ok := interface{}(NewApplier()).(thinking.ProviderApplier)
	if !ok {
		t.Fatalf("expected Applier to implement thinking.ProviderApplier")
	}
}

func TestApplyNilModelInfo(t *testing.T) {
	applier := NewApplier()
	body := []byte(`{"model":"gpt-5.2"}`)
	got, err := applier.Apply(body, thinking.ThinkingConfig{}, nil)
	if err != nil {
		t.Fatalf("expected nil error, got %v", err)
	}
	if string(got) != string(body) {
		t.Fatalf("expected body unchanged, got %s", string(got))
	}
}

func TestApplyMissingThinkingSupport(t *testing.T) {
	applier := NewApplier()
	modelInfo := &registry.ModelInfo{ID: "gpt-5.2"}
	got, err := applier.Apply([]byte(`{"model":"gpt-5.2"}`), thinking.ThinkingConfig{}, modelInfo)
	if err == nil {
		t.Fatalf("expected error, got nil")
	}
	if got != nil {
		t.Fatalf("expected nil body on error, got %s", string(got))
	}
	thinkingErr, ok := err.(*thinking.ThinkingError)
	if !ok {
		t.Fatalf("expected ThinkingError, got %T", err)
	}
	if thinkingErr.Code != thinking.ErrThinkingNotSupported {
		t.Fatalf("expected code %s, got %s", thinking.ErrThinkingNotSupported, thinkingErr.Code)
	}
	if thinkingErr.Model != "gpt-5.2" {
		t.Fatalf("expected model gpt-5.2, got %s", thinkingErr.Model)
	}
}

// TestApplyLevel tests Apply with ModeLevel (unit test, no ValidateConfig).
func TestApplyLevel(t *testing.T) {
	applier := NewApplier()
	modelInfo := buildOpenAIModelInfo("gpt-5.2")

	tests := []struct {
		name  string
		level thinking.ThinkingLevel
		want  string
	}{
		{"high", thinking.LevelHigh, "high"},
		{"medium", thinking.LevelMedium, "medium"},
		{"low", thinking.LevelLow, "low"},
		{"xhigh", thinking.LevelXHigh, "xhigh"},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := applier.Apply([]byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: tt.level}, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}
			if got := gjson.GetBytes(result, "reasoning_effort").String(); got != tt.want {
				t.Fatalf("reasoning_effort = %q, want %q", got, tt.want)
			}
		})
	}
}

// TestApplyModeNone tests Apply with ModeNone (unit test).
func TestApplyModeNone(t *testing.T) {
	applier := NewApplier()

	tests := []struct {
		name      string
		config    thinking.ThinkingConfig
		modelInfo *registry.ModelInfo
		want      string
	}{
		{"zero allowed", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, &registry.ModelInfo{ID: "gpt-5.2", Thinking: &registry.ThinkingSupport{ZeroAllowed: true, Levels: []string{"none", "low"}}}, "none"},
		{"clamped to level", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 128, Level: thinking.LevelLow}, &registry.ModelInfo{ID: "gpt-5", Thinking: &registry.ThinkingSupport{Levels: []string{"minimal", "low"}}}, "low"},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := applier.Apply([]byte(`{}`), tt.config, tt.modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}
			if got := gjson.GetBytes(result, "reasoning_effort").String(); got != tt.want {
				t.Fatalf("reasoning_effort = %q, want %q", got, tt.want)
			}
		})
	}
}

// TestApplyPassthrough tests that unsupported modes pass through unchanged.
func TestApplyPassthrough(t *testing.T) {
	applier := NewApplier()
	modelInfo := buildOpenAIModelInfo("gpt-5.2")

	tests := []struct {
		name   string
		config thinking.ThinkingConfig
	}{
		{"mode auto", thinking.ThinkingConfig{Mode: thinking.ModeAuto}},
		{"mode budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			body := []byte(`{"model":"gpt-5.2"}`)
			result, err := applier.Apply(body, tt.config, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}
			if string(result) != string(body) {
				t.Fatalf("Apply() result = %s, want %s", string(result), string(body))
			}
		})
	}
}

// TestApplyInvalidBody tests Apply with invalid body input.
func TestApplyInvalidBody(t *testing.T) {
	applier := NewApplier()
	modelInfo := buildOpenAIModelInfo("gpt-5.2")

	tests := []struct {
		name string
		body []byte
	}{
		{"nil body", nil},
		{"empty body", []byte{}},
		{"invalid json", []byte(`{"not json"`)},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := applier.Apply(tt.body, thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}
			if !gjson.ValidBytes(result) {
				t.Fatalf("Apply() result is not valid JSON: %s", string(result))
			}
			if got := gjson.GetBytes(result, "reasoning_effort").String(); got != "high" {
				t.Fatalf("reasoning_effort = %q, want %q", got, "high")
			}
		})
	}
}

// TestApplyPreservesFields tests that existing body fields are preserved.
func TestApplyPreservesFields(t *testing.T) {
	applier := NewApplier()
	modelInfo := buildOpenAIModelInfo("gpt-5.2")

	body := []byte(`{"model":"gpt-5.2","messages":[]}`)
	result, err := applier.Apply(body, thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, modelInfo)
	if err != nil {
		t.Fatalf("Apply() error = %v", err)
	}
	if got := gjson.GetBytes(result, "model").String(); got != "gpt-5.2" {
		t.Fatalf("model = %q, want %q", got, "gpt-5.2")
	}
	if !gjson.GetBytes(result, "messages").Exists() {
		t.Fatalf("messages missing from result: %s", string(result))
	}
	if got := gjson.GetBytes(result, "reasoning_effort").String(); got != "low" {
		t.Fatalf("reasoning_effort = %q, want %q", got, "low")
	}
}

// TestHasLevel tests the hasLevel helper function.
func TestHasLevel(t *testing.T) {
	tests := []struct {
		name   string
		levels []string
		target string
		want   bool
	}{
		{"exact match", []string{"low", "medium", "high"}, "medium", true},
		{"case insensitive", []string{"low", "medium", "high"}, "MEDIUM", true},
		{"with spaces", []string{"low", " medium ", "high"}, "medium", true},
		{"not found", []string{"low", "medium", "high"}, "xhigh", false},
		{"empty levels", []string{}, "medium", false},
		{"none level", []string{"none", "low", "medium"}, "none", true},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := hasLevel(tt.levels, tt.target); got != tt.want {
				t.Fatalf("hasLevel(%v, %q) = %v, want %v", tt.levels, tt.target, got, tt.want)
			}
		})
	}
}

// --- End-to-End Tests (ValidateConfig → Apply) ---

// TestE2EApply tests the full flow: ValidateConfig → Apply.
func TestE2EApply(t *testing.T) {
	tests := []struct {
		name   string
		model  string
		config thinking.ThinkingConfig
		want   string
	}{
		{"level high", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "high"},
		{"level medium", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, "medium"},
		{"level low", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, "low"},
		{"level xhigh", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelXHigh}, "xhigh"},
		{"mode none", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "none"},
		{"budget to level", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, "medium"},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			modelInfo := buildOpenAIModelInfo(tt.model)
			normalized, err := thinking.ValidateConfig(tt.config, modelInfo.Thinking)
			if err != nil {
				t.Fatalf("ValidateConfig() error = %v", err)
			}

			applier := NewApplier()
			result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}
			if got := gjson.GetBytes(result, "reasoning_effort").String(); got != tt.want {
				t.Fatalf("reasoning_effort = %q, want %q", got, tt.want)
			}
		})
	}
}

// TestE2EApplyOutputFormat tests the full flow with exact JSON output verification.
func TestE2EApplyOutputFormat(t *testing.T) {
	tests := []struct {
		name     string
		model    string
		config   thinking.ThinkingConfig
		wantJSON string
	}{
		{"level high", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, `{"reasoning_effort":"high"}`},
		{"level none", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, `{"reasoning_effort":"none"}`},
		{"budget converted", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, `{"reasoning_effort":"medium"}`},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			modelInfo := buildOpenAIModelInfo(tt.model)
			normalized, err := thinking.ValidateConfig(tt.config, modelInfo.Thinking)
			if err != nil {
				t.Fatalf("ValidateConfig() error = %v", err)
			}

			applier := NewApplier()
			result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}
			if string(result) != tt.wantJSON {
				t.Fatalf("Apply() result = %s, want %s", string(result), tt.wantJSON)
			}
		})
	}
}

// TestE2EApplyWithExistingBody tests the full flow with existing body fields.
func TestE2EApplyWithExistingBody(t *testing.T) {
	tests := []struct {
		name       string
		body       string
		config     thinking.ThinkingConfig
		wantEffort string
		wantModel  string
	}{
		{"empty body", `{}`, thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "high", ""},
		{"preserve fields", `{"model":"gpt-5.2","messages":[]}`, thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, "medium", "gpt-5.2"},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			modelInfo := buildOpenAIModelInfo("gpt-5.2")
			normalized, err := thinking.ValidateConfig(tt.config, modelInfo.Thinking)
			if err != nil {
				t.Fatalf("ValidateConfig() error = %v", err)
			}

			applier := NewApplier()
			result, err := applier.Apply([]byte(tt.body), *normalized, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}
			if got := gjson.GetBytes(result, "reasoning_effort").String(); got != tt.wantEffort {
				t.Fatalf("reasoning_effort = %q, want %q", got, tt.wantEffort)
			}
			if tt.wantModel != "" {
				if got := gjson.GetBytes(result, "model").String(); got != tt.wantModel {
					t.Fatalf("model = %q, want %q", got, tt.wantModel)
				}
			}
		})
	}
}
internal/thinking/provider_map_test.go (new file, 51 lines)
@@ -0,0 +1,51 @@
// Package thinking_test provides external tests for the thinking package.
//
// This file uses package thinking_test (external) to allow importing provider
// subpackages, which triggers their init() functions to register appliers.
// This avoids the import cycles that would occur if the thinking package
// imported the providers directly.
package thinking_test

import (
	"testing"

	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"

	// Blank imports to trigger provider init() registration
	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/claude"
	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini"
	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli"
	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow"
	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai"
)

func TestProviderAppliersBasic(t *testing.T) {
	tests := []struct {
		name     string
		provider string
		wantNil  bool
	}{
		{"gemini provider", "gemini", false},
		{"gemini-cli provider", "gemini-cli", false},
		{"claude provider", "claude", false},
		{"openai provider", "openai", false},
		{"iflow provider", "iflow", false},
		{"antigravity provider", "antigravity", false},
		{"unknown provider", "unknown", true},
		{"empty provider", "", true},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := thinking.GetProviderApplier(tt.provider)
			if tt.wantNil {
				if got != nil {
					t.Fatalf("GetProviderApplier(%q) = %T, want nil", tt.provider, got)
				}
				return
			}
			if got == nil {
				t.Fatalf("GetProviderApplier(%q) = nil, want non-nil", tt.provider)
			}
		})
	}
}
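The same blank-import registration pattern works at call sites outside tests. A sketch, where applyFor is a hypothetical wrapper (not part of this diff) and only GetProviderApplier, the Apply signature, and the registered provider names come from the code above:

import (
	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"

	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai" // registers "openai"
)

// applyFor resolves a registered applier by provider name and applies cfg.
func applyFor(provider string, body []byte, cfg thinking.ThinkingConfig, info *registry.ModelInfo) ([]byte, error) {
	applier := thinking.GetProviderApplier(provider)
	if applier == nil {
		return body, nil // unknown provider: leave the request untouched
	}
	return applier.Apply(body, cfg, info)
}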
internal/thinking/strip.go (new file, 54 lines)
@@ -0,0 +1,54 @@
// Package thinking provides unified thinking configuration processing.
package thinking

import (
	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

// StripThinkingConfig removes thinking configuration fields from the request body.
//
// This function is used when a model doesn't support thinking but the request
// contains thinking configuration. The configuration is silently removed to
// prevent upstream API errors.
//
// Parameters:
// - body: Original request body JSON
// - provider: Provider name (determines which fields to strip)
//
// Returns:
// - Modified request body JSON with thinking configuration removed
// - Original body, unchanged, if:
//   - body is empty or invalid JSON
//   - provider is unknown
//   - no thinking configuration is found
func StripThinkingConfig(body []byte, provider string) []byte {
	if len(body) == 0 || !gjson.ValidBytes(body) {
		return body
	}

	switch provider {
	case "claude":
		result, _ := sjson.DeleteBytes(body, "thinking")
		return result
	case "gemini":
		result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig")
		return result
	case "gemini-cli", "antigravity":
		result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig")
		return result
	case "openai":
		result, _ := sjson.DeleteBytes(body, "reasoning_effort")
		return result
	case "codex":
		result, _ := sjson.DeleteBytes(body, "reasoning.effort")
		return result
	case "iflow":
		result, _ := sjson.DeleteBytes(body, "chat_template_kwargs.enable_thinking")
		result, _ = sjson.DeleteBytes(result, "chat_template_kwargs.clear_thinking")
		result, _ = sjson.DeleteBytes(result, "reasoning_split")
		return result
	default:
		return body
	}
}
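A quick sketch of the behaviour for a Claude-shaped body, using only the paths from the switch above:

body := []byte(`{"model":"claude-3","thinking":{"budget_tokens":8192}}`)
cleaned := thinking.StripThinkingConfig(body, "claude")
// cleaned is now {"model":"claude-3"}; unrelated fields survive,
// and an unknown provider name would return body unchanged.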
internal/thinking/strip_test.go (new file, 66 lines)
@@ -0,0 +1,66 @@
// Package thinking_test provides tests for thinking config stripping.
package thinking_test

import (
	"testing"

	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
	"github.com/tidwall/gjson"
)

func TestStripThinkingConfig(t *testing.T) {
	tests := []struct {
		name      string
		body      string
		provider  string
		stripped  []string
		preserved []string
	}{
		{"claude thinking", `{"thinking":{"budget_tokens":8192},"model":"claude-3"}`, "claude", []string{"thinking"}, []string{"model"}},
		{"gemini thinkingConfig", `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192},"temperature":0.7}}`, "gemini", []string{"generationConfig.thinkingConfig"}, []string{"generationConfig.temperature"}},
		{"gemini-cli thinkingConfig", `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192},"temperature":0.7}}}`, "gemini-cli", []string{"request.generationConfig.thinkingConfig"}, []string{"request.generationConfig.temperature"}},
		{"antigravity thinkingConfig", `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":4096},"maxTokens":1024}}}`, "antigravity", []string{"request.generationConfig.thinkingConfig"}, []string{"request.generationConfig.maxTokens"}},
		{"openai reasoning_effort", `{"reasoning_effort":"high","model":"gpt-5"}`, "openai", []string{"reasoning_effort"}, []string{"model"}},
		{"iflow glm", `{"chat_template_kwargs":{"enable_thinking":true,"clear_thinking":false,"other":"value"}}`, "iflow", []string{"chat_template_kwargs.enable_thinking", "chat_template_kwargs.clear_thinking"}, []string{"chat_template_kwargs.other"}},
		{"iflow minimax", `{"reasoning_split":true,"model":"minimax"}`, "iflow", []string{"reasoning_split"}, []string{"model"}},
		{"iflow both formats", `{"chat_template_kwargs":{"enable_thinking":true,"clear_thinking":false},"reasoning_split":true,"model":"mixed"}`, "iflow", []string{"chat_template_kwargs.enable_thinking", "chat_template_kwargs.clear_thinking", "reasoning_split"}, []string{"model"}},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := thinking.StripThinkingConfig([]byte(tt.body), tt.provider)

			for _, path := range tt.stripped {
				if gjson.GetBytes(got, path).Exists() {
					t.Fatalf("expected %s to be stripped, got %s", path, string(got))
				}
			}
			for _, path := range tt.preserved {
				if !gjson.GetBytes(got, path).Exists() {
					t.Fatalf("expected %s to be preserved, got %s", path, string(got))
				}
			}
		})
	}
}

func TestStripThinkingConfigPassthrough(t *testing.T) {
	tests := []struct {
		name     string
		body     string
		provider string
	}{
		{"empty body", ``, "claude"},
		{"invalid json", `{not valid`, "claude"},
		{"unknown provider", `{"thinking":{"budget_tokens":8192}}`, "unknown"},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := thinking.StripThinkingConfig([]byte(tt.body), tt.provider)
			if string(got) != tt.body {
				t.Fatalf("StripThinkingConfig() = %s, want passthrough %s", string(got), tt.body)
			}
		})
	}
}
internal/thinking/suffix.go (new file, 170 lines)
@@ -0,0 +1,170 @@
// Package thinking provides unified thinking configuration processing.
//
// This file implements suffix parsing functionality for extracting
// thinking configuration from model names in the format model(value).
package thinking

import (
	"fmt"
	"strconv"
	"strings"
)

// ParseSuffix extracts the thinking suffix from a model name.
//
// The suffix format is: model-name(value)
// Examples:
// - "claude-sonnet-4-5(16384)" -> ModelName="claude-sonnet-4-5", RawSuffix="16384"
// - "gpt-5.2(high)" -> ModelName="gpt-5.2", RawSuffix="high"
// - "gemini-2.5-pro" -> ModelName="gemini-2.5-pro", HasSuffix=false
//
// This function only extracts the suffix; it does not validate or interpret
// the suffix content. Use ParseNumericSuffix, ParseLevelSuffix, etc. for
// content interpretation.
func ParseSuffix(model string) SuffixResult {
	// Find the last opening parenthesis
	lastOpen := strings.LastIndex(model, "(")
	if lastOpen == -1 {
		return SuffixResult{ModelName: model, HasSuffix: false}
	}

	// Check if the string ends with a closing parenthesis
	if !strings.HasSuffix(model, ")") {
		return SuffixResult{ModelName: model, HasSuffix: false}
	}

	// Extract components
	modelName := model[:lastOpen]
	rawSuffix := model[lastOpen+1 : len(model)-1]

	return SuffixResult{
		ModelName: modelName,
		HasSuffix: true,
		RawSuffix: rawSuffix,
	}
}

// ParseSuffixWithError extracts the thinking suffix and returns an error on invalid format.
//
// Invalid format cases:
// - Contains "(" but does not end with ")"
// - Contains ")" without any "("
//
// The error message includes the original input for debugging context.
func ParseSuffixWithError(model string) (SuffixResult, error) {
	lastOpen := strings.LastIndex(model, "(")
	if lastOpen == -1 {
		if strings.Contains(model, ")") {
			return SuffixResult{ModelName: model, HasSuffix: false}, NewThinkingError(ErrInvalidSuffix, fmt.Sprintf("invalid suffix format: %s", model))
		}
		return SuffixResult{ModelName: model, HasSuffix: false}, nil
	}

	if !strings.HasSuffix(model, ")") {
		return SuffixResult{ModelName: model, HasSuffix: false}, NewThinkingError(ErrInvalidSuffix, fmt.Sprintf("invalid suffix format: %s", model))
	}

	return ParseSuffix(model), nil
}

// ParseNumericSuffix attempts to parse a raw suffix as a numeric budget value.
//
// This function parses the raw suffix content (from ParseSuffix.RawSuffix) as an integer.
// Only non-negative integers are considered valid numeric suffixes.
//
// Platform note: The budget value uses Go's int type, which is 32-bit on 32-bit
// systems and 64-bit on 64-bit systems. Values exceeding the platform's int range
// will return ok=false.
//
// Leading zeros are accepted: "08192" parses as 8192.
//
// Examples:
// - "8192" -> budget=8192, ok=true
// - "0" -> budget=0, ok=true (represents ModeNone)
// - "08192" -> budget=8192, ok=true (leading zeros accepted)
// - "-1" -> budget=0, ok=false (negative numbers are not valid numeric suffixes)
// - "high" -> budget=0, ok=false (not a number)
// - "9223372036854775808" -> budget=0, ok=false (overflow on 64-bit systems)
//
// For special handling of -1 as auto mode, use ParseSpecialSuffix instead.
func ParseNumericSuffix(rawSuffix string) (budget int, ok bool) {
	if rawSuffix == "" {
		return 0, false
	}

	value, err := strconv.Atoi(rawSuffix)
	if err != nil {
		return 0, false
	}

	// Negative numbers are not valid numeric suffixes;
	// -1 should be handled by special value parsing as "auto".
	if value < 0 {
		return 0, false
	}

	return value, true
}

// ParseSpecialSuffix attempts to parse a raw suffix as a special thinking mode value.
//
// This function handles special strings that represent a change in thinking mode:
// - "none" -> ModeNone (disables thinking)
// - "auto" -> ModeAuto (automatic/dynamic thinking)
// - "-1" -> ModeAuto (numeric representation of auto mode)
//
// String values are case-insensitive.
func ParseSpecialSuffix(rawSuffix string) (mode ThinkingMode, ok bool) {
	if rawSuffix == "" {
		return ModeBudget, false
	}

	// Case-insensitive matching
	switch strings.ToLower(rawSuffix) {
	case "none":
		return ModeNone, true
	case "auto", "-1":
		return ModeAuto, true
	default:
		return ModeBudget, false
	}
}

// ParseLevelSuffix attempts to parse a raw suffix as a discrete thinking level.
//
// This function parses the raw suffix content (from ParseSuffix.RawSuffix) as a level.
// Only discrete effort levels are valid: minimal, low, medium, high, xhigh.
// Level matching is case-insensitive.
//
// Special values (none, auto) are NOT handled by this function; use ParseSpecialSuffix
// instead. This separation allows callers to prioritize special value handling.
//
// Examples:
// - "high" -> level=LevelHigh, ok=true
// - "HIGH" -> level=LevelHigh, ok=true (case insensitive)
// - "medium" -> level=LevelMedium, ok=true
// - "none" -> level="", ok=false (special value, use ParseSpecialSuffix)
// - "auto" -> level="", ok=false (special value, use ParseSpecialSuffix)
// - "8192" -> level="", ok=false (numeric, use ParseNumericSuffix)
// - "ultra" -> level="", ok=false (unknown level)
func ParseLevelSuffix(rawSuffix string) (level ThinkingLevel, ok bool) {
	if rawSuffix == "" {
		return "", false
	}

	// Case-insensitive matching
	switch strings.ToLower(rawSuffix) {
	case "minimal":
		return LevelMinimal, true
	case "low":
		return LevelLow, true
	case "medium":
		return LevelMedium, true
	case "high":
		return LevelHigh, true
	case "xhigh":
		return LevelXHigh, true
	default:
		return "", false
	}
}
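Taken together, the parsers compose into the caller order the doc comments describe: special values first, then numeric budgets, then levels. A sketch from inside the package, where resolveSuffix and its fallback policy are hypothetical (not part of this diff); only the four Parse functions are real:

func resolveSuffix(model string) ThinkingConfig {
	res := ParseSuffix(model)
	if !res.HasSuffix {
		return ThinkingConfig{Mode: ModeAuto} // illustrative default for suffix-less names
	}
	if mode, ok := ParseSpecialSuffix(res.RawSuffix); ok {
		return ThinkingConfig{Mode: mode} // "(none)", "(auto)", "(-1)"
	}
	if budget, ok := ParseNumericSuffix(res.RawSuffix); ok {
		return ThinkingConfig{Mode: ModeBudget, Budget: budget} // e.g. "(8192)"
	}
	if level, ok := ParseLevelSuffix(res.RawSuffix); ok {
		return ThinkingConfig{Mode: ModeLevel, Level: level} // e.g. "(high)"
	}
	return ThinkingConfig{Mode: ModeAuto} // unrecognized suffix content, illustrative fallback
}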
internal/thinking/suffix_test.go (new file, 313 lines)
@@ -0,0 +1,313 @@
// Package thinking provides unified thinking configuration processing.
package thinking

import (
	"strings"
	"testing"
)

// TestParseSuffix tests the ParseSuffix function.
//
// ParseSuffix extracts the thinking suffix from a model name.
// Format: model-name(value) where value is the raw suffix content.
// This function only extracts; interpretation is done by other functions.
func TestParseSuffix(t *testing.T) {
	tests := []struct {
		name       string
		model      string
		wantModel  string
		wantSuffix bool
		wantRaw    string
	}{
		{"no suffix", "claude-sonnet-4-5", "claude-sonnet-4-5", false, ""},
		{"numeric suffix", "model(1000)", "model", true, "1000"},
		{"level suffix", "gpt-5(high)", "gpt-5", true, "high"},
		{"auto suffix", "gemini-2.5-pro(auto)", "gemini-2.5-pro", true, "auto"},
		{"none suffix", "model(none)", "model", true, "none"},
		{"complex model name", "gemini-2.5-flash-lite(8192)", "gemini-2.5-flash-lite", true, "8192"},
		{"alias with suffix", "g25p(1000)", "g25p", true, "1000"},
		{"empty suffix", "model()", "model", true, ""},
		{"nested parens", "model(a(b))", "model(a", true, "b)"},
		{"no model name", "(1000)", "", true, "1000"},
		{"unmatched open", "model(", "model(", false, ""},
		{"unmatched close", "model)", "model)", false, ""},
		{"paren not at end", "model(1000)extra", "model(1000)extra", false, ""},
		{"empty string", "", "", false, ""},
		{"large budget", "claude-opus(128000)", "claude-opus", true, "128000"},
		{"xhigh level", "gpt-5.2(xhigh)", "gpt-5.2", true, "xhigh"},
		{"minimal level", "model(minimal)", "model", true, "minimal"},
		{"medium level", "model(medium)", "model", true, "medium"},
		{"low level", "model(low)", "model", true, "low"},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := ParseSuffix(tt.model)
			if got.ModelName != tt.wantModel {
				t.Errorf("ModelName = %q, want %q", got.ModelName, tt.wantModel)
			}
			if got.HasSuffix != tt.wantSuffix {
				t.Errorf("HasSuffix = %v, want %v", got.HasSuffix, tt.wantSuffix)
			}
			if got.RawSuffix != tt.wantRaw {
				t.Errorf("RawSuffix = %q, want %q", got.RawSuffix, tt.wantRaw)
			}
		})
	}
}

// TestParseSuffixWithError tests invalid suffix error reporting.
func TestParseSuffixWithError(t *testing.T) {
	tests := []struct {
		name          string
		model         string
		wantHasSuffix bool
	}{
		{"missing close paren", "model(abc", false},
		{"unmatched close paren", "model)", false},
		{"paren not at end", "model(1000)extra", false},
		{"no suffix", "gpt-5", false},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := ParseSuffixWithError(tt.model)
			if tt.name == "no suffix" {
				if err != nil {
					t.Fatalf("ParseSuffixWithError(%q) error = %v, want nil", tt.model, err)
				}
				if got.HasSuffix != tt.wantHasSuffix {
					t.Errorf("HasSuffix = %v, want %v", got.HasSuffix, tt.wantHasSuffix)
				}
				return
			}

			if err == nil {
				t.Fatalf("ParseSuffixWithError(%q) error = nil, want error", tt.model)
			}
			thinkingErr, ok := err.(*ThinkingError)
			if !ok {
				t.Fatalf("ParseSuffixWithError(%q) error type = %T, want *ThinkingError", tt.model, err)
			}
			if thinkingErr.Code != ErrInvalidSuffix {
				t.Errorf("error code = %v, want %v", thinkingErr.Code, ErrInvalidSuffix)
			}
			if !strings.Contains(thinkingErr.Message, tt.model) {
				t.Errorf("message %q does not include input %q", thinkingErr.Message, tt.model)
			}
			if got.HasSuffix != tt.wantHasSuffix {
				t.Errorf("HasSuffix = %v, want %v", got.HasSuffix, tt.wantHasSuffix)
			}
		})
	}
}

// TestParseSuffixNumeric tests numeric suffix parsing.
//
// ParseNumericSuffix parses raw suffix content as an integer budget.
// Only non-negative integers are valid. Negative numbers return ok=false.
func TestParseSuffixNumeric(t *testing.T) {
	tests := []struct {
		name       string
		rawSuffix  string
		wantBudget int
		wantOK     bool
	}{
		{"small budget", "512", 512, true},
		{"standard budget", "8192", 8192, true},
		{"large budget", "100000", 100000, true},
		{"max int32", "2147483647", 2147483647, true},
		{"max int64", "9223372036854775807", 9223372036854775807, true},
		{"zero", "0", 0, true},
		{"negative one", "-1", 0, false},
		{"negative", "-100", 0, false},
		{"int64 overflow", "9223372036854775808", 0, false},
		{"large overflow", "99999999999999999999", 0, false},
		{"not a number", "abc", 0, false},
		{"level string", "high", 0, false},
		{"float", "1.5", 0, false},
		{"empty", "", 0, false},
		{"leading zero", "08192", 8192, true},
		{"whitespace", " 8192 ", 0, false},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			budget, ok := ParseNumericSuffix(tt.rawSuffix)
			if budget != tt.wantBudget {
				t.Errorf("budget = %d, want %d", budget, tt.wantBudget)
			}
			if ok != tt.wantOK {
				t.Errorf("ok = %v, want %v", ok, tt.wantOK)
			}
		})
	}
}

// TestParseSuffixLevel tests level suffix parsing.
//
// ParseLevelSuffix parses raw suffix content as a discrete thinking level.
// Only effort levels (minimal, low, medium, high, xhigh) are valid.
// Special values (none, auto) return ok=false; use ParseSpecialSuffix instead.
func TestParseSuffixLevel(t *testing.T) {
	tests := []struct {
		name      string
		rawSuffix string
		wantLevel ThinkingLevel
		wantOK    bool
	}{
		{"minimal", "minimal", LevelMinimal, true},
		{"low", "low", LevelLow, true},
		{"medium", "medium", LevelMedium, true},
		{"high", "high", LevelHigh, true},
		{"xhigh", "xhigh", LevelXHigh, true},
		{"case HIGH", "HIGH", LevelHigh, true},
		{"case High", "High", LevelHigh, true},
		{"case hIgH", "hIgH", LevelHigh, true},
		{"case MINIMAL", "MINIMAL", LevelMinimal, true},
		{"case XHigh", "XHigh", LevelXHigh, true},
		{"none special", "none", "", false},
		{"auto special", "auto", "", false},
		{"unknown ultra", "ultra", "", false},
		{"unknown maximum", "maximum", "", false},
		{"unknown invalid", "invalid", "", false},
		{"numeric", "8192", "", false},
		{"numeric zero", "0", "", false},
		{"empty", "", "", false},
		{"whitespace", " high ", "", false},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			level, ok := ParseLevelSuffix(tt.rawSuffix)
			if level != tt.wantLevel {
				t.Errorf("level = %q, want %q", level, tt.wantLevel)
			}
			if ok != tt.wantOK {
				t.Errorf("ok = %v, want %v", ok, tt.wantOK)
			}
		})
	}
}

// TestParseSuffixSpecialValues tests special value suffix parsing.
//
// Depends on: Epic 3 Story 3-4 (special value suffix parsing)
func TestParseSuffixSpecialValues(t *testing.T) {
	tests := []struct {
		name      string
		rawSuffix string
		wantMode  ThinkingMode
		wantOK    bool
	}{
		{"none", "none", ModeNone, true},
		{"auto", "auto", ModeAuto, true},
		{"negative one", "-1", ModeAuto, true},
		{"case NONE", "NONE", ModeNone, true},
		{"case Auto", "Auto", ModeAuto, true},
		{"case aUtO", "aUtO", ModeAuto, true},
		{"case NoNe", "NoNe", ModeNone, true},
		{"empty", "", ModeBudget, false},
		{"level high", "high", ModeBudget, false},
		{"numeric", "8192", ModeBudget, false},
		{"negative other", "-2", ModeBudget, false},
		{"whitespace", " none ", ModeBudget, false},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			mode, ok := ParseSpecialSuffix(tt.rawSuffix)
			if mode != tt.wantMode {
				t.Errorf("mode = %q, want %q", mode, tt.wantMode)
			}
			if ok != tt.wantOK {
				t.Errorf("ok = %v, want %v", ok, tt.wantOK)
			}
		})
	}
}

// TestParseSuffixAliasFormats tests alias model suffix parsing.
//
// This test validates that short model aliases (e.g., g25p, cs45) work correctly
// with all suffix types. Alias-to-canonical-model mapping is the caller's responsibility.
func TestParseSuffixAliasFormats(t *testing.T) {
	tests := []struct {
		name        string        // test case description
		model       string        // input model string with optional suffix
		wantName    string        // expected ModelName after parsing
		wantSuffix  bool          // expected HasSuffix value
		wantRaw     string        // expected RawSuffix value
		checkBudget bool          // if true, verify ParseNumericSuffix result
		wantBudget  int           // expected budget (only when checkBudget=true)
		checkLevel  bool          // if true, verify ParseLevelSuffix result
		wantLevel   ThinkingLevel // expected level (only when checkLevel=true)
		checkMode   bool          // if true, verify ParseSpecialSuffix result
		wantMode    ThinkingMode  // expected mode (only when checkMode=true)
	}{
		// Alias + numeric suffix
		{"alias numeric g25p", "g25p(1000)", "g25p", true, "1000", true, 1000, false, "", false, 0},
		{"alias numeric cs45", "cs45(16384)", "cs45", true, "16384", true, 16384, false, "", false, 0},
		{"alias numeric g3f", "g3f(8192)", "g3f", true, "8192", true, 8192, false, "", false, 0},
		// Alias + level suffix
		{"alias level gpt52", "gpt52(high)", "gpt52", true, "high", false, 0, true, LevelHigh, false, 0},
		{"alias level g25f", "g25f(medium)", "g25f", true, "medium", false, 0, true, LevelMedium, false, 0},
		{"alias level cs4", "cs4(low)", "cs4", true, "low", false, 0, true, LevelLow, false, 0},
		// Alias + special suffix
		{"alias auto g3f", "g3f(auto)", "g3f", true, "auto", false, 0, false, "", true, ModeAuto},
		{"alias none claude", "claude(none)", "claude", true, "none", false, 0, false, "", true, ModeNone},
		{"alias -1 g25p", "g25p(-1)", "g25p", true, "-1", false, 0, false, "", true, ModeAuto},
		// Single char alias
		{"single char c", "c(1024)", "c", true, "1024", true, 1024, false, "", false, 0},
		{"single char g", "g(high)", "g", true, "high", false, 0, true, LevelHigh, false, 0},
		// Alias containing numbers
		{"alias with num gpt5", "gpt5(medium)", "gpt5", true, "medium", false, 0, true, LevelMedium, false, 0},
		{"alias with num g25", "g25(1000)", "g25", true, "1000", true, 1000, false, "", false, 0},
		// Edge cases
		{"no suffix", "g25p", "g25p", false, "", false, 0, false, "", false, 0},
		{"empty alias", "(1000)", "", true, "1000", true, 1000, false, "", false, 0},
		{"hyphen alias", "g-25-p(1000)", "g-25-p", true, "1000", true, 1000, false, "", false, 0},
		{"underscore alias", "g_25_p(high)", "g_25_p", true, "high", false, 0, true, LevelHigh, false, 0},
		{"nested parens", "g25p(test)(1000)", "g25p(test)", true, "1000", true, 1000, false, "", false, 0},
	}

	// ParseSuffix only extracts alias and suffix; mapping to the canonical model is the caller's responsibility.
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := ParseSuffix(tt.model)

			if result.ModelName != tt.wantName {
				t.Errorf("ParseSuffix(%q).ModelName = %q, want %q", tt.model, result.ModelName, tt.wantName)
			}
			if result.HasSuffix != tt.wantSuffix {
				t.Errorf("ParseSuffix(%q).HasSuffix = %v, want %v", tt.model, result.HasSuffix, tt.wantSuffix)
			}
			if result.RawSuffix != tt.wantRaw {
				t.Errorf("ParseSuffix(%q).RawSuffix = %q, want %q", tt.model, result.RawSuffix, tt.wantRaw)
			}

			if result.HasSuffix {
				if tt.checkBudget {
					budget, ok := ParseNumericSuffix(result.RawSuffix)
					if !ok || budget != tt.wantBudget {
						t.Errorf("ParseNumericSuffix(%q) = (%d, %v), want (%d, true)",
							result.RawSuffix, budget, ok, tt.wantBudget)
					}
				}
				if tt.checkLevel {
					level, ok := ParseLevelSuffix(result.RawSuffix)
					if !ok || level != tt.wantLevel {
						t.Errorf("ParseLevelSuffix(%q) = (%q, %v), want (%q, true)",
							result.RawSuffix, level, ok, tt.wantLevel)
					}
				}
				if tt.checkMode {
					mode, ok := ParseSpecialSuffix(result.RawSuffix)
					if !ok || mode != tt.wantMode {
						t.Errorf("ParseSpecialSuffix(%q) = (%v, %v), want (%v, true)",
							result.RawSuffix, mode, ok, tt.wantMode)
					}
				}
			}
		})
	}
}
internal/thinking/types.go (new file, 100 lines)
@@ -0,0 +1,100 @@
// Package thinking provides unified thinking configuration processing.
//
// This package offers a unified interface for parsing, validating, and applying
// thinking configurations across various AI providers (Claude, Gemini, OpenAI, iFlow).
package thinking

import "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"

// ThinkingMode represents the type of thinking configuration mode.
type ThinkingMode int

const (
	// ModeBudget indicates using a numeric budget (corresponds to suffix "(1000)" etc.)
	ModeBudget ThinkingMode = iota
	// ModeLevel indicates using a discrete level (corresponds to suffix "(high)" etc.)
	ModeLevel
	// ModeNone indicates thinking is disabled (corresponds to suffix "(none)" or budget=0)
	ModeNone
	// ModeAuto indicates automatic/dynamic thinking (corresponds to suffix "(auto)" or budget=-1)
	ModeAuto
)

// ThinkingLevel represents a discrete thinking level.
type ThinkingLevel string

const (
	// LevelNone disables thinking
	LevelNone ThinkingLevel = "none"
	// LevelAuto enables automatic/dynamic thinking
	LevelAuto ThinkingLevel = "auto"
	// LevelMinimal sets minimal thinking effort
	LevelMinimal ThinkingLevel = "minimal"
	// LevelLow sets low thinking effort
	LevelLow ThinkingLevel = "low"
	// LevelMedium sets medium thinking effort
	LevelMedium ThinkingLevel = "medium"
	// LevelHigh sets high thinking effort
	LevelHigh ThinkingLevel = "high"
	// LevelXHigh sets extra-high thinking effort
	LevelXHigh ThinkingLevel = "xhigh"
)

// ThinkingConfig represents a unified thinking configuration.
//
// This struct is used to pass thinking configuration information between components.
// Depending on Mode, either the Budget or the Level field is effective:
//   - ModeNone: Budget=0, Level is ignored
//   - ModeAuto: Budget=-1, Level is ignored
//   - ModeBudget: Budget is a positive integer, Level is ignored
//   - ModeLevel: Budget is ignored, Level is a valid level
type ThinkingConfig struct {
	// Mode specifies the configuration mode
	Mode ThinkingMode
	// Budget is the thinking budget (token count), only effective when Mode is ModeBudget.
	// Special values: 0 means disabled, -1 means automatic
	Budget int
	// Level is the thinking level, only effective when Mode is ModeLevel
	Level ThinkingLevel
}
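
// Illustrative sketch (not part of this file): one way to construct a config
// for each mode, using hypothetical budget values:
//
//	disabled := ThinkingConfig{Mode: ModeNone, Budget: 0}
//	dynamic := ThinkingConfig{Mode: ModeAuto, Budget: -1}
//	budgeted := ThinkingConfig{Mode: ModeBudget, Budget: 8192}
//	leveled := ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}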

// SuffixResult represents the result of parsing a model name for a thinking suffix.
//
// A thinking suffix is specified in the format model-name(value), where value
// can be a numeric budget (e.g., "16384") or a level name (e.g., "high").
type SuffixResult struct {
	// ModelName is the model name with the suffix removed.
	// If no suffix was found, this equals the original input.
	ModelName string

	// HasSuffix indicates whether a valid suffix was found.
	HasSuffix bool

	// RawSuffix is the content inside the parentheses, without the parentheses.
	// Empty string if HasSuffix is false.
	RawSuffix string
}
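
// Illustrative sketch (not part of this file): parsing a suffixed model name.
// The model name is hypothetical; the field values follow from the format
// described above:
//
//	result := ParseSuffix("gemini-2.5-pro(8192)")
//	// result.ModelName == "gemini-2.5-pro"
//	// result.HasSuffix == true
//	// result.RawSuffix == "8192"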

// ProviderApplier defines the interface for provider-specific thinking configuration application.
//
// Types implementing this interface are responsible for converting a unified ThinkingConfig
// into a provider-specific format and applying it to the request body.
//
// Implementation requirements:
//   - The Apply method must be idempotent
//   - Must not modify the input config or modelInfo
//   - Returns a modified copy of the request body
//   - Returns an appropriate ThinkingError for unsupported configurations
type ProviderApplier interface {
	// Apply applies the thinking configuration to the request body.
	//
	// Parameters:
	//   - body: Original request body JSON
	//   - config: Unified thinking configuration
	//   - modelInfo: Model registry information containing ThinkingSupport properties
	//
	// Returns:
	//   - Modified request body JSON
	//   - ThinkingError if the configuration is invalid or unsupported
	Apply(body []byte, config ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error)
}
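
// Illustrative sketch (not part of this file): a minimal applier for a
// hypothetical provider whose API takes a top-level "thinking_budget" field.
// The field name and the ModeLevel omission are assumptions for illustration
// only; it writes JSON with github.com/tidwall/sjson, as elsewhere in this
// project:
//
//	type exampleApplier struct{}
//
//	func (exampleApplier) Apply(body []byte, config ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
//		switch config.Mode {
//		case ModeBudget, ModeAuto, ModeNone:
//			// 0 disables thinking, -1 requests dynamic thinking
//			return sjson.SetBytes(body, "thinking_budget", config.Budget)
//		default:
//			return body, nil
//		}
//	}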

260
internal/thinking/validate.go
Normal file
@@ -0,0 +1,260 @@
// Package thinking provides unified thinking configuration processing logic.
package thinking

import (
	"fmt"
	"strings"

	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
	log "github.com/sirupsen/logrus"
)

// ClampBudget clamps a budget value to the specified range [min, max].
//
// This function ensures budget values stay within model-supported bounds.
// When clamping occurs, a Debug-level log is recorded.
//
// Special handling:
//   - The auto value (-1) passes through without clamping
//   - Values below min are clamped to min
//   - Values above max are clamped to max
//
// Parameters:
//   - value: The budget value to clamp
//   - min: Minimum allowed budget (inclusive)
//   - max: Maximum allowed budget (inclusive)
//
// Returns:
//   - The clamped budget value (min ≤ result ≤ max, or -1 for auto)
//
// Logging:
//   - Debug level when the value is clamped (either to min or max)
//   - Fields: original_value, clamped_to, min, max
func ClampBudget(value, min, max int) int {
	// Auto value (-1) passes through without clamping
	if value == -1 {
		return value
	}

	// Clamp to min if below
	if value < min {
		logClamp(value, min, min, max)
		return min
	}

	// Clamp to max if above
	if value > max {
		logClamp(value, max, min, max)
		return max
	}

	// Within range, return original
	return value
}

// ClampBudgetWithZeroCheck clamps a budget value to the specified range [min, max]
// while honoring the ZeroAllowed constraint.
//
// This function extends ClampBudget with ZeroAllowed boundary handling.
// When zeroAllowed is false and value is 0, the value is clamped to min and logged.
//
// Parameters:
//   - value: The budget value to clamp
//   - min: Minimum allowed budget (inclusive)
//   - max: Maximum allowed budget (inclusive)
//   - zeroAllowed: Whether 0 (thinking disabled) is allowed
//
// Returns:
//   - The clamped budget value (min ≤ result ≤ max, or -1 for auto)
//
// Logging:
//   - Warn level when zeroAllowed=false and value=0 (zero not allowed for the model)
//   - Fields: original_value, clamped_to, min, max, reason
func ClampBudgetWithZeroCheck(value, min, max int, zeroAllowed bool) int {
	if value == 0 {
		if zeroAllowed {
			return 0
		}
		log.WithFields(log.Fields{
			"original_value": value,
			"clamped_to":     min,
			"min":            min,
			"max":            max,
			"reason":         "zero_not_allowed",
		}).Warn("budget clamped: zero not allowed")
		return min
	}

	return ClampBudget(value, min, max)
}
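
// Illustrative sketch (not part of this file): behavior of the two clamp
// helpers for a model range of [128, 32768]; the values mirror the test cases
// later in this commit:
//
//	ClampBudget(50000, 128, 32768)                 // 32768 (clamped to max)
//	ClampBudget(-1, 128, 32768)                    // -1 (auto passes through)
//	ClampBudgetWithZeroCheck(0, 128, 32768, true)  // 0 (zero allowed)
//	ClampBudgetWithZeroCheck(0, 128, 32768, false) // 128 (zero not allowed, Warn logged)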

// ValidateConfig validates a thinking configuration against model capabilities.
//
// This function performs comprehensive validation:
//   - Checks if the model supports thinking
//   - Auto-converts between Budget and Level formats based on model capability
//   - Validates that the requested level is in the model's supported levels list
//   - Clamps budget values to the model's allowed range
//
// Parameters:
//   - config: The thinking configuration to validate
//   - support: Model's ThinkingSupport properties (nil means no thinking support)
//
// Returns:
//   - Normalized ThinkingConfig with clamped values
//   - ThinkingError if validation fails (ErrThinkingNotSupported, ErrLevelNotSupported, etc.)
//
// Auto-conversion behavior:
//   - Budget-only model + Level config → Level converted to Budget
//   - Level-only model + Budget config → Budget converted to Level
//   - Hybrid model → preserve the original format
func ValidateConfig(config ThinkingConfig, support *registry.ThinkingSupport) (*ThinkingConfig, error) {
	normalized := config
	if support == nil {
		if config.Mode != ModeNone {
			return nil, NewThinkingErrorWithModel(ErrThinkingNotSupported, "thinking not supported for this model", "unknown")
		}
		return &normalized, nil
	}

	capability := detectModelCapability(&registry.ModelInfo{Thinking: support})
	switch capability {
	case CapabilityBudgetOnly:
		if normalized.Mode == ModeLevel {
			if normalized.Level == LevelAuto {
				break
			}
			budget, ok := ConvertLevelToBudget(string(normalized.Level))
			if !ok {
				return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("unknown level: %s", normalized.Level))
			}
			normalized.Mode = ModeBudget
			normalized.Budget = budget
			normalized.Level = ""
		}
	case CapabilityLevelOnly:
		if normalized.Mode == ModeBudget {
			level, ok := ConvertBudgetToLevel(normalized.Budget)
			if !ok {
				return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("budget %d cannot be converted to a valid level", normalized.Budget))
			}
			normalized.Mode = ModeLevel
			normalized.Level = ThinkingLevel(level)
			normalized.Budget = 0
		}
	case CapabilityHybrid:
	}

	if normalized.Mode == ModeLevel && normalized.Level == LevelNone {
		normalized.Mode = ModeNone
		normalized.Budget = 0
		normalized.Level = ""
	}
	if normalized.Mode == ModeLevel && normalized.Level == LevelAuto {
		normalized.Mode = ModeAuto
		normalized.Budget = -1
		normalized.Level = ""
	}
	if normalized.Mode == ModeBudget && normalized.Budget == 0 {
		normalized.Mode = ModeNone
		normalized.Level = ""
	}

	if len(support.Levels) > 0 && normalized.Mode == ModeLevel {
		if !isLevelSupported(string(normalized.Level), support.Levels) {
			validLevels := normalizeLevels(support.Levels)
			message := fmt.Sprintf("level %q not supported, valid levels: %s", strings.ToLower(string(normalized.Level)), strings.Join(validLevels, ", "))
			return nil, NewThinkingError(ErrLevelNotSupported, message)
		}
	}

	// Convert ModeAuto to mid-range if dynamic not allowed
	if normalized.Mode == ModeAuto && !support.DynamicAllowed {
		normalized = convertAutoToMidRange(normalized, support)
	}

	switch normalized.Mode {
	case ModeBudget, ModeAuto, ModeNone:
		clamped := ClampBudgetWithZeroCheck(normalized.Budget, support.Min, support.Max, support.ZeroAllowed)
		normalized.Budget = clamped
	}

	// ModeNone with clamped Budget > 0: set Level to the lowest level for Level-only/Hybrid models.
	// This ensures the Apply layer doesn't need to access support.Levels.
	if normalized.Mode == ModeNone && normalized.Budget > 0 && len(support.Levels) > 0 {
		normalized.Level = ThinkingLevel(support.Levels[0])
	}

	return &normalized, nil
}
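
// Illustrative sketch (not part of this file): validating a level config
// against a budget-only model; the expected values mirror the
// "budget-only converts level" test case later in this commit:
//
//	cfg := ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}
//	support := &registry.ThinkingSupport{Min: 1024, Max: 100000}
//	got, err := ValidateConfig(cfg, support)
//	// err == nil, got.Mode == ModeBudget, got.Budget == 24576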

func isLevelSupported(level string, supported []string) bool {
	for _, candidate := range supported {
		if strings.EqualFold(level, strings.TrimSpace(candidate)) {
			return true
		}
	}
	return false
}

func normalizeLevels(levels []string) []string {
	normalized := make([]string, 0, len(levels))
	for _, level := range levels {
		normalized = append(normalized, strings.ToLower(strings.TrimSpace(level)))
	}
	return normalized
}

// convertAutoToMidRange converts ModeAuto to a mid-range value when dynamic is not allowed.
//
// This function handles the case where a model does not support dynamic/auto thinking.
// The auto mode is silently converted to a fixed value based on model capability:
//   - Level-only models: convert to ModeLevel with LevelMedium
//   - Budget models: convert to ModeBudget with mid = (Min + Max) / 2
//
// Logging:
//   - Debug level when conversion occurs
//   - Fields: original_mode, clamped_to, reason
func convertAutoToMidRange(config ThinkingConfig, support *registry.ThinkingSupport) ThinkingConfig {
	// For level-only models (has Levels but no Min/Max range), use ModeLevel with medium
	if len(support.Levels) > 0 && support.Min == 0 && support.Max == 0 {
		config.Mode = ModeLevel
		config.Level = LevelMedium
		config.Budget = 0
		log.WithFields(log.Fields{
			"original_mode": "auto",
			"clamped_to":    string(LevelMedium),
			"reason":        "dynamic_not_allowed_level_only",
		}).Debug("thinking mode converted: dynamic not allowed, using medium level")
		return config
	}

	// For budget models, use a mid-range budget
	mid := (support.Min + support.Max) / 2
	if mid <= 0 && support.ZeroAllowed {
		config.Mode = ModeNone
		config.Budget = 0
	} else if mid <= 0 {
		config.Mode = ModeBudget
		config.Budget = support.Min
	} else {
		config.Mode = ModeBudget
		config.Budget = mid
	}
	log.WithFields(log.Fields{
		"original_mode": "auto",
		"clamped_to":    config.Budget,
		"reason":        "dynamic_not_allowed",
	}).Debug("thinking mode converted: dynamic not allowed")
	return config
}
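
// Illustrative arithmetic (not part of this file): for a model with Min=128
// and Max=32768, auto converts to mid = (128 + 32768) / 2 = 16448, the value
// the "auto with dynamic not allowed" test case later in this commit expects.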

// logClamp logs a debug message when budget clamping occurs.
func logClamp(original, clampedTo, min, max int) {
	log.WithFields(log.Fields{
		"original_value": original,
		"clamped_to":     clampedTo,
		"min":            min,
		"max":            max,
	}).Debug("budget clamped: value outside model range")
}

349
internal/thinking/validate_test.go
Normal file
@@ -0,0 +1,349 @@
// Package thinking provides unified thinking configuration processing logic.
package thinking

import (
	"strings"
	"testing"
	"unicode"
	"unicode/utf8"

	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
	log "github.com/sirupsen/logrus"
	logtest "github.com/sirupsen/logrus/hooks/test"
)

// TestClampBudget tests the ClampBudget function.
//
// ClampBudget applies range constraints to a budget value:
//   - budget < Min → clamp to Min (with Debug log)
//   - budget > Max → clamp to Max (with Debug log)
//   - Auto value (-1) passes through unchanged
func TestClampBudget(t *testing.T) {
	tests := []struct {
		name  string
		value int
		min   int
		max   int
		want  int
	}{
		// Within range - no clamping
		{"within range", 8192, 128, 32768, 8192},
		{"at min", 128, 128, 32768, 128},
		{"at max", 32768, 128, 32768, 32768},

		// Below min - clamp to min
		{"below min", 100, 128, 32768, 128},

		// Above max - clamp to max
		{"above max", 50000, 128, 32768, 32768},

		// Edge cases
		{"min equals max", 5000, 5000, 5000, 5000},
		{"zero min zero value", 0, 0, 100, 0},

		// Auto value (-1) - passes through
		{"auto value", -1, 128, 32768, -1},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := ClampBudget(tt.value, tt.min, tt.max)
			if got != tt.want {
				t.Errorf("ClampBudget(%d, %d, %d) = %d, want %d",
					tt.value, tt.min, tt.max, got, tt.want)
			}
		})
	}
}

// TestZeroAllowedBoundaryHandling tests ZeroAllowed=false edge cases.
//
// When ZeroAllowed=false and the user requests 0, clamp to Min and log a Warn.
func TestZeroAllowedBoundaryHandling(t *testing.T) {
	tests := []struct {
		name        string
		value       int
		min         int
		max         int
		zeroAllowed bool
		want        int
	}{
		// ZeroAllowed=true: 0 stays 0
		{"zero allowed - keep zero", 0, 128, 32768, true, 0},

		// ZeroAllowed=false: 0 clamps to min
		{"zero not allowed - clamp to min", 0, 128, 32768, false, 128},

		// ZeroAllowed=false but non-zero value: normal clamping
		{"zero not allowed - positive value", 8192, 1024, 100000, false, 8192},

		// Auto value (-1) always passes through
		{"auto value", -1, 128, 32768, false, -1},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := ClampBudgetWithZeroCheck(tt.value, tt.min, tt.max, tt.zeroAllowed)
			if got != tt.want {
				t.Errorf("ClampBudgetWithZeroCheck(%d, %d, %d, %v) = %d, want %d",
					tt.value, tt.min, tt.max, tt.zeroAllowed, got, tt.want)
			}
		})
	}
}

// TestValidateConfigFramework verifies the ValidateConfig function framework.
// This test is merged into TestValidateConfig for consolidation.

// TestValidateConfigNotSupported verifies nil support handling.
// This test is merged into TestValidateConfig for consolidation.

// TestValidateConfigConversion verifies mode conversion based on capability.
// This test is merged into TestValidateConfig for consolidation.

// TestValidateConfigLevelSupport verifies level list validation.
// This test is merged into TestValidateConfig for consolidation.

// TestValidateConfigClamping verifies budget clamping behavior.
// This test is merged into TestValidateConfig for consolidation.

// TestValidateConfig is the comprehensive test for the ValidateConfig function.
//
// ValidateConfig checks if a ThinkingConfig is valid for a given model.
// This test covers all validation scenarios including:
//   - Framework basics (nil support with ModeNone)
//   - Error cases (thinking not supported, level not supported, dynamic not allowed)
//   - Mode conversion (budget-only, level-only, hybrid)
//   - Budget clamping (to max, to min)
//   - ZeroAllowed boundary handling (ModeNone with ZeroAllowed=false)
//   - DynamicAllowed validation
//
// Depends on: Epic 5 Story 5-3 (config validity validation)
func TestValidateConfig(t *testing.T) {
	tests := []struct {
		name       string
		config     ThinkingConfig
		support    *registry.ThinkingSupport
		wantMode   ThinkingMode
		wantBudget int
		wantLevel  ThinkingLevel
		wantErr    bool
		wantCode   ErrorCode
	}{
		// Framework basics
		{"nil support mode none", ThinkingConfig{Mode: ModeNone, Budget: 0}, nil, ModeNone, 0, "", false, ""},

		// Valid configs - no conversion needed
		{"budget-only keeps budget", ThinkingConfig{Mode: ModeBudget, Budget: 8192}, &registry.ThinkingSupport{Min: 1024, Max: 100000}, ModeBudget, 8192, "", false, ""},

		// Auto-conversion: Level → Budget
		{"budget-only converts level", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}, &registry.ThinkingSupport{Min: 1024, Max: 100000}, ModeBudget, 24576, "", false, ""},

		// Auto-conversion: Budget → Level
		{"level-only converts budget", ThinkingConfig{Mode: ModeBudget, Budget: 5000}, &registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}}, ModeLevel, 0, LevelMedium, false, ""},

		// Hybrid preserves original format
		{"hybrid preserves level", ThinkingConfig{Mode: ModeLevel, Level: LevelLow}, &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}}, ModeLevel, 0, LevelLow, false, ""},

		// Budget clamping
		{"budget clamped to max", ThinkingConfig{Mode: ModeBudget, Budget: 200000}, &registry.ThinkingSupport{Min: 1024, Max: 100000}, ModeBudget, 100000, "", false, ""},
		{"budget clamped to min", ThinkingConfig{Mode: ModeBudget, Budget: 100}, &registry.ThinkingSupport{Min: 1024, Max: 100000}, ModeBudget, 1024, "", false, ""},

		// Error: thinking not supported
		{"thinking not supported", ThinkingConfig{Mode: ModeBudget, Budget: 8192}, nil, 0, 0, "", true, ErrThinkingNotSupported},

		// Error: level not in list
		{"level not supported", ThinkingConfig{Mode: ModeLevel, Level: LevelXHigh}, &registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}}, 0, 0, "", true, ErrLevelNotSupported},

		// Level case-insensitive
		{"level supported case-insensitive", ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel("HIGH")}, &registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}}, ModeLevel, 0, ThinkingLevel("HIGH"), false, ""},

		// ModeAuto with DynamicAllowed
		{"auto with dynamic allowed", ThinkingConfig{Mode: ModeAuto, Budget: -1}, &registry.ThinkingSupport{Min: 128, Max: 32768, DynamicAllowed: true}, ModeAuto, -1, "", false, ""},

		// ModeAuto with DynamicAllowed=false - converts to mid-range (M3)
		{"auto with dynamic not allowed", ThinkingConfig{Mode: ModeAuto, Budget: -1}, &registry.ThinkingSupport{Min: 128, Max: 32768, DynamicAllowed: false}, ModeBudget, 16448, "", false, ""},

		// ModeNone with ZeroAllowed=true - stays as ModeNone
		{"mode none with zero allowed", ThinkingConfig{Mode: ModeNone, Budget: 0}, &registry.ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: true}, ModeNone, 0, "", false, ""},

		// Budget=0 converts to ModeNone before clamping (M1)
		{"budget zero converts to none", ThinkingConfig{Mode: ModeBudget, Budget: 0}, &registry.ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false}, ModeNone, 128, "", false, ""},

		// Level=none converts to ModeNone before clamping, then Level set to lowest
		{"level none converts to none", ThinkingConfig{Mode: ModeLevel, Level: LevelNone}, &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}, ZeroAllowed: false}, ModeNone, 128, ThinkingLevel("low"), false, ""},
		{"level auto converts to auto", ThinkingConfig{Mode: ModeLevel, Level: LevelAuto}, &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}, DynamicAllowed: true}, ModeAuto, -1, "", false, ""},
		// M1: Level=auto with DynamicAllowed=false - converts to mid-range budget
		{"level auto with dynamic not allowed", ThinkingConfig{Mode: ModeLevel, Level: LevelAuto}, &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}, DynamicAllowed: false}, ModeBudget, 16448, "", false, ""},
		// M2: Level=auto on a budget-only model (no Levels)
		{"level auto on budget-only model", ThinkingConfig{Mode: ModeLevel, Level: LevelAuto}, &registry.ThinkingSupport{Min: 128, Max: 32768, DynamicAllowed: true}, ModeAuto, -1, "", false, ""},

		// ModeNone with ZeroAllowed=false - clamps to min but preserves ModeNone (M1)
		{"mode none with zero not allowed - preserve mode", ThinkingConfig{Mode: ModeNone, Budget: 0}, &registry.ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false}, ModeNone, 1024, "", false, ""},

		// ModeNone with clamped Budget > 0 and Levels: sets Level to lowest
		{"mode none clamped with levels", ThinkingConfig{Mode: ModeNone, Budget: 0}, &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}, ZeroAllowed: false}, ModeNone, 128, ThinkingLevel("low"), false, ""},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := ValidateConfig(tt.config, tt.support)
			if tt.wantErr {
				if err == nil {
					t.Fatalf("ValidateConfig(%+v, support) error = nil, want %v", tt.config, tt.wantCode)
				}
				thinkingErr, ok := err.(*ThinkingError)
				if !ok {
					t.Fatalf("ValidateConfig(%+v, support) error type = %T, want *ThinkingError", tt.config, err)
				}
				if thinkingErr.Code != tt.wantCode {
					t.Errorf("ValidateConfig(%+v, support) code = %v, want %v", tt.config, thinkingErr.Code, tt.wantCode)
				}
				return
			}
			if err != nil {
				t.Fatalf("ValidateConfig(%+v, support) returned error: %v", tt.config, err)
			}
			if got == nil {
				t.Fatalf("ValidateConfig(%+v, support) returned nil config", tt.config)
			}
			if got.Mode != tt.wantMode {
				t.Errorf("ValidateConfig(%+v, support) Mode = %v, want %v", tt.config, got.Mode, tt.wantMode)
			}
			if got.Budget != tt.wantBudget {
				t.Errorf("ValidateConfig(%+v, support) Budget = %d, want %d", tt.config, got.Budget, tt.wantBudget)
			}
			if got.Level != tt.wantLevel {
				t.Errorf("ValidateConfig(%+v, support) Level = %q, want %q", tt.config, got.Level, tt.wantLevel)
			}
		})
	}
}

// TestValidationErrorMessages tests error message formatting.
//
// Error messages should:
//   - Be lowercase
//   - Have no trailing period
//   - Include context with %s/%d
//
// Depends on: Epic 5 Story 5-4 (validation error messages)
func TestValidationErrorMessages(t *testing.T) {
	tests := []struct {
		name         string
		getErr       func() error
		wantCode     ErrorCode
		wantContains string
	}{
		{"invalid suffix", func() error {
			_, err := ParseSuffixWithError("model(abc")
			return err
		}, ErrInvalidSuffix, "model(abc"},
		{"level not supported", func() error {
			_, err := ValidateConfig(ThinkingConfig{Mode: ModeLevel, Level: LevelXHigh}, &registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}})
			return err
		}, ErrLevelNotSupported, "valid levels: low, medium, high"},
		{"thinking not supported", func() error {
			_, err := ValidateConfig(ThinkingConfig{Mode: ModeBudget, Budget: 1024}, nil)
			return err
		}, ErrThinkingNotSupported, "thinking not supported for this model"},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			err := tt.getErr()
			if err == nil {
				t.Fatalf("error = nil, want ThinkingError")
			}
			thinkingErr, ok := err.(*ThinkingError)
			if !ok {
				t.Fatalf("error type = %T, want *ThinkingError", err)
			}
			if thinkingErr.Code != tt.wantCode {
				t.Errorf("code = %v, want %v", thinkingErr.Code, tt.wantCode)
			}
			if thinkingErr.Message == "" {
				t.Fatalf("message is empty")
			}
			first, _ := utf8.DecodeRuneInString(thinkingErr.Message)
			if unicode.IsLetter(first) && !unicode.IsLower(first) {
				t.Errorf("message does not start with lowercase: %q", thinkingErr.Message)
			}
			if strings.HasSuffix(thinkingErr.Message, ".") {
				t.Errorf("message has trailing period: %q", thinkingErr.Message)
			}
			if !strings.Contains(thinkingErr.Message, tt.wantContains) {
				t.Errorf("message %q does not contain %q", thinkingErr.Message, tt.wantContains)
			}
		})
	}
}

// TestClampingLogging tests that clamping produces correct log entries.
//
// Clamping behavior:
//   - Normal clamp (budget outside range) → Debug log
//   - ZeroAllowed=false + zero request → Warn log
//
// Depends on: Epic 5 Story 5-1, 5-2
func TestClampingLogging(t *testing.T) {
	tests := []struct {
		name         string
		useZeroCheck bool
		budget       int
		min          int
		max          int
		zeroAllowed  bool
		wantLevel    log.Level
		wantReason   string
		wantClamped  int
	}{
		{"above max - debug", false, 50000, 128, 32768, false, log.DebugLevel, "", 32768},
		{"below min - debug", false, 50, 128, 32768, false, log.DebugLevel, "", 128},
		{"zero not allowed - warn", true, 0, 128, 32768, false, log.WarnLevel, "zero_not_allowed", 128},
	}

	logger := log.StandardLogger()
	originalLevel := logger.GetLevel()
	logger.SetLevel(log.DebugLevel)
	hook := logtest.NewLocal(logger)
	t.Cleanup(func() {
		logger.SetLevel(originalLevel)
		hook.Reset()
	})

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			hook.Reset()
			var got int
			if tt.useZeroCheck {
				got = ClampBudgetWithZeroCheck(tt.budget, tt.min, tt.max, tt.zeroAllowed)
			} else {
				got = ClampBudget(tt.budget, tt.min, tt.max)
			}
			if got != tt.wantClamped {
				t.Fatalf("clamped budget = %d, want %d", got, tt.wantClamped)
			}

			entry := hook.LastEntry()
			if entry == nil {
				t.Fatalf("no log entry captured")
			}
			if entry.Level != tt.wantLevel {
				t.Errorf("log level = %v, want %v", entry.Level, tt.wantLevel)
			}

			fields := []string{"original_value", "clamped_to", "min", "max"}
			for _, key := range fields {
				if _, ok := entry.Data[key]; !ok {
					t.Errorf("missing field %q", key)
				}
			}
			if tt.wantReason != "" {
				if value, ok := entry.Data["reason"]; !ok || value != tt.wantReason {
					t.Errorf("reason = %v, want %v", value, tt.wantReason)
				}
			}
		})
	}
}
@@ -12,6 +12,7 @@ import (
"strings"

"github.com/router-for-me/CLIProxyAPI/v6/internal/cache"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson"
@@ -385,7 +386,9 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
}

// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName)
if modelInfo != nil && modelInfo.Thinking != nil {
if t.Get("type").String() == "enabled" {
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
budget := int(b.Int())
@@ -394,6 +397,7 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
}
}
}
}
if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number {
out, _ = sjson.Set(out, "request.generationConfig.temperature", v.Num)
}
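
// Illustrative sketch (not part of the diff): the recurring change in these
// translators replaces the deprecated util.ModelSupportsThinking /
// util.ModelUsesThinkingLevels string lookups with a single registry fetch
// whose result is checked inline, assuming only that GetModelInfo returns nil
// for unknown models, as the guards above imply:
//
//	modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName)
//	supportsThinking := modelInfo != nil && modelInfo.Thinking != nil
//	usesNumericBudgets := supportsThinking && len(modelInfo.Thinking.Levels) == 0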
@@ -8,6 +8,7 @@ import (
"strings"

"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
log "github.com/sirupsen/logrus"
@@ -39,7 +40,8 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
re := gjson.GetBytes(rawJSON, "reasoning_effort")
hasOfficialThinking := re.Exists()
if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName)
if hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil {
effort := strings.ToLower(strings.TrimSpace(re.String()))
if util.IsGemini3Model(modelName) {
switch effort {
@@ -53,14 +55,14 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
out = util.ApplyGeminiCLIThinkingLevel(out, level, nil)
}
}
} else if !util.ModelUsesThinkingLevels(modelName) {
} else if len(modelInfo.Thinking.Levels) == 0 {
out = util.ApplyReasoningEffortToGeminiCLI(out, effort)
}
}

// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
// Only apply for models that use numeric budgets, not discrete levels.
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
if !hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 {
if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
var setBudget bool
var budget int
@@ -71,7 +73,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
setBudget = true
} else if v := tc.Get("thinking_budget"); v.Exists() {
budget = int(v.Int())
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingBudget.thinkingBudget", budget)
setBudget = true
}

@@ -87,7 +89,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _

// Claude/Anthropic API format: thinking.type == "enabled" with budget_tokens
// This allows Claude Code and other Claude API clients to pass thinking configuration
if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && util.ModelSupportsThinking(modelName) {
if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelInfo != nil && modelInfo.Thinking != nil {
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
if t.Get("type").String() == "enabled" {
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
@@ -15,6 +15,8 @@ import (
"strings"

"github.com/google/uuid"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
@@ -115,11 +117,13 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream
}
// Include thoughts configuration for reasoning process visibility
// Only apply for models that support thinking and use numeric budgets, not discrete levels.
if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName)
if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 {
// Check for thinkingBudget first - if present, enable thinking with budget
if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() && thinkingBudget.Int() > 0 {
out, _ = sjson.Set(out, "thinking.type", "enabled")
normalizedBudget := util.NormalizeThinkingBudget(modelName, int(thinkingBudget.Int()))
normalizedBudget := thinking.ClampBudget(int(thinkingBudget.Int()), modelInfo.Thinking.Min, modelInfo.Thinking.Max)
out, _ = sjson.Set(out, "thinking.budget_tokens", normalizedBudget)
} else if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
// Fallback to include_thoughts if no budget specified
@@ -127,6 +131,7 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream
}
}
}
}

// System instruction conversion to Claude Code format
if sysInstr := root.Get("system_instruction"); sysInstr.Exists() {
@@ -15,7 +15,8 @@ import (
"strings"

"github.com/google/uuid"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
@@ -65,10 +66,12 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream

root := gjson.ParseBytes(rawJSON)

if v := root.Get("reasoning_effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
if v := root.Get("reasoning_effort"); v.Exists() {
modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName)
if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 {
effort := strings.ToLower(strings.TrimSpace(v.String()))
if effort != "" {
budget, ok := util.ThinkingEffortToBudget(modelName, effort)
budget, ok := thinking.ConvertLevelToBudget(effort)
if ok {
switch budget {
case 0:
@@ -84,6 +87,7 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream
}
}
}
}

// Helper for generating tool call IDs in the form: toolu_<alphanum>
// This ensures unique identifiers for tool calls in the Claude Code format
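
// Illustrative sketch (not part of the diff): thinking.ConvertLevelToBudget and
// thinking.ConvertBudgetToLevel are the registry-agnostic replacements for
// util.ThinkingEffortToBudget and util.ThinkingBudgetToEffort. Two data points
// visible in this commit's tests (not an exhaustive mapping):
//
//	budget, ok := thinking.ConvertLevelToBudget("high") // 24576, true
//	level, ok2 := thinking.ConvertBudgetToLevel(5000)   // "medium", true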
@@ -10,7 +10,8 @@ import (
"strings"

"github.com/google/uuid"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
@@ -53,10 +54,12 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte

root := gjson.ParseBytes(rawJSON)

if v := root.Get("reasoning.effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
if v := root.Get("reasoning.effort"); v.Exists() {
modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName)
if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 {
effort := strings.ToLower(strings.TrimSpace(v.String()))
if effort != "" {
budget, ok := util.ThinkingEffortToBudget(modelName, effort)
budget, ok := thinking.ConvertLevelToBudget(effort)
if ok {
switch budget {
case 0:
@@ -72,6 +75,7 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte
}
}
}
}

// Helper for generating tool call IDs when missing
genToolCallID := func() string {
@@ -12,7 +12,8 @@ import (
"strings"

"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
@@ -219,19 +220,20 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)

// Convert thinking.budget_tokens to reasoning.effort for level-based models
reasoningEffort := "medium" // default
if thinking := rootResult.Get("thinking"); thinking.Exists() && thinking.IsObject() {
switch thinking.Get("type").String() {
if thinkingConfig := rootResult.Get("thinking"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName)
switch thinkingConfig.Get("type").String() {
case "enabled":
if util.ModelUsesThinkingLevels(modelName) {
if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() {
if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) > 0 {
if budgetTokens := thinkingConfig.Get("budget_tokens"); budgetTokens.Exists() {
budget := int(budgetTokens.Int())
if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" {
reasoningEffort = effort
}
}
}
case "disabled":
if effort, ok := util.ThinkingBudgetToEffort(modelName, 0); ok && effort != "" {
if effort, ok := thinking.ConvertBudgetToLevel(0); ok && effort != "" {
reasoningEffort = effort
}
}
@@ -14,6 +14,8 @@ import (
"strings"

"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
@@ -251,10 +253,11 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
reasoningEffort := "medium" // default
if genConfig := root.Get("generationConfig"); genConfig.Exists() {
if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
if util.ModelUsesThinkingLevels(modelName) {
modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName)
if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) > 0 {
if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
budget := int(thinkingBudget.Int())
if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" {
reasoningEffort = effort
}
}
@@ -9,8 +9,8 @@ import (
"bytes"
"strings"

"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
@@ -160,7 +160,9 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
}

// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName)
if modelInfo != nil && modelInfo.Thinking != nil {
if t.Get("type").String() == "enabled" {
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
budget := int(b.Int())
@@ -169,6 +171,7 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
}
}
}
}
if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number {
out, _ = sjson.Set(out, "request.generationConfig.temperature", v.Num)
}
@@ -8,6 +8,7 @@ import (
"strings"

"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
log "github.com/sirupsen/logrus"
@@ -39,13 +40,14 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
re := gjson.GetBytes(rawJSON, "reasoning_effort")
hasOfficialThinking := re.Exists()
if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName)
if hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 {
out = util.ApplyReasoningEffortToGeminiCLI(out, re.String())
}

// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
// Only apply for models that use numeric budgets, not discrete levels.
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
if !hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 {
if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
var setBudget bool
var budget int
@@ -9,8 +9,8 @@ import (
"bytes"
"strings"

"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
@@ -154,7 +154,9 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)

// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when enabled
// Only apply for models that use numeric budgets, not discrete levels.
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName)
if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 {
if t.Get("type").String() == "enabled" {
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
budget := int(b.Int())
@@ -163,6 +165,7 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
}
}
}
}
if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number {
out, _ = sjson.Set(out, "generationConfig.temperature", v.Num)
}
@@ -8,6 +8,7 @@ import (
"strings"

"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
log "github.com/sirupsen/logrus"
@@ -42,7 +43,8 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
// use thinkingLevel/includeThoughts instead.
re := gjson.GetBytes(rawJSON, "reasoning_effort")
hasOfficialThinking := re.Exists()
if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName)
if hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil {
effort := strings.ToLower(strings.TrimSpace(re.String()))
if util.IsGemini3Model(modelName) {
switch effort {
@@ -56,14 +58,14 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
out = util.ApplyGeminiThinkingLevel(out, level, nil)
}
}
} else if !util.ModelUsesThinkingLevels(modelName) {
} else if len(modelInfo.Thinking.Levels) == 0 {
out = util.ApplyReasoningEffortToGemini(out, effort)
}
}

// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
// Only apply for models that use numeric budgets, not discrete levels.
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
if !hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 {
if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
var setBudget bool
var budget int
@@ -4,6 +4,7 @@ import (
"bytes"
"strings"

"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson"
@@ -391,14 +392,15 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
// OpenAI official reasoning fields take precedence
// Only convert for models that use numeric budgets (not discrete levels).
hasOfficialThinking := root.Get("reasoning.effort").Exists()
if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName)
if hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 {
reasoningEffort := root.Get("reasoning.effort")
out = string(util.ApplyReasoningEffortToGemini([]byte(out), reasoningEffort.String()))
}

// Cherry Studio extension (applies only when official fields are missing)
// Only apply for models that use numeric budgets, not discrete levels.
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
if !hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 {
if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
var setBudget bool
var budget int
@@ -9,6 +9,7 @@ import (
"bytes"
"strings"

"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
@@ -61,23 +62,23 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
out, _ = sjson.Set(out, "stream", stream)

// Thinking: Convert Claude thinking.budget_tokens to OpenAI reasoning_effort
if thinking := root.Get("thinking"); thinking.Exists() && thinking.IsObject() {
if thinkingType := thinking.Get("type"); thinkingType.Exists() {
if thinkingConfig := root.Get("thinking"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
if thinkingType := thinkingConfig.Get("type"); thinkingType.Exists() {
switch thinkingType.String() {
case "enabled":
if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() {
if budgetTokens := thinkingConfig.Get("budget_tokens"); budgetTokens.Exists() {
budget := int(budgetTokens.Int())
if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" {
out, _ = sjson.Set(out, "reasoning_effort", effort)
}
} else {
// No budget_tokens specified, default to "auto" for enabled thinking
if effort, ok := util.ThinkingBudgetToEffort(modelName, -1); ok && effort != "" {
if effort, ok := thinking.ConvertBudgetToLevel(-1); ok && effort != "" {
out, _ = sjson.Set(out, "reasoning_effort", effort)
}
}
case "disabled":
if effort, ok := util.ThinkingBudgetToEffort(modelName, 0); ok && effort != "" {
if effort, ok := thinking.ConvertBudgetToLevel(0); ok && effort != "" {
out, _ = sjson.Set(out, "reasoning_effort", effort)
}
}
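
// Illustrative sketch (not part of the diff): the rename of the local variable
// thinking to thinkingConfig in this hunk (and in the Codex converters above)
// is what allows this file to import the new thinking package; a local named
// thinking would shadow the package for the rest of the scope:
//
//	thinking := root.Get("thinking")           // shadows the thinking package
//	_, _ = thinking.ConvertBudgetToLevel(1024) // would no longer compile here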
@@ -12,7 +12,7 @@ import (
"math/big"
"strings"

"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
@@ -82,7 +82,7 @@ func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream
if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
budget := int(thinkingBudget.Int())
if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" {
out, _ = sjson.Set(out, "reasoning_effort", effort)
}
}
@@ -8,6 +8,8 @@ import (

// ModelSupportsThinking reports whether the given model has Thinking capability
// according to the model registry metadata (provider-agnostic).
//
// Deprecated: Use thinking.ApplyThinking with modelInfo.Thinking check.
func ModelSupportsThinking(model string) bool {
if model == "" {
return false
@@ -32,6 +34,8 @@ func ModelSupportsThinking(model string) bool {
// If the model is unknown or has no Thinking metadata, returns the original budget.
// For dynamic (-1), returns -1 if DynamicAllowed; otherwise approximates mid-range
// or min (0 if zero is allowed and mid <= 0).
//
// Deprecated: Use thinking.ValidateConfig for budget normalization.
func NormalizeThinkingBudget(model string, budget int) int {
if budget == -1 { // dynamic
if found, minBudget, maxBudget, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found {
@@ -89,6 +93,8 @@ func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zero

// GetModelThinkingLevels returns the discrete reasoning effort levels for the model.
// Returns nil if the model has no thinking support or no levels defined.
//
// Deprecated: Access modelInfo.Thinking.Levels directly.
func GetModelThinkingLevels(model string) []string {
if model == "" {
return nil
@@ -102,6 +108,8 @@ func GetModelThinkingLevels(model string) []string {

// ModelUsesThinkingLevels reports whether the model uses discrete reasoning
// effort levels instead of numeric budgets.
//
// Deprecated: Check len(modelInfo.Thinking.Levels) > 0.
func ModelUsesThinkingLevels(model string) bool {
levels := GetModelThinkingLevels(model)
return len(levels) > 0
@@ -109,6 +117,8 @@ func ModelUsesThinkingLevels(model string) bool {

// NormalizeReasoningEffortLevel validates and normalizes a reasoning effort
// level for the given model. Returns false when the level is not supported.
//
// Deprecated: Use thinking.ValidateConfig for level validation.
func NormalizeReasoningEffortLevel(model, effort string) (string, bool) {
levels := GetModelThinkingLevels(model)
if len(levels) == 0 {
@@ -125,6 +135,8 @@ func NormalizeReasoningEffortLevel(model, effort string) (string, bool) {

// IsOpenAICompatibilityModel reports whether the model is registered as an OpenAI-compatibility model.
// These models may not advertise Thinking metadata in the registry.
//
// Deprecated: Check modelInfo.Type == "openai-compatibility".
func IsOpenAICompatibilityModel(model string) bool {
if model == "" {
return false
@@ -149,6 +161,8 @@ func IsOpenAICompatibilityModel(model string) bool {
// - "xhigh" -> 32768
//
// Returns false when the effort level is empty or unsupported.
//
// Deprecated: Use thinking.ConvertLevelToBudget instead.
func ThinkingEffortToBudget(model, effort string) (int, bool) {
if effort == "" {
return 0, false
@@ -186,6 +200,8 @@ func ThinkingEffortToBudget(model, effort string) (int, bool) {
// - "high" -> 32768
//
// Returns false when the level is empty or unsupported.
//
// Deprecated: Use thinking.ConvertLevelToBudget instead.
func ThinkingLevelToBudget(level string) (int, bool) {
if level == "" {
return 0, false
@@ -217,6 +233,8 @@ func ThinkingLevelToBudget(level string) (int, bool) {
// - 24577.. -> highest supported level for the model (defaults to "xhigh")
//
// Returns false when the budget is unsupported (negative values other than -1).
//
// Deprecated: Use thinking.ConvertBudgetToLevel instead.
func ThinkingBudgetToEffort(model string, budget int) (string, bool) {
switch {
case budget == -1:
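
// Illustrative sketch (not part of the diff): taken together, the deprecation
// notes above define a migration table for call sites. Old versus new, with
// "high" as a sample level:
//
//	// old (deprecated util helpers)
//	supported := util.ModelSupportsThinking(model) && !util.ModelUsesThinkingLevels(model)
//	budget, ok := util.ThinkingEffortToBudget(model, "high")
//
//	// new (registry metadata plus the thinking package)
//	modelInfo := registry.GetGlobalRegistry().GetModelInfo(model)
//	supported = modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0
//	budget, ok = thinking.ConvertLevelToBudget("high")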
|
||||
|
||||
130
internal/util/thinking_deprecation_test.go
Normal file
@@ -0,0 +1,130 @@
package util

import (
    "go/ast"
    "go/parser"
    "go/token"
    "os"
    "path/filepath"
    "runtime"
    "strings"
    "testing"
)

func TestThinkingUtilDeprecationComments(t *testing.T) {
    dir, err := thinkingSourceDir()
    if err != nil {
        t.Fatalf("resolve thinking source dir: %v", err)
    }

    // Test thinking.go deprecation comments
    t.Run("thinking.go", func(t *testing.T) {
        docs := parseFuncDocs(t, filepath.Join(dir, "thinking.go"))
        tests := []struct {
            funcName string
            want     string
        }{
            {"ModelSupportsThinking", "Deprecated: Use thinking.ApplyThinking with modelInfo.Thinking check."},
            {"NormalizeThinkingBudget", "Deprecated: Use thinking.ValidateConfig for budget normalization."},
            {"ThinkingEffortToBudget", "Deprecated: Use thinking.ConvertLevelToBudget instead."},
            {"ThinkingBudgetToEffort", "Deprecated: Use thinking.ConvertBudgetToLevel instead."},
            {"GetModelThinkingLevels", "Deprecated: Access modelInfo.Thinking.Levels directly."},
            {"ModelUsesThinkingLevels", "Deprecated: Check len(modelInfo.Thinking.Levels) > 0."},
            {"NormalizeReasoningEffortLevel", "Deprecated: Use thinking.ValidateConfig for level validation."},
            {"IsOpenAICompatibilityModel", "Deprecated: Check modelInfo.Type == \"openai-compatibility\"."},
            {"ThinkingLevelToBudget", "Deprecated: Use thinking.ConvertLevelToBudget instead."},
        }
        for _, tt := range tests {
            t.Run(tt.funcName, func(t *testing.T) {
                doc, ok := docs[tt.funcName]
                if !ok {
                    t.Fatalf("missing function %q in thinking.go", tt.funcName)
                }
                if !strings.Contains(doc, tt.want) {
                    t.Fatalf("missing deprecation note for %s: want %q, got %q", tt.funcName, tt.want, doc)
                }
            })
        }
    })

    // Test thinking_suffix.go deprecation comments
    t.Run("thinking_suffix.go", func(t *testing.T) {
        docs := parseFuncDocs(t, filepath.Join(dir, "thinking_suffix.go"))
        tests := []struct {
            funcName string
            want     string
        }{
            {"NormalizeThinkingModel", "Deprecated: Use thinking.ParseSuffix instead."},
            {"ThinkingFromMetadata", "Deprecated: Access ThinkingConfig fields directly."},
            {"ResolveThinkingConfigFromMetadata", "Deprecated: Use thinking.ApplyThinking instead."},
            {"ReasoningEffortFromMetadata", "Deprecated: Use thinking.ConvertBudgetToLevel instead."},
            {"ResolveOriginalModel", "Deprecated: Parse model suffix with thinking.ParseSuffix."},
        }
        for _, tt := range tests {
            t.Run(tt.funcName, func(t *testing.T) {
                doc, ok := docs[tt.funcName]
                if !ok {
                    t.Fatalf("missing function %q in thinking_suffix.go", tt.funcName)
                }
                if !strings.Contains(doc, tt.want) {
                    t.Fatalf("missing deprecation note for %s: want %q, got %q", tt.funcName, tt.want, doc)
                }
            })
        }
    })

    // Test thinking_text.go deprecation comments
    t.Run("thinking_text.go", func(t *testing.T) {
        docs := parseFuncDocs(t, filepath.Join(dir, "thinking_text.go"))
        tests := []struct {
            funcName string
            want     string
        }{
            {"GetThinkingText", "Deprecated: Use thinking package for thinking text extraction."},
            {"GetThinkingTextFromJSON", "Deprecated: Use thinking package for thinking text extraction."},
            {"SanitizeThinkingPart", "Deprecated: Use thinking package for thinking part sanitization."},
            {"StripCacheControl", "Deprecated: Use thinking package for cache control stripping."},
        }
        for _, tt := range tests {
            t.Run(tt.funcName, func(t *testing.T) {
                doc, ok := docs[tt.funcName]
                if !ok {
                    t.Fatalf("missing function %q in thinking_text.go", tt.funcName)
                }
                if !strings.Contains(doc, tt.want) {
                    t.Fatalf("missing deprecation note for %s: want %q, got %q", tt.funcName, tt.want, doc)
                }
            })
        }
    })
}

func parseFuncDocs(t *testing.T, path string) map[string]string {
    t.Helper()
    fset := token.NewFileSet()
    file, err := parser.ParseFile(fset, path, nil, parser.ParseComments)
    if err != nil {
        t.Fatalf("parse %s: %v", path, err)
    }
    docs := map[string]string{}
    for _, decl := range file.Decls {
        fn, ok := decl.(*ast.FuncDecl)
        if !ok || fn.Recv != nil {
            continue
        }
        if fn.Doc == nil {
            docs[fn.Name.Name] = ""
            continue
        }
        docs[fn.Name.Name] = fn.Doc.Text()
    }
    return docs
}

func thinkingSourceDir() (string, error) {
    _, thisFile, _, ok := runtime.Caller(0)
    if !ok {
        return "", os.ErrNotExist
    }
    return filepath.Dir(thisFile), nil
}
@@ -7,15 +7,30 @@ import (
)

const (
    // Deprecated: No longer used. Thinking configuration is now passed via
    // model name suffix and processed by thinking.ApplyThinking().
    ThinkingBudgetMetadataKey = "thinking_budget"

    // Deprecated: No longer used. See ThinkingBudgetMetadataKey.
    ThinkingIncludeThoughtsMetadataKey = "thinking_include_thoughts"

    // Deprecated: No longer used. See ThinkingBudgetMetadataKey.
    ReasoningEffortMetadataKey = "reasoning_effort"

    // Deprecated: No longer used. The original model name (with suffix) is now
    // preserved directly in the model field. Use thinking.ParseSuffix() to
    // extract the base model name if needed.
    ThinkingOriginalModelMetadataKey = "thinking_original_model"

    // ModelMappingOriginalModelMetadataKey stores the client-requested model alias
    // for OAuth model name mappings. This is NOT deprecated.
    ModelMappingOriginalModelMetadataKey = "model_mapping_original_model"
)

// NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns
// the normalized base model with extracted metadata. Supported pattern:
//
// Deprecated: Use thinking.ParseSuffix instead.
// - "(<value>)" where value can be:
//   - A numeric budget (e.g., "(8192)", "(16384)")
//   - A reasoning effort level (e.g., "(high)", "(medium)", "(low)")
@@ -89,6 +104,8 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) {

// ThinkingFromMetadata extracts thinking overrides from metadata produced by NormalizeThinkingModel.
// It accepts both the new generic keys and legacy Gemini-specific keys.
//
// Deprecated: Access ThinkingConfig fields directly.
func ThinkingFromMetadata(metadata map[string]any) (*int, *bool, *string, bool) {
    if len(metadata) == 0 {
        return nil, nil, nil, false
@@ -159,6 +176,8 @@ func ThinkingFromMetadata(metadata map[string]any) (*int, *bool, *string, bool)

// ResolveThinkingConfigFromMetadata derives thinking budget/include overrides,
// converting reasoning effort strings into budgets when possible.
//
// Deprecated: Use thinking.ApplyThinking instead.
func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (*int, *bool, bool) {
    budget, include, effort, matched := ThinkingFromMetadata(metadata)
    if !matched {
@@ -180,6 +199,8 @@ func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (*

// ReasoningEffortFromMetadata resolves a reasoning effort string from metadata,
// inferring "auto" and "none" when budgets request dynamic or disabled thinking.
//
// Deprecated: Use thinking.ConvertBudgetToLevel instead.
func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
    budget, include, effort, matched := ThinkingFromMetadata(metadata)
    if !matched {
@@ -204,6 +225,8 @@ func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {

// ResolveOriginalModel returns the original model name stored in metadata (if present),
// otherwise falls back to the provided model.
//
// Deprecated: Parse model suffix with thinking.ParseSuffix.
func ResolveOriginalModel(model string, metadata map[string]any) string {
    normalize := func(name string) string {
        if name == "" {
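All of these deprecated helpers funnel into thinking.ParseSuffix. Judging from the call
sites in this commit, its result exposes ModelName, HasSuffix, and RawSuffix (the suffix
value with the parentheses already stripped); a minimal sketch under that assumption:

    package main

    import (
        "fmt"

        "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
    )

    func main() {
        res := thinking.ParseSuffix("gemini-2.5-pro(8192)")
        fmt.Println(res.ModelName) // "gemini-2.5-pro"
        fmt.Println(res.HasSuffix) // true
        fmt.Println(res.RawSuffix) // "8192" (parentheses stripped; callers re-add them)

        // Re-attaching the suffix mirrors the fallback handler earlier in this commit.
        model := res.ModelName
        if res.HasSuffix {
            model += "(" + res.RawSuffix + ")" // round-trips to "gemini-2.5-pro(8192)"
        }
        fmt.Println(model)
    }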
@@ -11,6 +11,8 @@ import (
// - Wrapped object: { "thinking": { "text": "text", "cache_control": {...} } }
// - Gemini-style: { "thought": true, "text": "text" }
// Returns the extracted text string.
//
// Deprecated: Use thinking package for thinking text extraction.
func GetThinkingText(part gjson.Result) string {
    // Try direct text field first (Gemini-style)
    if text := part.Get("text"); text.Exists() && text.Type == gjson.String {
@@ -42,6 +44,8 @@ func GetThinkingText(part gjson.Result) string {
}

// GetThinkingTextFromJSON extracts thinking text from a raw JSON string.
//
// Deprecated: Use thinking package for thinking text extraction.
func GetThinkingTextFromJSON(jsonStr string) string {
    return GetThinkingText(gjson.Parse(jsonStr))
}
@@ -49,6 +53,8 @@ func GetThinkingTextFromJSON(jsonStr string) string {
// SanitizeThinkingPart normalizes a thinking part to a canonical form.
// Strips cache_control and other non-essential fields.
// Returns the sanitized part as JSON string.
//
// Deprecated: Use thinking package for thinking part sanitization.
func SanitizeThinkingPart(part gjson.Result) string {
    // Gemini-style: { thought: true, text, thoughtSignature }
    if part.Get("thought").Bool() {
@@ -79,6 +85,8 @@ func SanitizeThinkingPart(part gjson.Result) string {
}

// StripCacheControl removes cache_control and providerOptions from a JSON object.
//
// Deprecated: Use thinking package for cache control stripping.
func StripCacheControl(jsonStr string) string {
    result := jsonStr
    result, _ = sjson.Delete(result, "cache_control")
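The shapes listed in GetThinkingText's doc comment make the deprecated text helpers easy
to exercise. The expected outputs below follow that doc comment; as above, internal/util
compiles only inside the module:

    package main

    import (
        "fmt"

        "github.com/router-for-me/CLIProxyAPI/v6/internal/util"
    )

    func main() {
        // Gemini-style part: { "thought": true, "text": "..." }.
        fmt.Println(util.GetThinkingTextFromJSON(`{"thought": true, "text": "step one"}`))

        // Wrapped object: { "thinking": { "text": "...", "cache_control": {...} } }.
        fmt.Println(util.GetThinkingTextFromJSON(`{"thinking": {"text": "step two", "cache_control": {"type": "ephemeral"}}}`))
    }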
@@ -16,6 +16,7 @@ import (
    "github.com/google/uuid"
    "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
    "github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
    "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
    "github.com/router-for-me/CLIProxyAPI/v6/internal/util"
    coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
    coreexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -379,7 +380,7 @@ func appendAPIResponse(c *gin.Context, data []byte) {
// ExecuteWithAuthManager executes a non-streaming request via the core auth manager.
// This path is the only supported execution route.
func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) {
    providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName)
    providers, normalizedModel, errMsg := h.getRequestDetails(modelName)
    if errMsg != nil {
        return nil, errMsg
    }
@@ -388,16 +389,13 @@ func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType
        Model:   normalizedModel,
        Payload: cloneBytes(rawJSON),
    }
    if cloned := cloneMetadata(metadata); cloned != nil {
        req.Metadata = cloned
    }
    opts := coreexecutor.Options{
        Stream:          false,
        Alt:             alt,
        OriginalRequest: cloneBytes(rawJSON),
        SourceFormat:    sdktranslator.FromString(handlerType),
    }
    opts.Metadata = mergeMetadata(cloneMetadata(metadata), reqMeta)
    opts.Metadata = reqMeta
    resp, err := h.AuthManager.Execute(ctx, providers, req, opts)
    if err != nil {
        status := http.StatusInternalServerError
@@ -420,7 +418,7 @@
// ExecuteCountWithAuthManager executes a non-streaming request via the core auth manager.
// This path is the only supported execution route.
func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) {
    providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName)
    providers, normalizedModel, errMsg := h.getRequestDetails(modelName)
    if errMsg != nil {
        return nil, errMsg
    }
@@ -429,16 +427,13 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
        Model:   normalizedModel,
        Payload: cloneBytes(rawJSON),
    }
    if cloned := cloneMetadata(metadata); cloned != nil {
        req.Metadata = cloned
    }
    opts := coreexecutor.Options{
        Stream:          false,
        Alt:             alt,
        OriginalRequest: cloneBytes(rawJSON),
        SourceFormat:    sdktranslator.FromString(handlerType),
    }
    opts.Metadata = mergeMetadata(cloneMetadata(metadata), reqMeta)
    opts.Metadata = reqMeta
    resp, err := h.AuthManager.ExecuteCount(ctx, providers, req, opts)
    if err != nil {
        status := http.StatusInternalServerError
@@ -461,7 +456,7 @@
// ExecuteStreamWithAuthManager executes a streaming request via the core auth manager.
// This path is the only supported execution route.
func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) (<-chan []byte, <-chan *interfaces.ErrorMessage) {
    providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName)
    providers, normalizedModel, errMsg := h.getRequestDetails(modelName)
    if errMsg != nil {
        errChan := make(chan *interfaces.ErrorMessage, 1)
        errChan <- errMsg
@@ -473,16 +468,13 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
        Model:   normalizedModel,
        Payload: cloneBytes(rawJSON),
    }
    if cloned := cloneMetadata(metadata); cloned != nil {
        req.Metadata = cloned
    }
    opts := coreexecutor.Options{
        Stream:          true,
        Alt:             alt,
        OriginalRequest: cloneBytes(rawJSON),
        SourceFormat:    sdktranslator.FromString(handlerType),
    }
    opts.Metadata = mergeMetadata(cloneMetadata(metadata), reqMeta)
    opts.Metadata = reqMeta
    chunks, err := h.AuthManager.ExecuteStream(ctx, providers, req, opts)
    if err != nil {
        errChan := make(chan *interfaces.ErrorMessage, 1)
@@ -595,38 +587,40 @@ func statusFromError(err error) int {
    return 0
}

func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string, normalizedModel string, metadata map[string]any, err *interfaces.ErrorMessage) {
    // Resolve "auto" model to an actual available model first
    resolvedModelName := util.ResolveAutoModel(modelName)
func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string, normalizedModel string, err *interfaces.ErrorMessage) {
    resolvedModelName := modelName
    initialSuffix := thinking.ParseSuffix(modelName)
    if initialSuffix.ModelName == "auto" {
        resolvedBase := util.ResolveAutoModel(initialSuffix.ModelName)
        if initialSuffix.HasSuffix {
            resolvedModelName = fmt.Sprintf("%s(%s)", resolvedBase, initialSuffix.RawSuffix)
        } else {
            resolvedModelName = resolvedBase
        }
    } else {
        resolvedModelName = util.ResolveAutoModel(modelName)
    }

    // Normalize the model name to handle dynamic thinking suffixes before determining the provider.
    normalizedModel, metadata = normalizeModelMetadata(resolvedModelName)
    parsed := thinking.ParseSuffix(resolvedModelName)
    baseModel := strings.TrimSpace(parsed.ModelName)

    // Use the normalizedModel to get the provider name.
    providers = util.GetProviderName(normalizedModel)
    if len(providers) == 0 && metadata != nil {
        if originalRaw, ok := metadata[util.ThinkingOriginalModelMetadataKey]; ok {
            if originalModel, okStr := originalRaw.(string); okStr {
                originalModel = strings.TrimSpace(originalModel)
                if originalModel != "" && !strings.EqualFold(originalModel, normalizedModel) {
                    if altProviders := util.GetProviderName(originalModel); len(altProviders) > 0 {
                        providers = altProviders
                        normalizedModel = originalModel
                    }
                }
            }
        }
    }
    providers = util.GetProviderName(baseModel)
    // Fallback: if baseModel has no provider but differs from resolvedModelName,
    // try using the full model name. This handles edge cases where custom models
    // may be registered with their full suffixed name (e.g., "my-model(8192)").
    // Evaluated in Story 11.8: This fallback is intentionally preserved to support
    // custom model registrations that include thinking suffixes.
    if len(providers) == 0 && baseModel != resolvedModelName {
        providers = util.GetProviderName(resolvedModelName)
    }

    if len(providers) == 0 {
        return nil, "", nil, &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)}
        return nil, "", &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)}
    }

    // If it's a dynamic model, the normalizedModel was already set to extractedModelName.
    // If it's a non-dynamic model, normalizedModel was set by normalizeModelMetadata.
    // So, normalizedModel is already correctly set at this point.

    return providers, normalizedModel, metadata, nil
    // The thinking suffix is preserved in the model name itself, so no
    // metadata-based configuration passing is needed.
    return providers, resolvedModelName, nil
}

func cloneBytes(src []byte) []byte {
@@ -638,10 +632,6 @@ func cloneBytes(src []byte) []byte {
    return dst
}

func normalizeModelMetadata(modelName string) (string, map[string]any) {
    return util.NormalizeThinkingModel(modelName)
}

func cloneMetadata(src map[string]any) map[string]any {
    if len(src) == 0 {
        return nil
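getRequestDetails now splits "auto(high)" into base and suffix before resolution, then
re-attaches the suffix. In isolation the step looks like the fragment below; the concrete
model ResolveAutoModel picks depends on the registry, so the value in the final comment is
taken from the test file that follows, not from this diff:

    suffix := thinking.ParseSuffix("auto(high)") // ModelName "auto", RawSuffix "high"
    resolvedBase := util.ResolveAutoModel(suffix.ModelName)
    resolved := resolvedBase
    if suffix.HasSuffix {
        resolved = fmt.Sprintf("%s(%s)", resolvedBase, suffix.RawSuffix)
    }
    // With gemini-2.5-pro as the newest registered model: "gemini-2.5-pro(high)".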
118
sdk/api/handlers/handlers_request_details_test.go
Normal file
@@ -0,0 +1,118 @@
package handlers

import (
    "reflect"
    "testing"
    "time"

    "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
    coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
    sdkconfig "github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
)

func TestGetRequestDetails_PreservesSuffix(t *testing.T) {
    modelRegistry := registry.GetGlobalRegistry()
    now := time.Now().Unix()

    modelRegistry.RegisterClient("test-request-details-gemini", "gemini", []*registry.ModelInfo{
        {ID: "gemini-2.5-pro", Created: now + 30},
        {ID: "gemini-2.5-flash", Created: now + 25},
    })
    modelRegistry.RegisterClient("test-request-details-openai", "openai", []*registry.ModelInfo{
        {ID: "gpt-5.2", Created: now + 20},
    })
    modelRegistry.RegisterClient("test-request-details-claude", "claude", []*registry.ModelInfo{
        {ID: "claude-sonnet-4-5", Created: now + 5},
    })

    // Ensure cleanup of all test registrations.
    clientIDs := []string{
        "test-request-details-gemini",
        "test-request-details-openai",
        "test-request-details-claude",
    }
    for _, clientID := range clientIDs {
        id := clientID
        t.Cleanup(func() {
            modelRegistry.UnregisterClient(id)
        })
    }

    handler := NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, coreauth.NewManager(nil, nil, nil))

    tests := []struct {
        name          string
        inputModel    string
        wantProviders []string
        wantModel     string
        wantErr       bool
    }{
        {
            name:          "numeric suffix preserved",
            inputModel:    "gemini-2.5-pro(8192)",
            wantProviders: []string{"gemini"},
            wantModel:     "gemini-2.5-pro(8192)",
            wantErr:       false,
        },
        {
            name:          "level suffix preserved",
            inputModel:    "gpt-5.2(high)",
            wantProviders: []string{"openai"},
            wantModel:     "gpt-5.2(high)",
            wantErr:       false,
        },
        {
            name:          "no suffix unchanged",
            inputModel:    "claude-sonnet-4-5",
            wantProviders: []string{"claude"},
            wantModel:     "claude-sonnet-4-5",
            wantErr:       false,
        },
        {
            name:          "unknown model with suffix",
            inputModel:    "unknown-model(8192)",
            wantProviders: nil,
            wantModel:     "",
            wantErr:       true,
        },
        {
            name:          "auto suffix resolved",
            inputModel:    "auto(high)",
            wantProviders: []string{"gemini"},
            wantModel:     "gemini-2.5-pro(high)",
            wantErr:       false,
        },
        {
            name:          "special suffix none preserved",
            inputModel:    "gemini-2.5-flash(none)",
            wantProviders: []string{"gemini"},
            wantModel:     "gemini-2.5-flash(none)",
            wantErr:       false,
        },
        {
            name:          "special suffix auto preserved",
            inputModel:    "claude-sonnet-4-5(auto)",
            wantProviders: []string{"claude"},
            wantModel:     "claude-sonnet-4-5(auto)",
            wantErr:       false,
        },
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            providers, model, errMsg := handler.getRequestDetails(tt.inputModel)
            if (errMsg != nil) != tt.wantErr {
                t.Fatalf("getRequestDetails() error = %v, wantErr %v", errMsg, tt.wantErr)
            }
            if errMsg != nil {
                return
            }
            if !reflect.DeepEqual(providers, tt.wantProviders) {
                t.Fatalf("getRequestDetails() providers = %v, want %v", providers, tt.wantProviders)
            }
            if model != tt.wantModel {
                t.Fatalf("getRequestDetails() model = %v, want %v", model, tt.wantModel)
            }
        })
    }
}
201
sdk/cliproxy/auth/api_key_model_mappings_test.go
Normal file
@@ -0,0 +1,201 @@
package auth

import (
    "context"
    "testing"

    internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
)

func TestLookupAPIKeyUpstreamModel(t *testing.T) {
    cfg := &internalconfig.Config{
        GeminiKey: []internalconfig.GeminiKey{
            {
                APIKey:  "k",
                BaseURL: "https://example.com",
                Models: []internalconfig.GeminiModel{
                    {Name: "gemini-2.5-pro-exp-03-25", Alias: "g25p"},
                    {Name: "gemini-2.5-flash(low)", Alias: "g25f"},
                },
            },
        },
    }

    mgr := NewManager(nil, nil, nil)
    mgr.SetConfig(cfg)

    ctx := context.Background()
    _, _ = mgr.Register(ctx, &Auth{ID: "a1", Provider: "gemini", Attributes: map[string]string{"api_key": "k", "base_url": "https://example.com"}})

    tests := []struct {
        name   string
        authID string
        input  string
        want   string
    }{
        // Fast path + suffix preservation
        {"alias with suffix", "a1", "g25p(8192)", "gemini-2.5-pro-exp-03-25(8192)"},
        {"alias without suffix", "a1", "g25p", "gemini-2.5-pro-exp-03-25"},

        // Config suffix takes priority
        {"config suffix priority", "a1", "g25f(high)", "gemini-2.5-flash(low)"},
        {"config suffix no user suffix", "a1", "g25f", "gemini-2.5-flash(low)"},

        // Case insensitive
        {"uppercase alias", "a1", "G25P", "gemini-2.5-pro-exp-03-25"},
        {"mixed case with suffix", "a1", "G25p(4096)", "gemini-2.5-pro-exp-03-25(4096)"},

        // Direct name lookup
        {"upstream name direct", "a1", "gemini-2.5-pro-exp-03-25", "gemini-2.5-pro-exp-03-25"},
        {"upstream name with suffix", "a1", "gemini-2.5-pro-exp-03-25(8192)", "gemini-2.5-pro-exp-03-25(8192)"},

        // Cache miss scenarios
        {"non-existent auth", "non-existent", "g25p", ""},
        {"unknown alias", "a1", "unknown-alias", ""},
        {"empty auth ID", "", "g25p", ""},
        {"empty model", "a1", "", ""},
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            resolved := mgr.lookupAPIKeyUpstreamModel(tt.authID, tt.input)
            if resolved != tt.want {
                t.Errorf("lookupAPIKeyUpstreamModel(%q, %q) = %q, want %q", tt.authID, tt.input, resolved, tt.want)
            }
        })
    }
}

func TestAPIKeyModelMappings_ConfigHotReload(t *testing.T) {
    cfg := &internalconfig.Config{
        GeminiKey: []internalconfig.GeminiKey{
            {
                APIKey: "k",
                Models: []internalconfig.GeminiModel{{Name: "gemini-2.5-pro-exp-03-25", Alias: "g25p"}},
            },
        },
    }

    mgr := NewManager(nil, nil, nil)
    mgr.SetConfig(cfg)

    ctx := context.Background()
    _, _ = mgr.Register(ctx, &Auth{ID: "a1", Provider: "gemini", Attributes: map[string]string{"api_key": "k"}})

    // Initial mapping
    if resolved := mgr.lookupAPIKeyUpstreamModel("a1", "g25p"); resolved != "gemini-2.5-pro-exp-03-25" {
        t.Fatalf("before reload: got %q, want %q", resolved, "gemini-2.5-pro-exp-03-25")
    }

    // Hot reload with new mapping
    mgr.SetConfig(&internalconfig.Config{
        GeminiKey: []internalconfig.GeminiKey{
            {
                APIKey: "k",
                Models: []internalconfig.GeminiModel{{Name: "gemini-2.5-flash", Alias: "g25p"}},
            },
        },
    })

    // New mapping should take effect
    if resolved := mgr.lookupAPIKeyUpstreamModel("a1", "g25p"); resolved != "gemini-2.5-flash" {
        t.Fatalf("after reload: got %q, want %q", resolved, "gemini-2.5-flash")
    }
}

func TestAPIKeyModelMappings_MultipleProviders(t *testing.T) {
    cfg := &internalconfig.Config{
        GeminiKey: []internalconfig.GeminiKey{{APIKey: "gemini-key", Models: []internalconfig.GeminiModel{{Name: "gemini-2.5-pro", Alias: "gp"}}}},
        ClaudeKey: []internalconfig.ClaudeKey{{APIKey: "claude-key", Models: []internalconfig.ClaudeModel{{Name: "claude-sonnet-4", Alias: "cs4"}}}},
        CodexKey:  []internalconfig.CodexKey{{APIKey: "codex-key", Models: []internalconfig.CodexModel{{Name: "o3", Alias: "o"}}}},
    }

    mgr := NewManager(nil, nil, nil)
    mgr.SetConfig(cfg)

    ctx := context.Background()
    _, _ = mgr.Register(ctx, &Auth{ID: "gemini-auth", Provider: "gemini", Attributes: map[string]string{"api_key": "gemini-key"}})
    _, _ = mgr.Register(ctx, &Auth{ID: "claude-auth", Provider: "claude", Attributes: map[string]string{"api_key": "claude-key"}})
    _, _ = mgr.Register(ctx, &Auth{ID: "codex-auth", Provider: "codex", Attributes: map[string]string{"api_key": "codex-key"}})

    tests := []struct {
        authID, input, want string
    }{
        {"gemini-auth", "gp", "gemini-2.5-pro"},
        {"claude-auth", "cs4", "claude-sonnet-4"},
        {"codex-auth", "o", "o3"},
    }

    for _, tt := range tests {
        if resolved := mgr.lookupAPIKeyUpstreamModel(tt.authID, tt.input); resolved != tt.want {
            t.Errorf("lookupAPIKeyUpstreamModel(%q, %q) = %q, want %q", tt.authID, tt.input, resolved, tt.want)
        }
    }
}

func TestApplyAPIKeyModelMapping(t *testing.T) {
    cfg := &internalconfig.Config{
        GeminiKey: []internalconfig.GeminiKey{
            {APIKey: "k", Models: []internalconfig.GeminiModel{{Name: "gemini-2.5-pro-exp-03-25", Alias: "g25p"}}},
        },
    }

    mgr := NewManager(nil, nil, nil)
    mgr.SetConfig(cfg)

    ctx := context.Background()
    apiKeyAuth := &Auth{ID: "a1", Provider: "gemini", Attributes: map[string]string{"api_key": "k"}}
    oauthAuth := &Auth{ID: "oauth-auth", Provider: "gemini", Attributes: map[string]string{"auth_kind": "oauth"}}
    _, _ = mgr.Register(ctx, apiKeyAuth)

    tests := []struct {
        name          string
        auth          *Auth
        inputModel    string
        wantModel     string
        wantOriginal  string
        expectMapping bool
    }{
        {
            name:          "api_key auth with alias",
            auth:          apiKeyAuth,
            inputModel:    "g25p(8192)",
            wantModel:     "gemini-2.5-pro-exp-03-25(8192)",
            wantOriginal:  "g25p(8192)",
            expectMapping: true,
        },
        {
            name:          "oauth auth passthrough",
            auth:          oauthAuth,
            inputModel:    "some-model",
            wantModel:     "some-model",
            expectMapping: false,
        },
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            metadata := map[string]any{"existing": "value"}
            resolvedModel, resultMeta := mgr.applyAPIKeyModelMapping(tt.auth, tt.inputModel, metadata)

            if resolvedModel != tt.wantModel {
                t.Errorf("model = %q, want %q", resolvedModel, tt.wantModel)
            }

            if resultMeta["existing"] != "value" {
                t.Error("existing metadata not preserved")
            }

            original, hasOriginal := resultMeta["model_mapping_original_model"].(string)
            if tt.expectMapping {
                if !hasOriginal || original != tt.wantOriginal {
                    t.Errorf("original model = %q, want %q", original, tt.wantOriginal)
                }
            } else {
                if hasOriginal {
                    t.Error("should not set model_mapping_original_model for non-api_key auth")
                }
            }
        })
    }
}
@@ -15,8 +15,10 @@ import (
    "time"

    "github.com/google/uuid"
    internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
    "github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
    "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
    "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
    "github.com/router-for-me/CLIProxyAPI/v6/internal/util"
    cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
    log "github.com/sirupsen/logrus"
@@ -120,6 +122,14 @@ type Manager struct {
    // modelNameMappings stores global model name alias mappings (alias -> upstream name) keyed by channel.
    modelNameMappings atomic.Value

    // runtimeConfig stores the latest application config for request-time decisions.
    // It is initialized in NewManager; never Load() before first Store().
    runtimeConfig atomic.Value

    // apiKeyModelMappings caches resolved model alias mappings for API-key auths.
    // Keyed by auth.ID, value is alias(lower) -> upstream model (including suffix).
    apiKeyModelMappings atomic.Value

    // Optional HTTP RoundTripper provider injected by host.
    rtProvider RoundTripperProvider

@@ -135,7 +145,7 @@ func NewManager(store Store, selector Selector, hook Hook) *Manager {
    if hook == nil {
        hook = NoopHook{}
    }
    return &Manager{
    manager := &Manager{
        store:     store,
        executors: make(map[string]ProviderExecutor),
        selector:  selector,
@@ -143,6 +153,10 @@ func NewManager(store Store, selector Selector, hook Hook) *Manager {
        auths:           make(map[string]*Auth),
        providerOffsets: make(map[string]int),
    }
    // atomic.Value requires non-nil initial value.
    manager.runtimeConfig.Store(&internalconfig.Config{})
    manager.apiKeyModelMappings.Store(apiKeyModelMappingTable(nil))
    return manager
}

func (m *Manager) SetSelector(selector Selector) {
@@ -171,6 +185,181 @@ func (m *Manager) SetRoundTripperProvider(p RoundTripperProvider) {
    m.mu.Unlock()
}

// SetConfig updates the runtime config snapshot used by request-time helpers.
// Callers should provide the latest config on reload so per-credential alias mapping stays in sync.
func (m *Manager) SetConfig(cfg *internalconfig.Config) {
    if m == nil {
        return
    }
    if cfg == nil {
        cfg = &internalconfig.Config{}
    }
    m.runtimeConfig.Store(cfg)
    m.rebuildAPIKeyModelMappingsFromRuntimeConfig()
}

func (m *Manager) lookupAPIKeyUpstreamModel(authID, requestedModel string) string {
    if m == nil {
        return ""
    }
    authID = strings.TrimSpace(authID)
    if authID == "" {
        return ""
    }
    requestedModel = strings.TrimSpace(requestedModel)
    if requestedModel == "" {
        return ""
    }
    table, _ := m.apiKeyModelMappings.Load().(apiKeyModelMappingTable)
    if table == nil {
        return ""
    }
    byAlias := table[authID]
    if len(byAlias) == 0 {
        return ""
    }
    key := strings.ToLower(thinking.ParseSuffix(requestedModel).ModelName)
    if key == "" {
        key = strings.ToLower(requestedModel)
    }
    resolved := strings.TrimSpace(byAlias[key])
    if resolved == "" {
        return ""
    }
    // Preserve thinking suffix from the client's requested model unless config already has one.
    requestResult := thinking.ParseSuffix(requestedModel)
    if thinking.ParseSuffix(resolved).HasSuffix {
        return resolved
    }
    if requestResult.HasSuffix && requestResult.RawSuffix != "" {
        return resolved + "(" + requestResult.RawSuffix + ")"
    }
    return resolved
}

func (m *Manager) rebuildAPIKeyModelMappingsFromRuntimeConfig() {
    if m == nil {
        return
    }
    cfg, _ := m.runtimeConfig.Load().(*internalconfig.Config)
    if cfg == nil {
        cfg = &internalconfig.Config{}
    }
    m.mu.Lock()
    defer m.mu.Unlock()
    m.rebuildAPIKeyModelMappingsLocked(cfg)
}

func (m *Manager) rebuildAPIKeyModelMappingsLocked(cfg *internalconfig.Config) {
    if m == nil {
        return
    }
    if cfg == nil {
        cfg = &internalconfig.Config{}
    }

    out := make(apiKeyModelMappingTable)
    for _, auth := range m.auths {
        if auth == nil {
            continue
        }
        if strings.TrimSpace(auth.ID) == "" {
            continue
        }
        kind, _ := auth.AccountInfo()
        if !strings.EqualFold(strings.TrimSpace(kind), "api_key") {
            continue
        }

        byAlias := make(map[string]string)
        provider := strings.ToLower(strings.TrimSpace(auth.Provider))
        switch provider {
        case "gemini":
            if entry := resolveGeminiAPIKeyConfig(cfg, auth); entry != nil {
                compileAPIKeyModelMappingsForModels(byAlias, entry.Models)
            }
        case "claude":
            if entry := resolveClaudeAPIKeyConfig(cfg, auth); entry != nil {
                compileAPIKeyModelMappingsForModels(byAlias, entry.Models)
            }
        case "codex":
            if entry := resolveCodexAPIKeyConfig(cfg, auth); entry != nil {
                compileAPIKeyModelMappingsForModels(byAlias, entry.Models)
            }
        case "vertex":
            if entry := resolveVertexAPIKeyConfig(cfg, auth); entry != nil {
                compileAPIKeyModelMappingsForModels(byAlias, entry.Models)
            }
        default:
            // OpenAI-compat uses config selection from auth.Attributes.
            providerKey := ""
            compatName := ""
            if auth.Attributes != nil {
                providerKey = strings.TrimSpace(auth.Attributes["provider_key"])
                compatName = strings.TrimSpace(auth.Attributes["compat_name"])
            }
            if compatName != "" || strings.EqualFold(strings.TrimSpace(auth.Provider), "openai-compatibility") {
                if entry := resolveOpenAICompatConfig(cfg, providerKey, compatName, auth.Provider); entry != nil {
                    compileAPIKeyModelMappingsForModels(byAlias, entry.Models)
                }
            }
        }

        if len(byAlias) > 0 {
            out[auth.ID] = byAlias
        }
    }

    m.apiKeyModelMappings.Store(out)
}

func compileAPIKeyModelMappingsForModels[T interface {
    GetName() string
    GetAlias() string
}](out map[string]string, models []T) {
    if out == nil {
        return
    }
    for i := range models {
        alias := strings.TrimSpace(models[i].GetAlias())
        name := strings.TrimSpace(models[i].GetName())
        if alias == "" || name == "" {
            continue
        }
        aliasKey := strings.ToLower(thinking.ParseSuffix(alias).ModelName)
        if aliasKey == "" {
            aliasKey = strings.ToLower(alias)
        }
        // Config priority: first alias wins.
        if _, exists := out[aliasKey]; exists {
            continue
        }
        out[aliasKey] = name
        // Also allow direct lookup by upstream name (case-insensitive), so lookups on already-upstream
        // models remain a cheap no-op.
        nameKey := strings.ToLower(thinking.ParseSuffix(name).ModelName)
        if nameKey == "" {
            nameKey = strings.ToLower(name)
        }
        if nameKey != "" {
            if _, exists := out[nameKey]; !exists {
                out[nameKey] = name
            }
        }
        // Preserve config suffix priority by seeding a base-name lookup when name already has suffix.
        nameResult := thinking.ParseSuffix(name)
        if nameResult.HasSuffix {
            baseKey := strings.ToLower(strings.TrimSpace(nameResult.ModelName))
            if baseKey != "" {
                if _, exists := out[baseKey]; !exists {
                    out[baseKey] = name
                }
            }
        }
    }
}

// SetRetryConfig updates retry attempts and cooldown wait interval.
func (m *Manager) SetRetryConfig(retry int, maxRetryInterval time.Duration) {
    if m == nil {
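Worked through by hand, a config entry {Name: "gemini-2.5-flash(low)", Alias: "g25f"}
from the test file above seeds two table keys (the nameKey and baseKey branches coincide
here, since both lower-case the parsed base name):

    byAlias := map[string]string{
        "g25f":             "gemini-2.5-flash(low)", // alias key
        "gemini-2.5-flash": "gemini-2.5-flash(low)", // upstream base-name key
    }
    // lookupAPIKeyUpstreamModel("a1", "g25f(high)") lowers the parsed base to
    // "g25f", finds "gemini-2.5-flash(low)", and returns it unchanged because
    // the config value already carries a suffix -- matching the
    // "config suffix priority" case in TestLookupAPIKeyUpstreamModel.
    _ = byAlias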
@@ -219,6 +408,7 @@ func (m *Manager) Register(ctx context.Context, auth *Auth) (*Auth, error) {
    m.mu.Lock()
    m.auths[auth.ID] = auth.Clone()
    m.mu.Unlock()
    m.rebuildAPIKeyModelMappingsFromRuntimeConfig()
    _ = m.persist(ctx, auth)
    m.hook.OnAuthRegistered(ctx, auth.Clone())
    return auth.Clone(), nil
@@ -237,6 +427,7 @@ func (m *Manager) Update(ctx context.Context, auth *Auth) (*Auth, error) {
    auth.EnsureIndex()
    m.auths[auth.ID] = auth.Clone()
    m.mu.Unlock()
    m.rebuildAPIKeyModelMappingsFromRuntimeConfig()
    _ = m.persist(ctx, auth)
    m.hook.OnAuthUpdated(ctx, auth.Clone())
    return auth.Clone(), nil
@@ -261,6 +452,11 @@ func (m *Manager) Load(ctx context.Context) error {
    auth.EnsureIndex()
    m.auths[auth.ID] = auth.Clone()
    }
    cfg, _ := m.runtimeConfig.Load().(*internalconfig.Config)
    if cfg == nil {
        cfg = &internalconfig.Config{}
    }
    m.rebuildAPIKeyModelMappingsLocked(cfg)
    return nil
}

@@ -558,6 +754,7 @@ func (m *Manager) executeWithProvider(ctx context.Context, provider string, req
    execReq := req
    execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth)
    execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata)
    execReq.Model, execReq.Metadata = m.applyAPIKeyModelMapping(auth, execReq.Model, execReq.Metadata)
    resp, errExec := executor.Execute(execCtx, auth, execReq, opts)
    result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil}
    if errExec != nil {
@@ -606,6 +803,7 @@ func (m *Manager) executeCountWithProvider(ctx context.Context, provider string,
    execReq := req
    execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth)
    execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata)
    execReq.Model, execReq.Metadata = m.applyAPIKeyModelMapping(auth, execReq.Model, execReq.Metadata)
    resp, errExec := executor.CountTokens(execCtx, auth, execReq, opts)
    result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil}
    if errExec != nil {
@@ -654,6 +852,7 @@ func (m *Manager) executeStreamWithProvider(ctx context.Context, provider string
    execReq := req
    execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth)
    execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata)
    execReq.Model, execReq.Metadata = m.applyAPIKeyModelMapping(auth, execReq.Model, execReq.Metadata)
    chunks, errStream := executor.ExecuteStream(execCtx, auth, execReq, opts)
    if errStream != nil {
        rerr := &Error{Message: errStream.Error()}
@@ -712,7 +911,6 @@ func stripPrefixFromMetadata(metadata map[string]any, needle string) map[string]
    return metadata
    }
    keys := []string{
        util.ThinkingOriginalModelMetadataKey,
        util.GeminiOriginalModelMetadataKey,
        util.ModelMappingOriginalModelMetadataKey,
    }
@@ -740,6 +938,215 @@ func stripPrefixFromMetadata(metadata map[string]any, needle string) map[string]
    return out
}

func (m *Manager) applyAPIKeyModelMapping(auth *Auth, requestedModel string, metadata map[string]any) (string, map[string]any) {
    if m == nil || auth == nil {
        return requestedModel, metadata
    }

    kind, _ := auth.AccountInfo()
    if !strings.EqualFold(strings.TrimSpace(kind), "api_key") {
        return requestedModel, metadata
    }

    requestedModel = strings.TrimSpace(requestedModel)
    if requestedModel == "" {
        return requestedModel, metadata
    }

    // Fast path: lookup per-auth mapping table (keyed by auth.ID).
    if resolved := m.lookupAPIKeyUpstreamModel(auth.ID, requestedModel); resolved != "" {
        return applyUpstreamModelOverride(requestedModel, resolved, metadata)
    }

    // Slow path: scan config for the matching credential entry and resolve alias.
    // This acts as a safety net if mappings are stale or auth.ID is missing.
    cfg, _ := m.runtimeConfig.Load().(*internalconfig.Config)
    if cfg == nil {
        cfg = &internalconfig.Config{}
    }

    provider := strings.ToLower(strings.TrimSpace(auth.Provider))
    upstreamModel := ""
    switch provider {
    case "gemini":
        upstreamModel = resolveUpstreamModelForGeminiAPIKey(cfg, auth, requestedModel)
    case "claude":
        upstreamModel = resolveUpstreamModelForClaudeAPIKey(cfg, auth, requestedModel)
    case "codex":
        upstreamModel = resolveUpstreamModelForCodexAPIKey(cfg, auth, requestedModel)
    case "vertex":
        upstreamModel = resolveUpstreamModelForVertexAPIKey(cfg, auth, requestedModel)
    default:
        upstreamModel = resolveUpstreamModelForOpenAICompatAPIKey(cfg, auth, requestedModel)
    }

    // applyUpstreamModelOverride lives in model_name_mappings.go.
    return applyUpstreamModelOverride(requestedModel, upstreamModel, metadata)
}

// APIKeyConfigEntry is a generic interface for API key configurations.
type APIKeyConfigEntry interface {
    GetAPIKey() string
    GetBaseURL() string
}

func resolveAPIKeyConfig[T APIKeyConfigEntry](entries []T, auth *Auth) *T {
    if auth == nil || len(entries) == 0 {
        return nil
    }
    attrKey, attrBase := "", ""
    if auth.Attributes != nil {
        attrKey = strings.TrimSpace(auth.Attributes["api_key"])
        attrBase = strings.TrimSpace(auth.Attributes["base_url"])
    }
    for i := range entries {
        entry := &entries[i]
        cfgKey := strings.TrimSpace((*entry).GetAPIKey())
        cfgBase := strings.TrimSpace((*entry).GetBaseURL())
        if attrKey != "" && attrBase != "" {
            if strings.EqualFold(cfgKey, attrKey) && strings.EqualFold(cfgBase, attrBase) {
                return entry
            }
            continue
        }
        if attrKey != "" && strings.EqualFold(cfgKey, attrKey) {
            if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) {
                return entry
            }
        }
        if attrKey == "" && attrBase != "" && strings.EqualFold(cfgBase, attrBase) {
            return entry
        }
    }
    if attrKey != "" {
        for i := range entries {
            entry := &entries[i]
            if strings.EqualFold(strings.TrimSpace((*entry).GetAPIKey()), attrKey) {
                return entry
            }
        }
    }
    return nil
}

func resolveGeminiAPIKeyConfig(cfg *internalconfig.Config, auth *Auth) *internalconfig.GeminiKey {
    if cfg == nil {
        return nil
    }
    return resolveAPIKeyConfig(cfg.GeminiKey, auth)
}

func resolveClaudeAPIKeyConfig(cfg *internalconfig.Config, auth *Auth) *internalconfig.ClaudeKey {
    if cfg == nil {
        return nil
    }
    return resolveAPIKeyConfig(cfg.ClaudeKey, auth)
}

func resolveCodexAPIKeyConfig(cfg *internalconfig.Config, auth *Auth) *internalconfig.CodexKey {
    if cfg == nil {
        return nil
    }
    return resolveAPIKeyConfig(cfg.CodexKey, auth)
}

func resolveVertexAPIKeyConfig(cfg *internalconfig.Config, auth *Auth) *internalconfig.VertexCompatKey {
    if cfg == nil {
        return nil
    }
    return resolveAPIKeyConfig(cfg.VertexCompatAPIKey, auth)
}

func resolveUpstreamModelForGeminiAPIKey(cfg *internalconfig.Config, auth *Auth, requestedModel string) string {
    entry := resolveGeminiAPIKeyConfig(cfg, auth)
    if entry == nil {
        return ""
    }
    return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models))
}

func resolveUpstreamModelForClaudeAPIKey(cfg *internalconfig.Config, auth *Auth, requestedModel string) string {
    entry := resolveClaudeAPIKeyConfig(cfg, auth)
    if entry == nil {
        return ""
    }
    return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models))
}

func resolveUpstreamModelForCodexAPIKey(cfg *internalconfig.Config, auth *Auth, requestedModel string) string {
    entry := resolveCodexAPIKeyConfig(cfg, auth)
    if entry == nil {
        return ""
    }
    return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models))
}

func resolveUpstreamModelForVertexAPIKey(cfg *internalconfig.Config, auth *Auth, requestedModel string) string {
    entry := resolveVertexAPIKeyConfig(cfg, auth)
    if entry == nil {
        return ""
    }
    return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models))
}

func resolveUpstreamModelForOpenAICompatAPIKey(cfg *internalconfig.Config, auth *Auth, requestedModel string) string {
    providerKey := ""
    compatName := ""
    if auth != nil && len(auth.Attributes) > 0 {
        providerKey = strings.TrimSpace(auth.Attributes["provider_key"])
        compatName = strings.TrimSpace(auth.Attributes["compat_name"])
    }
    if compatName == "" && !strings.EqualFold(strings.TrimSpace(auth.Provider), "openai-compatibility") {
        return ""
    }
    entry := resolveOpenAICompatConfig(cfg, providerKey, compatName, auth.Provider)
    if entry == nil {
        return ""
    }
    return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models))
}

type apiKeyModelMappingTable map[string]map[string]string

func resolveOpenAICompatConfig(cfg *internalconfig.Config, providerKey, compatName, authProvider string) *internalconfig.OpenAICompatibility {
    if cfg == nil {
        return nil
    }
    candidates := make([]string, 0, 3)
    if v := strings.TrimSpace(compatName); v != "" {
        candidates = append(candidates, v)
    }
    if v := strings.TrimSpace(providerKey); v != "" {
        candidates = append(candidates, v)
    }
    if v := strings.TrimSpace(authProvider); v != "" {
        candidates = append(candidates, v)
    }
    for i := range cfg.OpenAICompatibility {
        compat := &cfg.OpenAICompatibility[i]
        for _, candidate := range candidates {
            if candidate != "" && strings.EqualFold(strings.TrimSpace(candidate), compat.Name) {
                return compat
            }
        }
    }
    return nil
}

func asModelAliasEntries[T interface {
    GetName() string
    GetAlias() string
}](models []T) []modelMappingEntry {
    if len(models) == 0 {
        return nil
    }
    out := make([]modelMappingEntry, 0, len(models))
    for i := range models {
        out = append(out, models[i])
    }
    return out
}

func (m *Manager) normalizeProviders(providers []string) []string {
    if len(providers) == 0 {
        return nil

@@ -4,9 +4,15 @@ import (
    "strings"

    internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
    "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
    "github.com/router-for-me/CLIProxyAPI/v6/internal/util"
)

type modelMappingEntry interface {
    GetName() string
    GetAlias() string
}

type modelNameMappingTable struct {
    // reverse maps channel -> alias (lower) -> original upstream model name.
    reverse map[string]map[string]string
@@ -71,9 +77,14 @@ func (m *Manager) SetOAuthModelMappings(mappings map[string][]internalconfig.Mod
// requested model for response translation.
func (m *Manager) applyOAuthModelMapping(auth *Auth, requestedModel string, metadata map[string]any) (string, map[string]any) {
    upstreamModel := m.resolveOAuthUpstreamModel(auth, requestedModel)
    return applyUpstreamModelOverride(requestedModel, upstreamModel, metadata)
}

func applyUpstreamModelOverride(requestedModel, upstreamModel string, metadata map[string]any) (string, map[string]any) {
    if upstreamModel == "" {
        return requestedModel, metadata
    }

    out := make(map[string]any, 1)
    if len(metadata) > 0 {
        out = make(map[string]any, len(metadata)+1)
@@ -81,24 +92,92 @@ func (m *Manager) applyOAuthModelMapping(auth *Auth, requestedModel string, meta
            out[k] = v
        }
    }
    // Store the requested alias (e.g., "gp") so downstream can use it to look up
    // model metadata from the global registry where it was registered under this alias.

    // Preserve the original client model string (including any suffix) for downstream.
    out[util.ModelMappingOriginalModelMetadataKey] = requestedModel
    return upstreamModel, out
}

func resolveModelAliasFromConfigModels(requestedModel string, models []modelMappingEntry) string {
    requestedModel = strings.TrimSpace(requestedModel)
    if requestedModel == "" {
        return ""
    }
    if len(models) == 0 {
        return ""
    }

    requestResult := thinking.ParseSuffix(requestedModel)
    base := requestResult.ModelName
    candidates := []string{base}
    if base != requestedModel {
        candidates = append(candidates, requestedModel)
    }

    preserveSuffix := func(resolved string) string {
        resolved = strings.TrimSpace(resolved)
        if resolved == "" {
            return ""
        }
        if thinking.ParseSuffix(resolved).HasSuffix {
            return resolved
        }
        if requestResult.HasSuffix && requestResult.RawSuffix != "" {
            return resolved + "(" + requestResult.RawSuffix + ")"
        }
        return resolved
    }

    for i := range models {
        name := strings.TrimSpace(models[i].GetName())
        alias := strings.TrimSpace(models[i].GetAlias())
        for _, candidate := range candidates {
            if candidate == "" {
                continue
            }
            if alias != "" && strings.EqualFold(alias, candidate) {
                if name != "" {
                    return preserveSuffix(name)
                }
                return preserveSuffix(candidate)
            }
            if name != "" && strings.EqualFold(name, candidate) {
                return preserveSuffix(name)
            }
        }
    }
    return ""
}
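The preserveSuffix closure encodes the precedence the tests further below assert: a suffix
already present on the config name wins, otherwise the client's suffix is re-attached. A
same-package sketch using a hypothetical fakeEntry type (modelMappingEntry only requires
GetName/GetAlias, so any small struct will do; expected values match the test table):

    type fakeEntry struct{ name, alias string }

    func (f fakeEntry) GetName() string  { return f.name }
    func (f fakeEntry) GetAlias() string { return f.alias }

    func demoPrecedence() {
        entries := []modelMappingEntry{
            fakeEntry{name: "claude-sonnet-4-5-20250514(low)", alias: "claude-sonnet-4-5"},
            fakeEntry{name: "gemini-2.5-pro-exp-03-25", alias: "gemini-2.5-pro"},
        }
        _ = resolveModelAliasFromConfigModels("claude-sonnet-4-5(high)", entries) // "claude-sonnet-4-5-20250514(low)": config suffix wins
        _ = resolveModelAliasFromConfigModels("gemini-2.5-pro(8192)", entries)    // "gemini-2.5-pro-exp-03-25(8192)": client suffix re-attached
    }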
// resolveOAuthUpstreamModel resolves the upstream model name from OAuth model mappings.
|
||||
// If a mapping exists, returns the original (upstream) model name that corresponds
|
||||
// to the requested alias.
|
||||
//
|
||||
// If the requested model contains a thinking suffix (e.g., "gemini-2.5-pro(8192)"),
|
||||
// the suffix is preserved in the returned model name. However, if the mapping's
|
||||
// original name already contains a suffix, the config suffix takes priority.
|
||||
func (m *Manager) resolveOAuthUpstreamModel(auth *Auth, requestedModel string) string {
|
||||
return resolveUpstreamModelFromMappingTable(m, auth, requestedModel, modelMappingChannel(auth))
|
||||
}
|
||||
|
||||
func resolveUpstreamModelFromMappingTable(m *Manager, auth *Auth, requestedModel, channel string) string {
|
||||
if m == nil || auth == nil {
|
||||
return ""
|
||||
}
|
||||
channel := modelMappingChannel(auth)
|
||||
if channel == "" {
|
||||
return ""
|
||||
}
|
||||
key := strings.ToLower(strings.TrimSpace(requestedModel))
|
||||
if key == "" {
|
||||
return ""
|
||||
|
||||
// Extract thinking suffix from requested model using ParseSuffix
|
||||
requestResult := thinking.ParseSuffix(requestedModel)
|
||||
baseModel := requestResult.ModelName
|
||||
|
||||
// Candidate keys to match: base model and raw input (handles suffix-parsing edge cases).
|
||||
candidates := []string{baseModel}
|
||||
if baseModel != requestedModel {
|
||||
candidates = append(candidates, requestedModel)
|
||||
}
|
||||
|
||||
raw := m.modelNameMappings.Load()
|
||||
table, _ := raw.(*modelNameMappingTable)
|
||||
if table == nil || table.reverse == nil {
|
||||
@@ -108,12 +187,33 @@ func (m *Manager) resolveOAuthUpstreamModel(auth *Auth, requestedModel string) s
|
||||
if rev == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
for _, candidate := range candidates {
|
||||
key := strings.ToLower(strings.TrimSpace(candidate))
|
||||
if key == "" {
|
||||
continue
|
||||
}
|
||||
original := strings.TrimSpace(rev[key])
|
||||
if original == "" || strings.EqualFold(original, requestedModel) {
|
||||
if original == "" {
|
||||
continue
|
||||
}
|
||||
if strings.EqualFold(original, baseModel) {
|
||||
return ""
|
||||
}
|
||||
|
||||
// If config already has suffix, it takes priority.
|
||||
if thinking.ParseSuffix(original).HasSuffix {
|
||||
return original
|
||||
}
|
||||
// Preserve user's thinking suffix on the resolved model.
|
||||
if requestResult.HasSuffix && requestResult.RawSuffix != "" {
|
||||
return original + "(" + requestResult.RawSuffix + ")"
|
||||
}
|
||||
return original
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// modelMappingChannel extracts the OAuth model mapping channel from an Auth object.
|
||||
// It determines the provider and auth kind from the Auth's attributes and delegates
|
||||
|
||||
187
sdk/cliproxy/auth/model_name_mappings_test.go
Normal file
187
sdk/cliproxy/auth/model_name_mappings_test.go
Normal file
@@ -0,0 +1,187 @@
package auth

import (
	"testing"

	internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
)

func TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) {
	t.Parallel()

	tests := []struct {
		name     string
		mappings map[string][]internalconfig.ModelNameMapping
		channel  string
		input    string
		want     string
	}{
		{
			name: "numeric suffix preserved",
			mappings: map[string][]internalconfig.ModelNameMapping{
				"gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}},
			},
			channel: "gemini-cli",
			input:   "gemini-2.5-pro(8192)",
			want:    "gemini-2.5-pro-exp-03-25(8192)",
		},
		{
			name: "level suffix preserved",
			mappings: map[string][]internalconfig.ModelNameMapping{
				"claude": {{Name: "claude-sonnet-4-5-20250514", Alias: "claude-sonnet-4-5"}},
			},
			channel: "claude",
			input:   "claude-sonnet-4-5(high)",
			want:    "claude-sonnet-4-5-20250514(high)",
		},
		{
			name: "no suffix unchanged",
			mappings: map[string][]internalconfig.ModelNameMapping{
				"gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}},
			},
			channel: "gemini-cli",
			input:   "gemini-2.5-pro",
			want:    "gemini-2.5-pro-exp-03-25",
		},
		{
			name: "config suffix takes priority",
			mappings: map[string][]internalconfig.ModelNameMapping{
				"claude": {{Name: "claude-sonnet-4-5-20250514(low)", Alias: "claude-sonnet-4-5"}},
			},
			channel: "claude",
			input:   "claude-sonnet-4-5(high)",
			want:    "claude-sonnet-4-5-20250514(low)",
		},
		{
			name: "auto suffix preserved",
			mappings: map[string][]internalconfig.ModelNameMapping{
				"gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}},
			},
			channel: "gemini-cli",
			input:   "gemini-2.5-pro(auto)",
			want:    "gemini-2.5-pro-exp-03-25(auto)",
		},
		{
			name: "none suffix preserved",
			mappings: map[string][]internalconfig.ModelNameMapping{
				"gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}},
			},
			channel: "gemini-cli",
			input:   "gemini-2.5-pro(none)",
			want:    "gemini-2.5-pro-exp-03-25(none)",
		},
		{
			name: "case insensitive alias lookup with suffix",
			mappings: map[string][]internalconfig.ModelNameMapping{
				"gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "Gemini-2.5-Pro"}},
			},
			channel: "gemini-cli",
			input:   "gemini-2.5-pro(high)",
			want:    "gemini-2.5-pro-exp-03-25(high)",
		},
		{
			name: "no mapping returns empty",
			mappings: map[string][]internalconfig.ModelNameMapping{
				"gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}},
			},
			channel: "gemini-cli",
			input:   "unknown-model(high)",
			want:    "",
		},
		{
			name: "wrong channel returns empty",
			mappings: map[string][]internalconfig.ModelNameMapping{
				"gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}},
			},
			channel: "claude",
			input:   "gemini-2.5-pro(high)",
			want:    "",
		},
		{
			name: "empty suffix filtered out",
			mappings: map[string][]internalconfig.ModelNameMapping{
				"gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}},
			},
			channel: "gemini-cli",
			input:   "gemini-2.5-pro()",
			want:    "gemini-2.5-pro-exp-03-25",
		},
		{
			name: "incomplete suffix treated as no suffix",
			mappings: map[string][]internalconfig.ModelNameMapping{
				"gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro(high"}},
			},
			channel: "gemini-cli",
			input:   "gemini-2.5-pro(high",
			want:    "gemini-2.5-pro-exp-03-25",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()

			mgr := NewManager(nil, nil, nil)
			mgr.SetConfig(&internalconfig.Config{})
			mgr.SetOAuthModelMappings(tt.mappings)

			auth := createAuthForChannel(tt.channel)
			got := mgr.resolveOAuthUpstreamModel(auth, tt.input)
			if got != tt.want {
				t.Errorf("resolveOAuthUpstreamModel(%q) = %q, want %q", tt.input, got, tt.want)
			}
		})
	}
}

func createAuthForChannel(channel string) *Auth {
	switch channel {
	case "gemini-cli":
		return &Auth{Provider: "gemini-cli"}
	case "claude":
		return &Auth{Provider: "claude", Attributes: map[string]string{"auth_kind": "oauth"}}
	case "vertex":
		return &Auth{Provider: "vertex", Attributes: map[string]string{"auth_kind": "oauth"}}
	case "codex":
		return &Auth{Provider: "codex", Attributes: map[string]string{"auth_kind": "oauth"}}
	case "aistudio":
		return &Auth{Provider: "aistudio"}
	case "antigravity":
		return &Auth{Provider: "antigravity"}
	case "qwen":
		return &Auth{Provider: "qwen"}
	case "iflow":
		return &Auth{Provider: "iflow"}
	default:
		return &Auth{Provider: channel}
	}
}

func TestApplyOAuthModelMapping_SuffixPreservation(t *testing.T) {
	t.Parallel()

	mappings := map[string][]internalconfig.ModelNameMapping{
		"gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}},
	}

	mgr := NewManager(nil, nil, nil)
	mgr.SetConfig(&internalconfig.Config{})
	mgr.SetOAuthModelMappings(mappings)

	auth := &Auth{ID: "test-auth-id", Provider: "gemini-cli"}
	metadata := map[string]any{"existing": "value"}

	resolvedModel, resultMeta := mgr.applyOAuthModelMapping(auth, "gemini-2.5-pro(8192)", metadata)
	if resolvedModel != "gemini-2.5-pro-exp-03-25(8192)" {
		t.Errorf("applyOAuthModelMapping() model = %q, want %q", resolvedModel, "gemini-2.5-pro-exp-03-25(8192)")
	}

	originalModel, ok := resultMeta["model_mapping_original_model"].(string)
	if !ok || originalModel != "gemini-2.5-pro(8192)" {
		t.Errorf("applyOAuthModelMapping() metadata[model_mapping_original_model] = %v, want %q", resultMeta["model_mapping_original_model"], "gemini-2.5-pro(8192)")
	}

	if resultMeta["existing"] != "value" {
		t.Errorf("applyOAuthModelMapping() metadata[existing] = %v, want %q", resultMeta["existing"], "value")
	}
}
@@ -215,6 +215,7 @@ func (b *Builder) Build() (*Service, error) {
	}
	// Attach a default RoundTripper provider so providers can opt-in per-auth transports.
	coreManager.SetRoundTripperProvider(newDefaultRoundTripperProvider())
	coreManager.SetConfig(b.cfg)
	coreManager.SetOAuthModelMappings(b.cfg.OAuthModelMappings)

	service := &Service{
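SetOAuthModelMappings is wired here at build time and again on config reload below. Its internals are not shown in this diff; the following is a hypothetical sketch of the lock-free table swap implied by m.modelNameMappings.Load() and *modelNameMappingTable in the earlier hunk. Every name beyond those two is an assumption.

package main

import (
	"strings"
	"sync/atomic"
)

// modelNameMappingTable holds a reverse index: channel -> lowercased alias -> upstream name.
type modelNameMappingTable struct {
	reverse map[string]map[string]string
}

type mapping struct{ Name, Alias string } // stand-in for internalconfig.ModelNameMapping

type Manager struct {
	modelNameMappings atomic.Value // stores *modelNameMappingTable
}

// SetOAuthModelMappings rebuilds the reverse index and swaps it in atomically,
// so concurrent readers never observe a half-built table.
func (m *Manager) SetOAuthModelMappings(mappings map[string][]mapping) {
	table := &modelNameMappingTable{reverse: make(map[string]map[string]string, len(mappings))}
	for channel, entries := range mappings {
		rev := make(map[string]string, len(entries))
		for _, e := range entries {
			rev[strings.ToLower(strings.TrimSpace(e.Alias))] = e.Name
		}
		table.reverse[channel] = rev
	}
	m.modelNameMappings.Store(table)
}

func main() {}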
@@ -553,6 +553,7 @@ func (s *Service) Run(ctx context.Context) error {
			s.cfg = newCfg
			s.cfgMu.Unlock()
			if s.coreManager != nil {
				s.coreManager.SetConfig(newCfg)
				s.coreManager.SetOAuthModelMappings(newCfg.OAuthModelMappings)
			}
			s.rebindExecutors()
@@ -825,6 +826,7 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
				OwnedBy:     compat.Name,
				Type:        "openai-compatibility",
				DisplayName: modelID,
				UserDefined: true,
			})
		}
		// Register and return
@@ -1157,6 +1159,7 @@ func buildConfigModels[T modelEntry](models []T, ownedBy, modelType string) []*M
			OwnedBy:     ownedBy,
			Type:        modelType,
			DisplayName: display,
			UserDefined: true,
		}
		if name != "" {
			if upstream := registry.LookupStaticModelInfo(name); upstream != nil && upstream.Thinking != nil {
@@ -3,9 +3,10 @@ package test
import (
	"testing"

	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

// TestModelAliasThinkingSuffix tests the 32 test cases defined in docs/thinking_suffix_test_cases.md
@@ -178,7 +179,7 @@ func TestModelAliasThinkingSuffix(t *testing.T) {
			}
		}

		// Step 5: Test Gemini 2.5 thinkingBudget application using real ApplyThinkingMetadataCLI flow
		// Step 5: Test Gemini 2.5 thinkingBudget application using thinking.ApplyThinking
		if tt.expectedField == "thinkingBudget" && util.IsGemini25Model(tt.upstreamModel) {
			body := []byte(`{"request":{"contents":[]}}`)
@@ -195,8 +196,13 @@ func TestModelAliasThinkingSuffix(t *testing.T) {
				testMetadata[k] = v
			}

			// Use the exported ApplyThinkingMetadataCLI which includes the fallback logic
			result := executor.ApplyThinkingMetadataCLI(body, testMetadata, tt.upstreamModel)
			// Merge thinking config from metadata into body
			body = applyThinkingFromMetadata(body, testMetadata)

			// Use thinking.ApplyThinking for unified thinking config handling
			// Note: ApplyThinking now takes model string, not *ModelInfo
			result, _ := thinking.ApplyThinking(body, tt.upstreamModel, "gemini-cli")

			budgetVal := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget")

			expectedBudget := tt.expectedValue.(int)
@@ -209,3 +215,48 @@ func TestModelAliasThinkingSuffix(t *testing.T) {
		})
	}
}

// applyThinkingFromMetadata merges thinking configuration from metadata into the payload.
func applyThinkingFromMetadata(payload []byte, metadata map[string]any) []byte {
	if len(metadata) == 0 {
		return payload
	}

	// Merge thinking_budget from metadata if present
	if budget, ok := metadata["thinking_budget"]; ok {
		if budgetVal, okNum := parseNumberToInt(budget); okNum {
			payload, _ = sjson.SetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget", budgetVal)
		}
	}

	// Merge reasoning_effort from metadata if present
	if effort, ok := metadata["reasoning_effort"]; ok {
		if effortStr, okStr := effort.(string); okStr && effortStr != "" {
			payload, _ = sjson.SetBytes(payload, "request.generationConfig.thinkingConfig.thinkingLevel", effortStr)
		}
	}

	// Merge thinking_include_thoughts from metadata if present
	if include, ok := metadata["thinking_include_thoughts"]; ok {
		if includeBool, okBool := include.(bool); okBool {
			payload, _ = sjson.SetBytes(payload, "request.generationConfig.thinkingConfig.includeThoughts", includeBool)
		}
	}

	return payload
}

// parseNumberToInt safely converts various numeric types to int
func parseNumberToInt(raw any) (int, bool) {
	switch v := raw.(type) {
	case int:
		return v, true
	case int32:
		return int(v), true
	case int64:
		return int(v), true
	case float64:
		return int(v), true
	}
	return 0, false
}
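The helpers above back the step-5 path that replaced the executor.ApplyThinkingMetadataCLI call. As a quick standalone illustration (not part of the commit) of what the sjson write produces and how the test reads it back with gjson:

package main

import (
	"fmt"

	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

func main() {
	body := []byte(`{"request":{"contents":[]}}`)

	// Equivalent of applyThinkingFromMetadata with metadata {"thinking_budget": 8192};
	// sjson creates the intermediate generationConfig/thinkingConfig objects as needed.
	body, _ = sjson.SetBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)

	// Equivalent of the test's verification read.
	budget := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget")
	fmt.Println(budget.Int()) // 8192
}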
File diff suppressed because it is too large