diff --git a/README.md b/README.md
index 7875a989..bd339982 100644
--- a/README.md
+++ b/README.md
@@ -130,6 +130,10 @@ Windows-native CLIProxyAPI fork with TUI, system tray, and multi-provider OAuth
 
 VSCode extension for quick switching between Claude Code models, featuring integrated CLIProxyAPI as its backend with automatic background lifecycle management.
 
+### [ZeroLimit](https://github.com/0xtbug/zero-limit)
+
+Windows desktop app built with Tauri + React for monitoring AI coding assistant quotas via CLIProxyAPI. Track usage across Gemini, Claude, OpenAI Codex, and Antigravity accounts with a real-time dashboard, system tray integration, and one-click proxy control - no API keys needed.
+
 > [!NOTE]
 > If you developed a project based on CLIProxyAPI, please open a PR to add it to this list.
 
diff --git a/README_CN.md b/README_CN.md
index fdc8d64c..1b3ed74b 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -129,6 +129,10 @@ CLI 封装器,用于通过 CLIProxyAPI OAuth 即时切换多个 Claude 账户
 
 一款 VSCode 扩展,提供了在 VSCode 中快速切换 Claude Code 模型的功能,内置 CLIProxyAPI 作为其后端,支持后台自动启动和关闭。
 
+### [ZeroLimit](https://github.com/0xtbug/zero-limit)
+
+Windows 桌面应用,基于 Tauri + React 构建,用于通过 CLIProxyAPI 监控 AI 编程助手配额。支持跨 Gemini、Claude、OpenAI Codex 和 Antigravity 账户的使用量追踪,提供实时仪表盘、系统托盘集成和一键代理控制,无需 API 密钥。
+
 > [!NOTE]
 > 如果你开发了基于 CLIProxyAPI 的项目,请提交一个 PR(拉取请求)将其添加到此列表中。
 
diff --git a/cmd/server/main.go b/cmd/server/main.go
index f9bb2080..385d7cfa 100644
--- a/cmd/server/main.go
+++ b/cmd/server/main.go
@@ -61,6 +61,7 @@ func main() {
 	var iflowLogin bool
 	var iflowCookie bool
 	var noBrowser bool
+	var oauthCallbackPort int
 	var antigravityLogin bool
 	var projectID string
 	var vertexImport string
@@ -75,6 +76,7 @@ func main() {
 	flag.BoolVar(&iflowLogin, "iflow-login", false, "Login to iFlow using OAuth")
 	flag.BoolVar(&iflowCookie, "iflow-cookie", false, "Login to iFlow using Cookie")
 	flag.BoolVar(&noBrowser, "no-browser", false, "Don't open browser automatically for OAuth")
+	flag.IntVar(&oauthCallbackPort, "oauth-callback-port", 0, "Override OAuth callback port (defaults to provider-specific port)")
 	flag.BoolVar(&antigravityLogin, "antigravity-login", false, "Login to Antigravity using OAuth")
 	flag.StringVar(&projectID, "project_id", "", "Project ID (Gemini only, not required)")
 	flag.StringVar(&configPath, "config", DefaultConfigPath, "Configure File Path")
@@ -425,7 +427,8 @@ func main() {
 
 	// Create login options to be used in authentication flows.
 	options := &cmd.LoginOptions{
-		NoBrowser: noBrowser,
+		NoBrowser:    noBrowser,
+		CallbackPort: oauthCallbackPort,
 	}
 
 	// Register the shared token store once so all components use the same persistence backend.
diff --git a/config.example.yaml b/config.example.yaml
index 3a7e7fbd..ce402f99 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -85,6 +85,10 @@ nonstream-keepalive-interval: 0
 #   keepalive-seconds: 15 # Default: 0 (disabled). <= 0 disables keep-alives.
 #   bootstrap-retries: 1 # Default: 0 (disabled). Retries before first byte is sent.
 
+# When true, enables official Codex instructions injection for Codex API requests.
+# When false (default), CodexInstructionsForModel returns immediately without modification.
+codex-instructions-enabled: false
+
 # Gemini API keys
 # gemini-api-key:
 #   - api-key: "AIzaSy...01"
@@ -201,12 +205,27 @@ nonstream-keepalive-interval: 0
 #    - from: "claude-haiku-4-5-20251001"
 #      to: "gemini-2.5-flash"
 
-# Global OAuth model name mappings (per channel)
-# These mappings rename model IDs for both model listing and request routing.
+# Global OAuth model name aliases (per channel) +# These aliases rename model IDs for both model listing and request routing. # Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow. -# NOTE: Mappings do not apply to gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, or ampcode. +# NOTE: Aliases do not apply to gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, or ampcode. # You can repeat the same name with different aliases to expose multiple client model names. -# oauth-model-mappings: +oauth-model-alias: + antigravity: + - name: "rev19-uic3-1p" + alias: "gemini-2.5-computer-use-preview-10-2025" + - name: "gemini-3-pro-image" + alias: "gemini-3-pro-image-preview" + - name: "gemini-3-pro-high" + alias: "gemini-3-pro-preview" + - name: "gemini-3-flash" + alias: "gemini-3-flash-preview" + - name: "claude-sonnet-4-5" + alias: "gemini-claude-sonnet-4-5" + - name: "claude-sonnet-4-5-thinking" + alias: "gemini-claude-sonnet-4-5-thinking" + - name: "claude-opus-4-5-thinking" + alias: "gemini-claude-opus-4-5-thinking" # gemini-cli: # - name: "gemini-2.5-pro" # original model name under this channel # alias: "g2.5p" # client-visible alias @@ -217,9 +236,6 @@ nonstream-keepalive-interval: 0 # aistudio: # - name: "gemini-2.5-pro" # alias: "g2.5p" -# antigravity: -# - name: "gemini-3-pro-preview" -# alias: "g3p" # claude: # - name: "claude-sonnet-4-5-20250929" # alias: "cs4.5" @@ -263,9 +279,21 @@ nonstream-keepalive-interval: 0 # protocol: "gemini" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex # params: # JSON path (gjson/sjson syntax) -> value # "generationConfig.thinkingConfig.thinkingBudget": 32768 +# default-raw: # Default raw rules set parameters using raw JSON when missing (must be valid JSON). +# - models: +# - name: "gemini-2.5-pro" # Supports wildcards (e.g., "gemini-*") +# protocol: "gemini" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex +# params: # JSON path (gjson/sjson syntax) -> raw JSON value (strings are used as-is, must be valid JSON) +# "generationConfig.responseJsonSchema": "{\"type\":\"object\",\"properties\":{\"answer\":{\"type\":\"string\"}}}" # override: # Override rules always set parameters, overwriting any existing values. # - models: # - name: "gpt-*" # Supports wildcards (e.g., "gpt-*") # protocol: "codex" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex # params: # JSON path (gjson/sjson syntax) -> value # "reasoning.effort": "high" +# override-raw: # Override raw rules always set parameters using raw JSON (must be valid JSON). 
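+#   String params in raw rules are inserted verbatim as JSON fragments, so each string must itself parse as valid JSON.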
+# - models: +# - name: "gpt-*" # Supports wildcards (e.g., "gpt-*") +# protocol: "codex" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex +# params: # JSON path (gjson/sjson syntax) -> raw JSON value (strings are used as-is, must be valid JSON) +# "response_format": "{\"type\":\"json_schema\",\"json_schema\":{\"name\":\"answer\",\"schema\":{\"type\":\"object\"}}}" diff --git a/docker-compose.yml b/docker-compose.yml index 29712419..ad2190c2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -22,7 +22,7 @@ services: - "51121:51121" - "11451:11451" volumes: - - ./config.yaml:/CLIProxyAPI/config.yaml - - ./auths:/root/.cli-proxy-api - - ./logs:/CLIProxyAPI/logs + - ${CLI_PROXY_CONFIG_PATH:-./config.yaml}:/CLIProxyAPI/config.yaml + - ${CLI_PROXY_AUTH_PATH:-./auths}:/root/.cli-proxy-api + - ${CLI_PROXY_LOG_PATH:-./logs}:/CLIProxyAPI/logs restart: unless-stopped diff --git a/internal/api/handlers/management/auth_files.go b/internal/api/handlers/management/auth_files.go index 27c9a902..e6830d1d 100644 --- a/internal/api/handlers/management/auth_files.go +++ b/internal/api/handlers/management/auth_files.go @@ -1703,7 +1703,7 @@ func (h *Handler) RequestQwenToken(c *gin.Context) { // Create token storage tokenStorage := qwenAuth.CreateTokenStorage(tokenData) - tokenStorage.Email = fmt.Sprintf("qwen-%d", time.Now().UnixMilli()) + tokenStorage.Email = fmt.Sprintf("%d", time.Now().UnixMilli()) record := &coreauth.Auth{ ID: fmt.Sprintf("qwen-%s.json", tokenStorage.Email), Provider: "qwen", @@ -1808,7 +1808,7 @@ func (h *Handler) RequestIFlowToken(c *gin.Context) { tokenStorage := authSvc.CreateTokenStorage(tokenData) identifier := strings.TrimSpace(tokenStorage.Email) if identifier == "" { - identifier = fmt.Sprintf("iflow-%d", time.Now().UnixMilli()) + identifier = fmt.Sprintf("%d", time.Now().UnixMilli()) tokenStorage.Email = identifier } record := &coreauth.Auth{ @@ -1893,15 +1893,17 @@ func (h *Handler) RequestIFlowCookieToken(c *gin.Context) { fileName := iflowauth.SanitizeIFlowFileName(email) if fileName == "" { fileName = fmt.Sprintf("iflow-%d", time.Now().UnixMilli()) + } else { + fileName = fmt.Sprintf("iflow-%s", fileName) } tokenStorage.Email = email timestamp := time.Now().Unix() record := &coreauth.Auth{ - ID: fmt.Sprintf("iflow-%s-%d.json", fileName, timestamp), + ID: fmt.Sprintf("%s-%d.json", fileName, timestamp), Provider: "iflow", - FileName: fmt.Sprintf("iflow-%s-%d.json", fileName, timestamp), + FileName: fmt.Sprintf("%s-%d.json", fileName, timestamp), Storage: tokenStorage, Metadata: map[string]any{ "email": email, diff --git a/internal/api/handlers/management/config_lists.go b/internal/api/handlers/management/config_lists.go index edb7a677..4e0e0284 100644 --- a/internal/api/handlers/management/config_lists.go +++ b/internal/api/handlers/management/config_lists.go @@ -703,21 +703,21 @@ func (h *Handler) DeleteOAuthExcludedModels(c *gin.Context) { h.persist(c) } -// oauth-model-mappings: map[string][]ModelNameMapping -func (h *Handler) GetOAuthModelMappings(c *gin.Context) { - c.JSON(200, gin.H{"oauth-model-mappings": sanitizedOAuthModelMappings(h.cfg.OAuthModelMappings)}) +// oauth-model-alias: map[string][]OAuthModelAlias +func (h *Handler) GetOAuthModelAlias(c *gin.Context) { + c.JSON(200, gin.H{"oauth-model-alias": sanitizedOAuthModelAlias(h.cfg.OAuthModelAlias)}) } -func (h *Handler) PutOAuthModelMappings(c *gin.Context) { +func (h *Handler) PutOAuthModelAlias(c *gin.Context) { data, err := c.GetRawData() if err != nil { c.JSON(400, 
gin.H{"error": "failed to read body"}) return } - var entries map[string][]config.ModelNameMapping + var entries map[string][]config.OAuthModelAlias if err = json.Unmarshal(data, &entries); err != nil { var wrapper struct { - Items map[string][]config.ModelNameMapping `json:"items"` + Items map[string][]config.OAuthModelAlias `json:"items"` } if err2 := json.Unmarshal(data, &wrapper); err2 != nil { c.JSON(400, gin.H{"error": "invalid body"}) @@ -725,15 +725,15 @@ func (h *Handler) PutOAuthModelMappings(c *gin.Context) { } entries = wrapper.Items } - h.cfg.OAuthModelMappings = sanitizedOAuthModelMappings(entries) + h.cfg.OAuthModelAlias = sanitizedOAuthModelAlias(entries) h.persist(c) } -func (h *Handler) PatchOAuthModelMappings(c *gin.Context) { +func (h *Handler) PatchOAuthModelAlias(c *gin.Context) { var body struct { - Provider *string `json:"provider"` - Channel *string `json:"channel"` - Mappings []config.ModelNameMapping `json:"mappings"` + Provider *string `json:"provider"` + Channel *string `json:"channel"` + Aliases []config.OAuthModelAlias `json:"aliases"` } if errBindJSON := c.ShouldBindJSON(&body); errBindJSON != nil { c.JSON(400, gin.H{"error": "invalid body"}) @@ -751,32 +751,32 @@ func (h *Handler) PatchOAuthModelMappings(c *gin.Context) { return } - normalizedMap := sanitizedOAuthModelMappings(map[string][]config.ModelNameMapping{channel: body.Mappings}) + normalizedMap := sanitizedOAuthModelAlias(map[string][]config.OAuthModelAlias{channel: body.Aliases}) normalized := normalizedMap[channel] if len(normalized) == 0 { - if h.cfg.OAuthModelMappings == nil { + if h.cfg.OAuthModelAlias == nil { c.JSON(404, gin.H{"error": "channel not found"}) return } - if _, ok := h.cfg.OAuthModelMappings[channel]; !ok { + if _, ok := h.cfg.OAuthModelAlias[channel]; !ok { c.JSON(404, gin.H{"error": "channel not found"}) return } - delete(h.cfg.OAuthModelMappings, channel) - if len(h.cfg.OAuthModelMappings) == 0 { - h.cfg.OAuthModelMappings = nil + delete(h.cfg.OAuthModelAlias, channel) + if len(h.cfg.OAuthModelAlias) == 0 { + h.cfg.OAuthModelAlias = nil } h.persist(c) return } - if h.cfg.OAuthModelMappings == nil { - h.cfg.OAuthModelMappings = make(map[string][]config.ModelNameMapping) + if h.cfg.OAuthModelAlias == nil { + h.cfg.OAuthModelAlias = make(map[string][]config.OAuthModelAlias) } - h.cfg.OAuthModelMappings[channel] = normalized + h.cfg.OAuthModelAlias[channel] = normalized h.persist(c) } -func (h *Handler) DeleteOAuthModelMappings(c *gin.Context) { +func (h *Handler) DeleteOAuthModelAlias(c *gin.Context) { channel := strings.ToLower(strings.TrimSpace(c.Query("channel"))) if channel == "" { channel = strings.ToLower(strings.TrimSpace(c.Query("provider"))) @@ -785,17 +785,17 @@ func (h *Handler) DeleteOAuthModelMappings(c *gin.Context) { c.JSON(400, gin.H{"error": "missing channel"}) return } - if h.cfg.OAuthModelMappings == nil { + if h.cfg.OAuthModelAlias == nil { c.JSON(404, gin.H{"error": "channel not found"}) return } - if _, ok := h.cfg.OAuthModelMappings[channel]; !ok { + if _, ok := h.cfg.OAuthModelAlias[channel]; !ok { c.JSON(404, gin.H{"error": "channel not found"}) return } - delete(h.cfg.OAuthModelMappings, channel) - if len(h.cfg.OAuthModelMappings) == 0 { - h.cfg.OAuthModelMappings = nil + delete(h.cfg.OAuthModelAlias, channel) + if len(h.cfg.OAuthModelAlias) == 0 { + h.cfg.OAuthModelAlias = nil } h.persist(c) } @@ -1042,26 +1042,26 @@ func normalizeVertexCompatKey(entry *config.VertexCompatKey) { entry.Models = normalized } -func sanitizedOAuthModelMappings(entries 
map[string][]config.ModelNameMapping) map[string][]config.ModelNameMapping { +func sanitizedOAuthModelAlias(entries map[string][]config.OAuthModelAlias) map[string][]config.OAuthModelAlias { if len(entries) == 0 { return nil } - copied := make(map[string][]config.ModelNameMapping, len(entries)) - for channel, mappings := range entries { - if len(mappings) == 0 { + copied := make(map[string][]config.OAuthModelAlias, len(entries)) + for channel, aliases := range entries { + if len(aliases) == 0 { continue } - copied[channel] = append([]config.ModelNameMapping(nil), mappings...) + copied[channel] = append([]config.OAuthModelAlias(nil), aliases...) } if len(copied) == 0 { return nil } - cfg := config.Config{OAuthModelMappings: copied} - cfg.SanitizeOAuthModelMappings() - if len(cfg.OAuthModelMappings) == 0 { + cfg := config.Config{OAuthModelAlias: copied} + cfg.SanitizeOAuthModelAlias() + if len(cfg.OAuthModelAlias) == 0 { return nil } - return cfg.OAuthModelMappings + return cfg.OAuthModelAlias } // GetAmpCode returns the complete ampcode configuration. diff --git a/internal/api/handlers/management/logs.go b/internal/api/handlers/management/logs.go index 2612318a..b64cd619 100644 --- a/internal/api/handlers/management/logs.go +++ b/internal/api/handlers/management/logs.go @@ -13,7 +13,7 @@ import ( "time" "github.com/gin-gonic/gin" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/logging" ) const ( @@ -360,16 +360,7 @@ func (h *Handler) logDirectory() string { if h.logDir != "" { return h.logDir } - if base := util.WritablePath(); base != "" { - return filepath.Join(base, "logs") - } - if h.configFilePath != "" { - dir := filepath.Dir(h.configFilePath) - if dir != "" && dir != "." { - return filepath.Join(dir, "logs") - } - } - return "logs" + return logging.ResolveLogDirectory(h.cfg) } func (h *Handler) collectLogFiles(dir string) ([]string, error) { diff --git a/internal/api/modules/amp/fallback_handlers.go b/internal/api/modules/amp/fallback_handlers.go index 940bd5e8..7d7f7f5f 100644 --- a/internal/api/modules/amp/fallback_handlers.go +++ b/internal/api/modules/amp/fallback_handlers.go @@ -8,6 +8,7 @@ import ( "time" "github.com/gin-gonic/gin" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" log "github.com/sirupsen/logrus" "github.com/tidwall/gjson" @@ -134,10 +135,11 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc } // Normalize model (handles dynamic thinking suffixes) - normalizedModel, thinkingMetadata := util.NormalizeThinkingModel(modelName) + suffixResult := thinking.ParseSuffix(modelName) + normalizedModel := suffixResult.ModelName thinkingSuffix := "" - if thinkingMetadata != nil && strings.HasPrefix(modelName, normalizedModel) { - thinkingSuffix = modelName[len(normalizedModel):] + if suffixResult.HasSuffix { + thinkingSuffix = "(" + suffixResult.RawSuffix + ")" } resolveMappedModel := func() (string, []string) { @@ -157,13 +159,13 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc // Preserve dynamic thinking suffix (e.g. "(xhigh)") when mapping applies, unless the target // already specifies its own thinking suffix. 
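+			// thinkingSuffix was rebuilt above as "(" + RawSuffix + ")", so appending it keeps the client's parenthesized form intact.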
if thinkingSuffix != "" { - _, mappedThinkingMetadata := util.NormalizeThinkingModel(mappedModel) - if mappedThinkingMetadata == nil { + mappedSuffixResult := thinking.ParseSuffix(mappedModel) + if !mappedSuffixResult.HasSuffix { mappedModel += thinkingSuffix } } - mappedBaseModel, _ := util.NormalizeThinkingModel(mappedModel) + mappedBaseModel := thinking.ParseSuffix(mappedModel).ModelName mappedProviders := util.GetProviderName(mappedBaseModel) if len(mappedProviders) == 0 { return "", nil diff --git a/internal/api/modules/amp/model_mapping.go b/internal/api/modules/amp/model_mapping.go index 4b629b62..4159a2b5 100644 --- a/internal/api/modules/amp/model_mapping.go +++ b/internal/api/modules/amp/model_mapping.go @@ -8,6 +8,7 @@ import ( "sync" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" log "github.com/sirupsen/logrus" ) @@ -44,6 +45,11 @@ func NewModelMapper(mappings []config.AmpModelMapping) *DefaultModelMapper { // MapModel checks if a mapping exists for the requested model and if the // target model has available local providers. Returns the mapped model name // or empty string if no valid mapping exists. +// +// If the requested model contains a thinking suffix (e.g., "g25p(8192)"), +// the suffix is preserved in the returned model name (e.g., "gemini-2.5-pro(8192)"). +// However, if the mapping target already contains a suffix, the config suffix +// takes priority over the user's suffix. func (m *DefaultModelMapper) MapModel(requestedModel string) string { if requestedModel == "" { return "" @@ -52,16 +58,20 @@ func (m *DefaultModelMapper) MapModel(requestedModel string) string { m.mu.RLock() defer m.mu.RUnlock() - // Normalize the requested model for lookup - normalizedRequest := strings.ToLower(strings.TrimSpace(requestedModel)) + // Extract thinking suffix from requested model using ParseSuffix + requestResult := thinking.ParseSuffix(requestedModel) + baseModel := requestResult.ModelName - // Check for direct mapping - targetModel, exists := m.mappings[normalizedRequest] + // Normalize the base model for lookup (case-insensitive) + normalizedBase := strings.ToLower(strings.TrimSpace(baseModel)) + + // Check for direct mapping using base model name + targetModel, exists := m.mappings[normalizedBase] if !exists { - // Try regex mappings in order - base, _ := util.NormalizeThinkingModel(requestedModel) + // Try regex mappings in order using base model only + // (suffix is handled separately via ParseSuffix) for _, rm := range m.regexps { - if rm.re.MatchString(requestedModel) || (base != "" && rm.re.MatchString(base)) { + if rm.re.MatchString(baseModel) { targetModel = rm.to exists = true break @@ -72,14 +82,28 @@ func (m *DefaultModelMapper) MapModel(requestedModel string) string { } } - // Verify target model has available providers - normalizedTarget, _ := util.NormalizeThinkingModel(targetModel) - providers := util.GetProviderName(normalizedTarget) + // Check if target model already has a thinking suffix (config priority) + targetResult := thinking.ParseSuffix(targetModel) + + // Verify target model has available providers (use base model for lookup) + providers := util.GetProviderName(targetResult.ModelName) if len(providers) == 0 { log.Debugf("amp model mapping: target model %s has no available providers, skipping mapping", targetModel) return "" } + // Suffix handling: config suffix takes priority, otherwise preserve user suffix + if 
targetResult.HasSuffix { + // Config's "to" already contains a suffix - use it as-is (config priority) + return targetModel + } + + // Preserve user's thinking suffix on the mapped model + // (skip empty suffixes to avoid returning "model()") + if requestResult.HasSuffix && requestResult.RawSuffix != "" { + return targetModel + "(" + requestResult.RawSuffix + ")" + } + // Note: Detailed routing log is handled by logAmpRouting in fallback_handlers.go return targetModel } diff --git a/internal/api/modules/amp/model_mapping_test.go b/internal/api/modules/amp/model_mapping_test.go index 1b36f212..53165d22 100644 --- a/internal/api/modules/amp/model_mapping_test.go +++ b/internal/api/modules/amp/model_mapping_test.go @@ -217,10 +217,10 @@ func TestModelMapper_Regex_MatchBaseWithoutParens(t *testing.T) { mapper := NewModelMapper(mappings) - // Incoming model has reasoning suffix but should match base via regex + // Incoming model has reasoning suffix, regex matches base, suffix is preserved result := mapper.MapModel("gpt-5(high)") - if result != "gemini-2.5-pro" { - t.Errorf("Expected gemini-2.5-pro, got %s", result) + if result != "gemini-2.5-pro(high)" { + t.Errorf("Expected gemini-2.5-pro(high), got %s", result) } } @@ -281,3 +281,95 @@ func TestModelMapper_Regex_CaseInsensitive(t *testing.T) { t.Errorf("Expected claude-sonnet-4, got %s", result) } } + +func TestModelMapper_SuffixPreservation(t *testing.T) { + reg := registry.GetGlobalRegistry() + + // Register test models + reg.RegisterClient("test-client-suffix", "gemini", []*registry.ModelInfo{ + {ID: "gemini-2.5-pro", OwnedBy: "google", Type: "gemini"}, + }) + reg.RegisterClient("test-client-suffix-2", "claude", []*registry.ModelInfo{ + {ID: "claude-sonnet-4", OwnedBy: "anthropic", Type: "claude"}, + }) + defer reg.UnregisterClient("test-client-suffix") + defer reg.UnregisterClient("test-client-suffix-2") + + tests := []struct { + name string + mappings []config.AmpModelMapping + input string + want string + }{ + { + name: "numeric suffix preserved", + mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}}, + input: "g25p(8192)", + want: "gemini-2.5-pro(8192)", + }, + { + name: "level suffix preserved", + mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}}, + input: "g25p(high)", + want: "gemini-2.5-pro(high)", + }, + { + name: "no suffix unchanged", + mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}}, + input: "g25p", + want: "gemini-2.5-pro", + }, + { + name: "config suffix takes priority", + mappings: []config.AmpModelMapping{{From: "alias", To: "gemini-2.5-pro(medium)"}}, + input: "alias(high)", + want: "gemini-2.5-pro(medium)", + }, + { + name: "regex with suffix preserved", + mappings: []config.AmpModelMapping{{From: "^g25.*", To: "gemini-2.5-pro", Regex: true}}, + input: "g25p(8192)", + want: "gemini-2.5-pro(8192)", + }, + { + name: "auto suffix preserved", + mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}}, + input: "g25p(auto)", + want: "gemini-2.5-pro(auto)", + }, + { + name: "none suffix preserved", + mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}}, + input: "g25p(none)", + want: "gemini-2.5-pro(none)", + }, + { + name: "case insensitive base lookup with suffix", + mappings: []config.AmpModelMapping{{From: "G25P", To: "gemini-2.5-pro"}}, + input: "g25p(high)", + want: "gemini-2.5-pro(high)", + }, + { + name: "empty suffix filtered out", + mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}}, + 
input: "g25p()", + want: "gemini-2.5-pro", + }, + { + name: "incomplete suffix treated as no suffix", + mappings: []config.AmpModelMapping{{From: "g25p(high", To: "gemini-2.5-pro"}}, + input: "g25p(high", + want: "gemini-2.5-pro", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mapper := NewModelMapper(tt.mappings) + got := mapper.MapModel(tt.input) + if got != tt.want { + t.Errorf("MapModel(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} diff --git a/internal/api/server.go b/internal/api/server.go index 05bb2fee..aa78ac2a 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -26,6 +26,7 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/logging" "github.com/router-for-me/CLIProxyAPI/v6/internal/managementasset" + "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" "github.com/router-for-me/CLIProxyAPI/v6/internal/usage" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" sdkaccess "github.com/router-for-me/CLIProxyAPI/v6/sdk/access" @@ -254,15 +255,13 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk } managementasset.SetCurrentConfig(cfg) auth.SetQuotaCooldownDisabled(cfg.DisableCooling) + misc.SetCodexInstructionsEnabled(cfg.CodexInstructionsEnabled) // Initialize management handler s.mgmt = managementHandlers.NewHandler(cfg, configFilePath, authManager) if optionState.localPassword != "" { s.mgmt.SetLocalPassword(optionState.localPassword) } - logDir := filepath.Join(s.currentPath, "logs") - if base := util.WritablePath(); base != "" { - logDir = filepath.Join(base, "logs") - } + logDir := logging.ResolveLogDirectory(cfg) s.mgmt.SetLogDirectory(logDir) s.localPassword = optionState.localPassword @@ -601,10 +600,10 @@ func (s *Server) registerManagementRoutes() { mgmt.PATCH("/oauth-excluded-models", s.mgmt.PatchOAuthExcludedModels) mgmt.DELETE("/oauth-excluded-models", s.mgmt.DeleteOAuthExcludedModels) - mgmt.GET("/oauth-model-mappings", s.mgmt.GetOAuthModelMappings) - mgmt.PUT("/oauth-model-mappings", s.mgmt.PutOAuthModelMappings) - mgmt.PATCH("/oauth-model-mappings", s.mgmt.PatchOAuthModelMappings) - mgmt.DELETE("/oauth-model-mappings", s.mgmt.DeleteOAuthModelMappings) + mgmt.GET("/oauth-model-alias", s.mgmt.GetOAuthModelAlias) + mgmt.PUT("/oauth-model-alias", s.mgmt.PutOAuthModelAlias) + mgmt.PATCH("/oauth-model-alias", s.mgmt.PatchOAuthModelAlias) + mgmt.DELETE("/oauth-model-alias", s.mgmt.DeleteOAuthModelAlias) mgmt.GET("/auth-files", s.mgmt.ListAuthFiles) mgmt.GET("/auth-files/models", s.mgmt.GetAuthFileModels) @@ -912,6 +911,16 @@ func (s *Server) UpdateClients(cfg *config.Config) { log.Debugf("disable_cooling toggled to %t", cfg.DisableCooling) } } + + if oldCfg == nil || oldCfg.CodexInstructionsEnabled != cfg.CodexInstructionsEnabled { + misc.SetCodexInstructionsEnabled(cfg.CodexInstructionsEnabled) + if oldCfg != nil { + log.Debugf("codex_instructions_enabled updated from %t to %t", oldCfg.CodexInstructionsEnabled, cfg.CodexInstructionsEnabled) + } else { + log.Debugf("codex_instructions_enabled toggled to %t", cfg.CodexInstructionsEnabled) + } + } + if s.handlers != nil && s.handlers.AuthManager != nil { s.handlers.AuthManager.SetRetryConfig(cfg.RequestRetry, time.Duration(cfg.MaxRetryInterval)*time.Second) } diff --git a/internal/auth/gemini/gemini_auth.go b/internal/auth/gemini/gemini_auth.go index 7b18e738..708ac809 100644 --- a/internal/auth/gemini/gemini_auth.go +++ b/internal/auth/gemini/gemini_auth.go 
@@ -29,8 +29,9 @@ import ( ) const ( - geminiOauthClientID = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com" - geminiOauthClientSecret = "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" + geminiOauthClientID = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com" + geminiOauthClientSecret = "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" + geminiDefaultCallbackPort = 8085 ) var ( @@ -49,8 +50,9 @@ type GeminiAuth struct { // WebLoginOptions customizes the interactive OAuth flow. type WebLoginOptions struct { - NoBrowser bool - Prompt func(string) (string, error) + NoBrowser bool + CallbackPort int + Prompt func(string) (string, error) } // NewGeminiAuth creates a new instance of GeminiAuth. @@ -72,6 +74,12 @@ func NewGeminiAuth() *GeminiAuth { // - *http.Client: An HTTP client configured with authentication // - error: An error if the client configuration fails, nil otherwise func (g *GeminiAuth) GetAuthenticatedClient(ctx context.Context, ts *GeminiTokenStorage, cfg *config.Config, opts *WebLoginOptions) (*http.Client, error) { + callbackPort := geminiDefaultCallbackPort + if opts != nil && opts.CallbackPort > 0 { + callbackPort = opts.CallbackPort + } + callbackURL := fmt.Sprintf("http://localhost:%d/oauth2callback", callbackPort) + // Configure proxy settings for the HTTP client if a proxy URL is provided. proxyURL, err := url.Parse(cfg.ProxyURL) if err == nil { @@ -106,7 +114,7 @@ func (g *GeminiAuth) GetAuthenticatedClient(ctx context.Context, ts *GeminiToken conf := &oauth2.Config{ ClientID: geminiOauthClientID, ClientSecret: geminiOauthClientSecret, - RedirectURL: "http://localhost:8085/oauth2callback", // This will be used by the local server. + RedirectURL: callbackURL, // This will be used by the local server. Scopes: geminiOauthScopes, Endpoint: google.Endpoint, } @@ -218,14 +226,20 @@ func (g *GeminiAuth) createTokenStorage(ctx context.Context, config *oauth2.Conf // - *oauth2.Token: The OAuth2 token obtained from the authorization flow // - error: An error if the token acquisition fails, nil otherwise func (g *GeminiAuth) getTokenFromWeb(ctx context.Context, config *oauth2.Config, opts *WebLoginOptions) (*oauth2.Token, error) { + callbackPort := geminiDefaultCallbackPort + if opts != nil && opts.CallbackPort > 0 { + callbackPort = opts.CallbackPort + } + callbackURL := fmt.Sprintf("http://localhost:%d/oauth2callback", callbackPort) + // Use a channel to pass the authorization code from the HTTP handler to the main function. codeChan := make(chan string, 1) errChan := make(chan error, 1) // Create a new HTTP server with its own multiplexer. 
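+	// The server below binds the callbackPort resolved above (default 8085, overridable via --oauth-callback-port).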
mux := http.NewServeMux() - server := &http.Server{Addr: ":8085", Handler: mux} - config.RedirectURL = "http://localhost:8085/oauth2callback" + server := &http.Server{Addr: fmt.Sprintf(":%d", callbackPort), Handler: mux} + config.RedirectURL = callbackURL mux.HandleFunc("/oauth2callback", func(w http.ResponseWriter, r *http.Request) { if err := r.URL.Query().Get("error"); err != "" { @@ -277,13 +291,13 @@ func (g *GeminiAuth) getTokenFromWeb(ctx context.Context, config *oauth2.Config, // Check if browser is available if !browser.IsAvailable() { log.Warn("No browser available on this system") - util.PrintSSHTunnelInstructions(8085) + util.PrintSSHTunnelInstructions(callbackPort) fmt.Printf("Please manually open this URL in your browser:\n\n%s\n", authURL) } else { if err := browser.OpenURL(authURL); err != nil { authErr := codex.NewAuthenticationError(codex.ErrBrowserOpenFailed, err) log.Warn(codex.GetUserFriendlyMessage(authErr)) - util.PrintSSHTunnelInstructions(8085) + util.PrintSSHTunnelInstructions(callbackPort) fmt.Printf("Please manually open this URL in your browser:\n\n%s\n", authURL) // Log platform info for debugging @@ -294,7 +308,7 @@ func (g *GeminiAuth) getTokenFromWeb(ctx context.Context, config *oauth2.Config, } } } else { - util.PrintSSHTunnelInstructions(8085) + util.PrintSSHTunnelInstructions(callbackPort) fmt.Printf("Please open this URL in your browser:\n\n%s\n", authURL) } diff --git a/internal/cmd/anthropic_login.go b/internal/cmd/anthropic_login.go index 6efd87a8..dafdd02b 100644 --- a/internal/cmd/anthropic_login.go +++ b/internal/cmd/anthropic_login.go @@ -32,9 +32,10 @@ func DoClaudeLogin(cfg *config.Config, options *LoginOptions) { manager := newAuthManager() authOpts := &sdkAuth.LoginOptions{ - NoBrowser: options.NoBrowser, - Metadata: map[string]string{}, - Prompt: promptFn, + NoBrowser: options.NoBrowser, + CallbackPort: options.CallbackPort, + Metadata: map[string]string{}, + Prompt: promptFn, } _, savedPath, err := manager.Login(context.Background(), "claude", cfg, authOpts) diff --git a/internal/cmd/antigravity_login.go b/internal/cmd/antigravity_login.go index 1cd42899..2efbaeee 100644 --- a/internal/cmd/antigravity_login.go +++ b/internal/cmd/antigravity_login.go @@ -22,9 +22,10 @@ func DoAntigravityLogin(cfg *config.Config, options *LoginOptions) { manager := newAuthManager() authOpts := &sdkAuth.LoginOptions{ - NoBrowser: options.NoBrowser, - Metadata: map[string]string{}, - Prompt: promptFn, + NoBrowser: options.NoBrowser, + CallbackPort: options.CallbackPort, + Metadata: map[string]string{}, + Prompt: promptFn, } record, savedPath, err := manager.Login(context.Background(), "antigravity", cfg, authOpts) diff --git a/internal/cmd/iflow_login.go b/internal/cmd/iflow_login.go index cf00b63c..07360b8c 100644 --- a/internal/cmd/iflow_login.go +++ b/internal/cmd/iflow_login.go @@ -24,9 +24,10 @@ func DoIFlowLogin(cfg *config.Config, options *LoginOptions) { } authOpts := &sdkAuth.LoginOptions{ - NoBrowser: options.NoBrowser, - Metadata: map[string]string{}, - Prompt: promptFn, + NoBrowser: options.NoBrowser, + CallbackPort: options.CallbackPort, + Metadata: map[string]string{}, + Prompt: promptFn, } _, savedPath, err := manager.Login(context.Background(), "iflow", cfg, authOpts) diff --git a/internal/cmd/login.go b/internal/cmd/login.go index 3bb0b9a5..558dacf6 100644 --- a/internal/cmd/login.go +++ b/internal/cmd/login.go @@ -67,10 +67,11 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) { } loginOpts := &sdkAuth.LoginOptions{ - 
NoBrowser: options.NoBrowser, - ProjectID: trimmedProjectID, - Metadata: map[string]string{}, - Prompt: callbackPrompt, + NoBrowser: options.NoBrowser, + ProjectID: trimmedProjectID, + CallbackPort: options.CallbackPort, + Metadata: map[string]string{}, + Prompt: callbackPrompt, } authenticator := sdkAuth.NewGeminiAuthenticator() @@ -88,8 +89,9 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) { geminiAuth := gemini.NewGeminiAuth() httpClient, errClient := geminiAuth.GetAuthenticatedClient(ctx, storage, cfg, &gemini.WebLoginOptions{ - NoBrowser: options.NoBrowser, - Prompt: callbackPrompt, + NoBrowser: options.NoBrowser, + CallbackPort: options.CallbackPort, + Prompt: callbackPrompt, }) if errClient != nil { log.Errorf("Gemini authentication failed: %v", errClient) diff --git a/internal/cmd/openai_login.go b/internal/cmd/openai_login.go index d981f6ae..5f2fb162 100644 --- a/internal/cmd/openai_login.go +++ b/internal/cmd/openai_login.go @@ -19,6 +19,9 @@ type LoginOptions struct { // NoBrowser indicates whether to skip opening the browser automatically. NoBrowser bool + // CallbackPort overrides the local OAuth callback port when set (>0). + CallbackPort int + // Prompt allows the caller to provide interactive input when needed. Prompt func(prompt string) (string, error) } @@ -43,9 +46,10 @@ func DoCodexLogin(cfg *config.Config, options *LoginOptions) { manager := newAuthManager() authOpts := &sdkAuth.LoginOptions{ - NoBrowser: options.NoBrowser, - Metadata: map[string]string{}, - Prompt: promptFn, + NoBrowser: options.NoBrowser, + CallbackPort: options.CallbackPort, + Metadata: map[string]string{}, + Prompt: promptFn, } _, savedPath, err := manager.Login(context.Background(), "codex", cfg, authOpts) diff --git a/internal/cmd/qwen_login.go b/internal/cmd/qwen_login.go index 27edf408..92a57aa5 100644 --- a/internal/cmd/qwen_login.go +++ b/internal/cmd/qwen_login.go @@ -36,9 +36,10 @@ func DoQwenLogin(cfg *config.Config, options *LoginOptions) { } authOpts := &sdkAuth.LoginOptions{ - NoBrowser: options.NoBrowser, - Metadata: map[string]string{}, - Prompt: promptFn, + NoBrowser: options.NoBrowser, + CallbackPort: options.CallbackPort, + Metadata: map[string]string{}, + Prompt: promptFn, } _, savedPath, err := manager.Login(context.Background(), "qwen", cfg, authOpts) diff --git a/internal/config/config.go b/internal/config/config.go index 99beb481..3b7e9fa7 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -6,12 +6,14 @@ package config import ( "bytes" + "encoding/json" "errors" "fmt" "os" "strings" "syscall" + log "github.com/sirupsen/logrus" "golang.org/x/crypto/bcrypt" "gopkg.in/yaml.v3" ) @@ -69,6 +71,11 @@ type Config struct { // WebsocketAuth enables or disables authentication for the WebSocket API. WebsocketAuth bool `yaml:"ws-auth" json:"ws-auth"` + // CodexInstructionsEnabled controls whether official Codex instructions are injected. + // When false (default), CodexInstructionsForModel returns immediately without modification. + // When true, the original instruction injection logic is used. + CodexInstructionsEnabled bool `yaml:"codex-instructions-enabled" json:"codex-instructions-enabled"` + // GeminiKey defines Gemini API key configurations with optional routing overrides. GeminiKey []GeminiKey `yaml:"gemini-api-key" json:"gemini-api-key"` @@ -91,13 +98,13 @@ type Config struct { // OAuthExcludedModels defines per-provider global model exclusions applied to OAuth/file-backed auth entries. 
OAuthExcludedModels map[string][]string `yaml:"oauth-excluded-models,omitempty" json:"oauth-excluded-models,omitempty"` - // OAuthModelMappings defines global model name mappings for OAuth/file-backed auth channels. - // These mappings affect both model listing and model routing for supported channels: + // OAuthModelAlias defines global model name aliases for OAuth/file-backed auth channels. + // These aliases affect both model listing and model routing for supported channels: // gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow. // // NOTE: This does not apply to existing per-credential model alias features under: // gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, and ampcode. - OAuthModelMappings map[string][]ModelNameMapping `yaml:"oauth-model-mappings,omitempty" json:"oauth-model-mappings,omitempty"` + OAuthModelAlias map[string][]OAuthModelAlias `yaml:"oauth-model-alias,omitempty" json:"oauth-model-alias,omitempty"` // Payload defines default and override rules for provider payload parameters. Payload PayloadConfig `yaml:"payload" json:"payload"` @@ -145,11 +152,11 @@ type RoutingConfig struct { Strategy string `yaml:"strategy,omitempty" json:"strategy,omitempty"` } -// ModelNameMapping defines a model ID mapping for a specific channel. +// OAuthModelAlias defines a model ID alias for a specific channel. // It maps the upstream model name (Name) to the client-visible alias (Alias). // When Fork is true, the alias is added as an additional model in listings while // keeping the original model ID available. -type ModelNameMapping struct { +type OAuthModelAlias struct { Name string `yaml:"name" json:"name"` Alias string `yaml:"alias" json:"alias"` Fork bool `yaml:"fork,omitempty" json:"fork,omitempty"` @@ -216,8 +223,12 @@ type AmpUpstreamAPIKeyEntry struct { type PayloadConfig struct { // Default defines rules that only set parameters when they are missing in the payload. Default []PayloadRule `yaml:"default" json:"default"` + // DefaultRaw defines rules that set raw JSON values only when they are missing. + DefaultRaw []PayloadRule `yaml:"default-raw" json:"default-raw"` // Override defines rules that always set parameters, overwriting any existing values. Override []PayloadRule `yaml:"override" json:"override"` + // OverrideRaw defines rules that always set raw JSON values, overwriting any existing values. + OverrideRaw []PayloadRule `yaml:"override-raw" json:"override-raw"` } // PayloadRule describes a single rule targeting a list of models with parameter updates. @@ -225,6 +236,7 @@ type PayloadRule struct { // Models lists model entries with name pattern and protocol constraint. Models []PayloadModelRule `yaml:"models" json:"models"` // Params maps JSON paths (gjson/sjson syntax) to values written into the payload. + // For *-raw rules, values are treated as raw JSON fragments (strings are used as-is). Params map[string]any `yaml:"params" json:"params"` } @@ -242,6 +254,10 @@ type ClaudeKey struct { // APIKey is the authentication key for accessing Claude API services. APIKey string `yaml:"api-key" json:"api-key"` + // Priority controls selection preference when multiple credentials match. + // Higher values are preferred; defaults to 0. + Priority int `yaml:"priority,omitempty" json:"priority,omitempty"` + // Prefix optionally namespaces models for this credential (e.g., "teamA/claude-sonnet-4"). 
Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"` @@ -262,6 +278,9 @@ type ClaudeKey struct { ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"` } +func (k ClaudeKey) GetAPIKey() string { return k.APIKey } +func (k ClaudeKey) GetBaseURL() string { return k.BaseURL } + // ClaudeModel describes a mapping between an alias and the actual upstream model name. type ClaudeModel struct { // Name is the upstream model identifier used when issuing requests. @@ -280,6 +299,10 @@ type CodexKey struct { // APIKey is the authentication key for accessing Codex API services. APIKey string `yaml:"api-key" json:"api-key"` + // Priority controls selection preference when multiple credentials match. + // Higher values are preferred; defaults to 0. + Priority int `yaml:"priority,omitempty" json:"priority,omitempty"` + // Prefix optionally namespaces models for this credential (e.g., "teamA/gpt-5-codex"). Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"` @@ -300,6 +323,9 @@ type CodexKey struct { ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"` } +func (k CodexKey) GetAPIKey() string { return k.APIKey } +func (k CodexKey) GetBaseURL() string { return k.BaseURL } + // CodexModel describes a mapping between an alias and the actual upstream model name. type CodexModel struct { // Name is the upstream model identifier used when issuing requests. @@ -318,6 +344,10 @@ type GeminiKey struct { // APIKey is the authentication key for accessing Gemini API services. APIKey string `yaml:"api-key" json:"api-key"` + // Priority controls selection preference when multiple credentials match. + // Higher values are preferred; defaults to 0. + Priority int `yaml:"priority,omitempty" json:"priority,omitempty"` + // Prefix optionally namespaces models for this credential (e.g., "teamA/gemini-3-pro-preview"). Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"` @@ -337,6 +367,9 @@ type GeminiKey struct { ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"` } +func (k GeminiKey) GetAPIKey() string { return k.APIKey } +func (k GeminiKey) GetBaseURL() string { return k.BaseURL } + // GeminiModel describes a mapping between an alias and the actual upstream model name. type GeminiModel struct { // Name is the upstream model identifier used when issuing requests. @@ -355,6 +388,10 @@ type OpenAICompatibility struct { // Name is the identifier for this OpenAI compatibility configuration. Name string `yaml:"name" json:"name"` + // Priority controls selection preference when multiple providers or credentials match. + // Higher values are preferred; defaults to 0. + Priority int `yaml:"priority,omitempty" json:"priority,omitempty"` + // Prefix optionally namespaces model aliases for this provider (e.g., "teamA/kimi-k2"). Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"` @@ -390,6 +427,9 @@ type OpenAICompatibilityModel struct { Alias string `yaml:"alias" json:"alias"` } +func (m OpenAICompatibilityModel) GetName() string { return m.Name } +func (m OpenAICompatibilityModel) GetAlias() string { return m.Alias } + // LoadConfig reads a YAML configuration file from the given path, // unmarshals it into a Config struct, applies environment variable overrides, // and returns it. @@ -408,6 +448,15 @@ func LoadConfig(configFile string) (*Config, error) { // If optional is true and the file is missing, it returns an empty Config. 
// If optional is true and the file is empty or invalid, it returns an empty Config. func LoadConfigOptional(configFile string, optional bool) (*Config, error) { + // Perform oauth-model-alias migration before loading config. + // This migrates oauth-model-mappings to oauth-model-alias if needed. + if migrated, err := MigrateOAuthModelAlias(configFile); err != nil { + // Log warning but don't fail - config loading should still work + fmt.Printf("Warning: oauth-model-alias migration failed: %v\n", err) + } else if migrated { + fmt.Println("Migrated oauth-model-mappings to oauth-model-alias") + } + // Read the entire configuration file into memory. data, err := os.ReadFile(configFile) if err != nil { @@ -500,8 +549,11 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) { // Normalize OAuth provider model exclusion map. cfg.OAuthExcludedModels = NormalizeOAuthExcludedModels(cfg.OAuthExcludedModels) - // Normalize global OAuth model name mappings. - cfg.SanitizeOAuthModelMappings() + // Normalize global OAuth model name aliases. + cfg.SanitizeOAuthModelAlias() + + // Validate raw payload rules and drop invalid entries. + cfg.SanitizePayloadRules() if cfg.legacyMigrationPending { fmt.Println("Detected legacy configuration keys, attempting to persist the normalized config...") @@ -519,24 +571,79 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) { return &cfg, nil } -// SanitizeOAuthModelMappings normalizes and deduplicates global OAuth model name mappings. -// It trims whitespace, normalizes channel keys to lower-case, drops empty entries, -// allows multiple aliases per upstream name, and ensures aliases are unique within each channel. -func (cfg *Config) SanitizeOAuthModelMappings() { - if cfg == nil || len(cfg.OAuthModelMappings) == 0 { +// SanitizePayloadRules validates raw JSON payload rule params and drops invalid rules. 
+func (cfg *Config) SanitizePayloadRules() { + if cfg == nil { return } - out := make(map[string][]ModelNameMapping, len(cfg.OAuthModelMappings)) - for rawChannel, mappings := range cfg.OAuthModelMappings { - channel := strings.ToLower(strings.TrimSpace(rawChannel)) - if channel == "" || len(mappings) == 0 { + cfg.Payload.DefaultRaw = sanitizePayloadRawRules(cfg.Payload.DefaultRaw, "default-raw") + cfg.Payload.OverrideRaw = sanitizePayloadRawRules(cfg.Payload.OverrideRaw, "override-raw") +} + +func sanitizePayloadRawRules(rules []PayloadRule, section string) []PayloadRule { + if len(rules) == 0 { + return rules + } + out := make([]PayloadRule, 0, len(rules)) + for i := range rules { + rule := rules[i] + if len(rule.Params) == 0 { continue } - seenAlias := make(map[string]struct{}, len(mappings)) - clean := make([]ModelNameMapping, 0, len(mappings)) - for _, mapping := range mappings { - name := strings.TrimSpace(mapping.Name) - alias := strings.TrimSpace(mapping.Alias) + invalid := false + for path, value := range rule.Params { + raw, ok := payloadRawString(value) + if !ok { + continue + } + trimmed := bytes.TrimSpace(raw) + if len(trimmed) == 0 || !json.Valid(trimmed) { + log.WithFields(log.Fields{ + "section": section, + "rule_index": i + 1, + "param": path, + }).Warn("payload rule dropped: invalid raw JSON") + invalid = true + break + } + } + if invalid { + continue + } + out = append(out, rule) + } + return out +} + +func payloadRawString(value any) ([]byte, bool) { + switch typed := value.(type) { + case string: + return []byte(typed), true + case []byte: + return typed, true + default: + return nil, false + } +} + +// SanitizeOAuthModelAlias normalizes and deduplicates global OAuth model name aliases. +// It trims whitespace, normalizes channel keys to lower-case, drops empty entries, +// allows multiple aliases per upstream name, and ensures aliases are unique within each channel. +func (cfg *Config) SanitizeOAuthModelAlias() { + if cfg == nil || len(cfg.OAuthModelAlias) == 0 { + return + } + out := make(map[string][]OAuthModelAlias, len(cfg.OAuthModelAlias)) + for rawChannel, aliases := range cfg.OAuthModelAlias { + channel := strings.ToLower(strings.TrimSpace(rawChannel)) + if channel == "" || len(aliases) == 0 { + continue + } + seenAlias := make(map[string]struct{}, len(aliases)) + clean := make([]OAuthModelAlias, 0, len(aliases)) + for _, entry := range aliases { + name := strings.TrimSpace(entry.Name) + alias := strings.TrimSpace(entry.Alias) if name == "" || alias == "" { continue } @@ -548,13 +655,13 @@ func (cfg *Config) SanitizeOAuthModelMappings() { continue } seenAlias[aliasKey] = struct{}{} - clean = append(clean, ModelNameMapping{Name: name, Alias: alias, Fork: mapping.Fork}) + clean = append(clean, OAuthModelAlias{Name: name, Alias: alias, Fork: entry.Fork}) } if len(clean) > 0 { out[channel] = clean } } - cfg.OAuthModelMappings = out + cfg.OAuthModelAlias = out } // SanitizeOpenAICompatibility removes OpenAI-compatibility provider entries that are diff --git a/internal/config/oauth_model_alias_migration.go b/internal/config/oauth_model_alias_migration.go new file mode 100644 index 00000000..5cc8053a --- /dev/null +++ b/internal/config/oauth_model_alias_migration.go @@ -0,0 +1,275 @@ +package config + +import ( + "os" + "strings" + + "gopkg.in/yaml.v3" +) + +// antigravityModelConversionTable maps old built-in aliases to actual model names +// for the antigravity channel during migration. 
+var antigravityModelConversionTable = map[string]string{
+	"gemini-2.5-computer-use-preview-10-2025": "rev19-uic3-1p",
+	"gemini-3-pro-image-preview":              "gemini-3-pro-image",
+	"gemini-3-pro-preview":                    "gemini-3-pro-high",
+	"gemini-3-flash-preview":                  "gemini-3-flash",
+	"gemini-claude-sonnet-4-5":                "claude-sonnet-4-5",
+	"gemini-claude-sonnet-4-5-thinking":       "claude-sonnet-4-5-thinking",
+	"gemini-claude-opus-4-5-thinking":         "claude-opus-4-5-thinking",
+}
+
+// defaultAntigravityAliases returns the default oauth-model-alias configuration
+// for the antigravity channel when neither field exists.
+func defaultAntigravityAliases() []OAuthModelAlias {
+	return []OAuthModelAlias{
+		{Name: "rev19-uic3-1p", Alias: "gemini-2.5-computer-use-preview-10-2025"},
+		{Name: "gemini-3-pro-image", Alias: "gemini-3-pro-image-preview"},
+		{Name: "gemini-3-pro-high", Alias: "gemini-3-pro-preview"},
+		{Name: "gemini-3-flash", Alias: "gemini-3-flash-preview"},
+		{Name: "claude-sonnet-4-5", Alias: "gemini-claude-sonnet-4-5"},
+		{Name: "claude-sonnet-4-5-thinking", Alias: "gemini-claude-sonnet-4-5-thinking"},
+		{Name: "claude-opus-4-5-thinking", Alias: "gemini-claude-opus-4-5-thinking"},
+	}
+}
+
+// MigrateOAuthModelAlias checks for and performs migration from oauth-model-mappings
+// to oauth-model-alias at startup. Returns true if migration was performed.
+//
+// Migration flow:
+// 1. Check if oauth-model-alias exists -> skip migration
+// 2. Check if oauth-model-mappings exists -> convert and migrate
+//    - For antigravity channel, convert old built-in aliases to actual model names
+//
+// 3. Neither exists -> add default antigravity config
+func MigrateOAuthModelAlias(configFile string) (bool, error) {
+	data, err := os.ReadFile(configFile)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return false, nil
+		}
+		return false, err
+	}
+	if len(data) == 0 {
+		return false, nil
+	}
+
+	// Parse YAML into node tree to preserve structure
+	var root yaml.Node
+	if err := yaml.Unmarshal(data, &root); err != nil {
+		return false, nil
+	}
+	if root.Kind != yaml.DocumentNode || len(root.Content) == 0 {
+		return false, nil
+	}
+	rootMap := root.Content[0]
+	if rootMap == nil || rootMap.Kind != yaml.MappingNode {
+		return false, nil
+	}
+
+	// Check if oauth-model-alias already exists
+	if findMapKeyIndex(rootMap, "oauth-model-alias") >= 0 {
+		return false, nil
+	}
+
+	// Check if oauth-model-mappings exists
+	oldIdx := findMapKeyIndex(rootMap, "oauth-model-mappings")
+	if oldIdx >= 0 {
+		// Migrate from old field
+		return migrateFromOldField(configFile, &root, rootMap, oldIdx)
+	}
+
+	// Neither field exists - add default antigravity config
+	return addDefaultAntigravityConfig(configFile, &root, rootMap)
+}
+
+// migrateFromOldField converts oauth-model-mappings to oauth-model-alias
+func migrateFromOldField(configFile string, root *yaml.Node, rootMap *yaml.Node, oldIdx int) (bool, error) {
+	if oldIdx+1 >= len(rootMap.Content) {
+		return false, nil
+	}
+	oldValue := rootMap.Content[oldIdx+1]
+	if oldValue == nil || oldValue.Kind != yaml.MappingNode {
+		return false, nil
+	}
+
+	// Parse the old aliases
+	oldAliases := parseOldAliasNode(oldValue)
+	if len(oldAliases) == 0 {
+		// Remove the old field and write
+		removeMapKeyByIndex(rootMap, oldIdx)
+		return writeYAMLNode(configFile, root)
+	}
+
+	// Rebuild the alias map, converting legacy antigravity model names via the table above
+	newAliases := make(map[string][]OAuthModelAlias, len(oldAliases))
+	for channel, entries := range oldAliases {
+		converted := make([]OAuthModelAlias, 0, len(entries))
+		for _, entry := 
range entries { + newEntry := OAuthModelAlias{ + Name: entry.Name, + Alias: entry.Alias, + Fork: entry.Fork, + } + // Convert model names for antigravity channel + if strings.EqualFold(channel, "antigravity") { + if actual, ok := antigravityModelConversionTable[entry.Name]; ok { + newEntry.Name = actual + } + } + converted = append(converted, newEntry) + } + newAliases[channel] = converted + } + + // For antigravity channel, supplement missing default aliases + if antigravityEntries, exists := newAliases["antigravity"]; exists { + // Build a set of already configured model names (upstream names) + configuredModels := make(map[string]bool, len(antigravityEntries)) + for _, entry := range antigravityEntries { + configuredModels[entry.Name] = true + } + + // Add missing default aliases + for _, defaultAlias := range defaultAntigravityAliases() { + if !configuredModels[defaultAlias.Name] { + antigravityEntries = append(antigravityEntries, defaultAlias) + } + } + newAliases["antigravity"] = antigravityEntries + } + + // Build new node + newNode := buildOAuthModelAliasNode(newAliases) + + // Replace old key with new key and value + rootMap.Content[oldIdx].Value = "oauth-model-alias" + rootMap.Content[oldIdx+1] = newNode + + return writeYAMLNode(configFile, root) +} + +// addDefaultAntigravityConfig adds the default antigravity configuration +func addDefaultAntigravityConfig(configFile string, root *yaml.Node, rootMap *yaml.Node) (bool, error) { + defaults := map[string][]OAuthModelAlias{ + "antigravity": defaultAntigravityAliases(), + } + newNode := buildOAuthModelAliasNode(defaults) + + // Add new key-value pair + keyNode := &yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "oauth-model-alias"} + rootMap.Content = append(rootMap.Content, keyNode, newNode) + + return writeYAMLNode(configFile, root) +} + +// parseOldAliasNode parses the old oauth-model-mappings node structure +func parseOldAliasNode(node *yaml.Node) map[string][]OAuthModelAlias { + if node == nil || node.Kind != yaml.MappingNode { + return nil + } + result := make(map[string][]OAuthModelAlias) + for i := 0; i+1 < len(node.Content); i += 2 { + channelNode := node.Content[i] + entriesNode := node.Content[i+1] + if channelNode == nil || entriesNode == nil { + continue + } + channel := strings.ToLower(strings.TrimSpace(channelNode.Value)) + if channel == "" || entriesNode.Kind != yaml.SequenceNode { + continue + } + entries := make([]OAuthModelAlias, 0, len(entriesNode.Content)) + for _, entryNode := range entriesNode.Content { + if entryNode == nil || entryNode.Kind != yaml.MappingNode { + continue + } + entry := parseAliasEntry(entryNode) + if entry.Name != "" && entry.Alias != "" { + entries = append(entries, entry) + } + } + if len(entries) > 0 { + result[channel] = entries + } + } + return result +} + +// parseAliasEntry parses a single alias entry node +func parseAliasEntry(node *yaml.Node) OAuthModelAlias { + var entry OAuthModelAlias + for i := 0; i+1 < len(node.Content); i += 2 { + keyNode := node.Content[i] + valNode := node.Content[i+1] + if keyNode == nil || valNode == nil { + continue + } + switch strings.ToLower(strings.TrimSpace(keyNode.Value)) { + case "name": + entry.Name = strings.TrimSpace(valNode.Value) + case "alias": + entry.Alias = strings.TrimSpace(valNode.Value) + case "fork": + entry.Fork = strings.ToLower(strings.TrimSpace(valNode.Value)) == "true" + } + } + return entry +} + +// buildOAuthModelAliasNode creates a YAML node for oauth-model-alias +func buildOAuthModelAliasNode(aliases 
map[string][]OAuthModelAlias) *yaml.Node {
+	node := &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"}
+	for channel, entries := range aliases {
+		channelNode := &yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: channel}
+		entriesNode := &yaml.Node{Kind: yaml.SequenceNode, Tag: "!!seq"}
+		for _, entry := range entries {
+			entryNode := &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"}
+			entryNode.Content = append(entryNode.Content,
+				&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "name"},
+				&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: entry.Name},
+				&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "alias"},
+				&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: entry.Alias},
+			)
+			if entry.Fork {
+				entryNode.Content = append(entryNode.Content,
+					&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "fork"},
+					&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!bool", Value: "true"},
+				)
+			}
+			entriesNode.Content = append(entriesNode.Content, entryNode)
+		}
+		node.Content = append(node.Content, channelNode, entriesNode)
+	}
+	return node
+}
+
+// removeMapKeyByIndex removes a key-value pair from a mapping node by the key's index in the node's Content slice.
+func removeMapKeyByIndex(mapNode *yaml.Node, keyIdx int) {
+	if mapNode == nil || mapNode.Kind != yaml.MappingNode {
+		return
+	}
+	if keyIdx < 0 || keyIdx+1 >= len(mapNode.Content) {
+		return
+	}
+	mapNode.Content = append(mapNode.Content[:keyIdx], mapNode.Content[keyIdx+2:]...)
+}
+
+// writeYAMLNode writes the YAML node tree back to the given config file.
+func writeYAMLNode(configFile string, root *yaml.Node) (bool, error) {
+	f, err := os.Create(configFile)
+	if err != nil {
+		return false, err
+	}
+	defer f.Close()
+
+	enc := yaml.NewEncoder(f)
+	enc.SetIndent(2)
+	if err := enc.Encode(root); err != nil {
+		return false, err
+	}
+	if err := enc.Close(); err != nil {
+		return false, err
+	}
+	return true, nil
+}
diff --git a/internal/config/oauth_model_alias_migration_test.go b/internal/config/oauth_model_alias_migration_test.go
new file mode 100644
index 00000000..db9c0a11
--- /dev/null
+++ b/internal/config/oauth_model_alias_migration_test.go
@@ -0,0 +1,242 @@
+package config
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"gopkg.in/yaml.v3"
+)
+
+func TestMigrateOAuthModelAlias_SkipsIfNewFieldExists(t *testing.T) {
+	t.Parallel()
+
+	dir := t.TempDir()
+	configFile := filepath.Join(dir, "config.yaml")
+
+	content := `oauth-model-alias:
+  gemini-cli:
+    - name: "gemini-2.5-pro"
+      alias: "g2.5p"
+`
+	if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	migrated, err := MigrateOAuthModelAlias(configFile)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if migrated {
+		t.Fatal("expected no migration when oauth-model-alias already exists")
+	}
+
+	// Verify file unchanged
+	data, _ := os.ReadFile(configFile)
+	if !strings.Contains(string(data), "oauth-model-alias:") {
+		t.Fatal("file should still contain oauth-model-alias")
+	}
+}
+
+func TestMigrateOAuthModelAlias_MigratesOldField(t *testing.T) {
+	t.Parallel()
+
+	dir := t.TempDir()
+	configFile := filepath.Join(dir, "config.yaml")
+
+	content := `oauth-model-mappings:
+  gemini-cli:
+    - name: "gemini-2.5-pro"
+      alias: "g2.5p"
+      fork: true
+`
+	if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	migrated, err := MigrateOAuthModelAlias(configFile)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !migrated {
+		t.Fatal("expected migration to occur")
+	}
+
+	// Verify new field exists and old field removed
+	data, _ := os.ReadFile(configFile)
+	if strings.Contains(string(data), "oauth-model-mappings:") {
+		t.Fatal("old field should be removed")
+	}
+	if !strings.Contains(string(data), "oauth-model-alias:") {
+		t.Fatal("new field should exist")
+	}
+
+	// Parse and verify structure
+	var root yaml.Node
+	if err := yaml.Unmarshal(data, &root); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestMigrateOAuthModelAlias_ConvertsAntigravityModels(t *testing.T) {
+	t.Parallel()
+
+	dir := t.TempDir()
+	configFile := filepath.Join(dir, "config.yaml")
+
+	// Use old model names that should be converted
+	content := `oauth-model-mappings:
+  antigravity:
+    - name: "gemini-2.5-computer-use-preview-10-2025"
+      alias: "computer-use"
+    - name: "gemini-3-pro-preview"
+      alias: "g3p"
+`
+	if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	migrated, err := MigrateOAuthModelAlias(configFile)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !migrated {
+		t.Fatal("expected migration to occur")
+	}
+
+	// Verify model names were converted
+	data, _ := os.ReadFile(configFile)
+	content = string(data)
+	if !strings.Contains(content, "rev19-uic3-1p") {
+		t.Fatal("expected gemini-2.5-computer-use-preview-10-2025 to be converted to rev19-uic3-1p")
+	}
+	if !strings.Contains(content, "gemini-3-pro-high") {
+		t.Fatal("expected gemini-3-pro-preview to be converted to gemini-3-pro-high")
+	}
+
+	// Verify missing default aliases were supplemented
+	if !strings.Contains(content, "gemini-3-pro-image") {
+		t.Fatal("expected missing default alias gemini-3-pro-image to be added")
+	}
+	if !strings.Contains(content, "gemini-3-flash") {
+		t.Fatal("expected missing default alias gemini-3-flash to be added")
+	}
+	if !strings.Contains(content, "claude-sonnet-4-5") {
+		t.Fatal("expected missing default alias claude-sonnet-4-5 to be added")
+	}
+	if !strings.Contains(content, "claude-sonnet-4-5-thinking") {
+		t.Fatal("expected missing default alias claude-sonnet-4-5-thinking to be added")
+	}
+	if !strings.Contains(content, "claude-opus-4-5-thinking") {
+		t.Fatal("expected missing default alias claude-opus-4-5-thinking to be added")
+	}
+}
+
+func TestMigrateOAuthModelAlias_AddsDefaultIfNeitherExists(t *testing.T) {
+	t.Parallel()
+
+	dir := t.TempDir()
+	configFile := filepath.Join(dir, "config.yaml")
+
+	content := `debug: true
+port: 8080
+`
+	if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	migrated, err := MigrateOAuthModelAlias(configFile)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !migrated {
+		t.Fatal("expected migration to add default config")
+	}
+
+	// Verify default antigravity config was added
+	data, _ := os.ReadFile(configFile)
+	content = string(data)
+	if !strings.Contains(content, "oauth-model-alias:") {
+		t.Fatal("expected oauth-model-alias to be added")
+	}
+	if !strings.Contains(content, "antigravity:") {
+		t.Fatal("expected antigravity channel to be added")
+	}
+	if !strings.Contains(content, "rev19-uic3-1p") {
+		t.Fatal("expected default antigravity aliases to include rev19-uic3-1p")
+	}
+}
+
+func TestMigrateOAuthModelAlias_PreservesOtherConfig(t *testing.T) {
+	t.Parallel()
+
+	dir := t.TempDir()
+	configFile := filepath.Join(dir, "config.yaml")
+
+	content := `debug: true
+port: 8080
+oauth-model-mappings:
+  gemini-cli:
+    - name: "test"
+      alias: "t"
+api-keys:
+  - "key1"
+  - "key2"
+`
+	if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	migrated, err := MigrateOAuthModelAlias(configFile)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !migrated {
+		t.Fatal("expected migration to occur")
+	}
+
+	// Verify other config preserved
+	data, _ := os.ReadFile(configFile)
+	content = string(data)
+	if !strings.Contains(content, "debug: true") {
+		t.Fatal("expected debug field to be preserved")
+	}
+	if !strings.Contains(content, "port: 8080") {
+		t.Fatal("expected port field to be preserved")
+	}
+	if !strings.Contains(content, "api-keys:") {
+		t.Fatal("expected api-keys field to be preserved")
+	}
+}
+
+func TestMigrateOAuthModelAlias_NonexistentFile(t *testing.T) {
+	t.Parallel()
+
+	migrated, err := MigrateOAuthModelAlias("/nonexistent/path/config.yaml")
+	if err != nil {
+		t.Fatalf("unexpected error for nonexistent file: %v", err)
+	}
+	if migrated {
+		t.Fatal("expected no migration for nonexistent file")
+	}
+}
+
+func TestMigrateOAuthModelAlias_EmptyFile(t *testing.T) {
+	t.Parallel()
+
+	dir := t.TempDir()
+	configFile := filepath.Join(dir, "config.yaml")
+
+	if err := os.WriteFile(configFile, []byte(""), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	migrated, err := MigrateOAuthModelAlias(configFile)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if migrated {
+		t.Fatal("expected no migration for empty file")
+	}
+}
diff --git a/internal/config/oauth_model_alias_test.go b/internal/config/oauth_model_alias_test.go
new file mode 100644
index 00000000..a5886474
--- /dev/null
+++ b/internal/config/oauth_model_alias_test.go
@@ -0,0 +1,56 @@
+package config
+
+import "testing"
+
+func TestSanitizeOAuthModelAlias_PreservesForkFlag(t *testing.T) {
+	cfg := &Config{
+		OAuthModelAlias: map[string][]OAuthModelAlias{
+			" CoDeX ": {
+				{Name: " gpt-5 ", Alias: " g5 ", Fork: true},
+				{Name: "gpt-6", Alias: "g6"},
+			},
+		},
+	}
+
+	cfg.SanitizeOAuthModelAlias()
+
+	aliases := cfg.OAuthModelAlias["codex"]
+	if len(aliases) != 2 {
+		t.Fatalf("expected 2 sanitized aliases, got %d", len(aliases))
+	}
+	if aliases[0].Name != "gpt-5" || aliases[0].Alias != "g5" || !aliases[0].Fork {
+		t.Fatalf("expected first alias to be gpt-5->g5 fork=true, got name=%q alias=%q fork=%v", aliases[0].Name, aliases[0].Alias, aliases[0].Fork)
+	}
+	if aliases[1].Name != "gpt-6" || aliases[1].Alias != "g6" || aliases[1].Fork {
+		t.Fatalf("expected second alias to be gpt-6->g6 fork=false, got name=%q alias=%q fork=%v", aliases[1].Name, aliases[1].Alias, aliases[1].Fork)
+	}
+}
+
+func TestSanitizeOAuthModelAlias_AllowsMultipleAliasesForSameName(t *testing.T) {
+	cfg := &Config{
+		OAuthModelAlias: map[string][]OAuthModelAlias{
+			"antigravity": {
+				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true},
+				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true},
+				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true},
+			},
+		},
+	}
+
+	cfg.SanitizeOAuthModelAlias()
+
+	aliases := cfg.OAuthModelAlias["antigravity"]
+	expected := []OAuthModelAlias{
+		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true},
+		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true},
+		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true},
+	}
+	if len(aliases) != len(expected) {
+		t.Fatalf("expected %d sanitized aliases, got %d", len(expected), len(aliases))
+	}
+	for i, exp := range expected {
+		if aliases[i].Name != exp.Name || aliases[i].Alias != exp.Alias || aliases[i].Fork != exp.Fork {
+			t.Fatalf("expected alias %d to be name=%q alias=%q fork=%v, got name=%q alias=%q fork=%v", i, exp.Name, exp.Alias, exp.Fork, aliases[i].Name, aliases[i].Alias, aliases[i].Fork)
+		}
+	}
+}
diff --git a/internal/config/oauth_model_mappings_test.go b/internal/config/oauth_model_mappings_test.go
deleted file mode 100644
index 10bfe165..00000000
--- a/internal/config/oauth_model_mappings_test.go
+++ /dev/null
@@ -1,56 +0,0 @@
-package config
-
-import "testing"
-
-func TestSanitizeOAuthModelMappings_PreservesForkFlag(t *testing.T) {
-	cfg := &Config{
-		OAuthModelMappings: map[string][]ModelNameMapping{
-			" CoDeX ": {
-				{Name: " gpt-5 ", Alias: " g5 ", Fork: true},
-				{Name: "gpt-6", Alias: "g6"},
-			},
-		},
-	}
-
-	cfg.SanitizeOAuthModelMappings()
-
-	mappings := cfg.OAuthModelMappings["codex"]
-	if len(mappings) != 2 {
-		t.Fatalf("expected 2 sanitized mappings, got %d", len(mappings))
-	}
-	if mappings[0].Name != "gpt-5" || mappings[0].Alias != "g5" || !mappings[0].Fork {
-		t.Fatalf("expected first mapping to be gpt-5->g5 fork=true, got name=%q alias=%q fork=%v", mappings[0].Name, mappings[0].Alias, mappings[0].Fork)
-	}
-	if mappings[1].Name != "gpt-6" || mappings[1].Alias != "g6" || mappings[1].Fork {
-		t.Fatalf("expected second mapping to be gpt-6->g6 fork=false, got name=%q alias=%q fork=%v", mappings[1].Name, mappings[1].Alias, mappings[1].Fork)
-	}
-}
-
-func TestSanitizeOAuthModelMappings_AllowsMultipleAliasesForSameName(t *testing.T) {
-	cfg := &Config{
-		OAuthModelMappings: map[string][]ModelNameMapping{
-			"antigravity": {
-				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true},
-				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true},
-				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true},
-			},
-		},
-	}
-
-	cfg.SanitizeOAuthModelMappings()
-
-	mappings := cfg.OAuthModelMappings["antigravity"]
-	expected := []ModelNameMapping{
-		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true},
-		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true},
-		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true},
-	}
-	if len(mappings) != len(expected) {
-		t.Fatalf("expected %d sanitized mappings, got %d", len(expected), len(mappings))
-	}
-	for i, exp := range expected {
-		if mappings[i].Name != exp.Name || mappings[i].Alias != exp.Alias || mappings[i].Fork != exp.Fork {
-			t.Fatalf("expected mapping %d to be name=%q alias=%q fork=%v, got name=%q alias=%q fork=%v", i, exp.Name, exp.Alias, exp.Fork, mappings[i].Name, mappings[i].Alias, mappings[i].Fork)
-		}
-	}
-}
diff --git a/internal/config/vertex_compat.go b/internal/config/vertex_compat.go
index 94e162b7..786c5318 100644
--- a/internal/config/vertex_compat.go
+++ b/internal/config/vertex_compat.go
@@ -13,6 +13,10 @@ type VertexCompatKey struct {
 	// Maps to the x-goog-api-key header.
 	APIKey string `yaml:"api-key" json:"api-key"`
 
+	// Priority controls selection preference when multiple credentials match.
+	// Higher values are preferred; defaults to 0.
+	Priority int `yaml:"priority,omitempty" json:"priority,omitempty"`
+
 	// Prefix optionally namespaces model aliases for this credential (e.g., "teamA/vertex-pro").
 	Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"`
@@ -32,6 +36,9 @@ type VertexCompatKey struct {
 	Models []VertexCompatModel `yaml:"models,omitempty" json:"models,omitempty"`
 }
 
+func (k VertexCompatKey) GetAPIKey() string  { return k.APIKey }
+func (k VertexCompatKey) GetBaseURL() string { return k.BaseURL }
+
 // VertexCompatModel represents a model configuration for Vertex compatibility,
 // including the actual model name and its alias for API routing.
 type VertexCompatModel struct {
diff --git a/internal/logging/global_logger.go b/internal/logging/global_logger.go
index f87b10e4..28c9f3b9 100644
--- a/internal/logging/global_logger.go
+++ b/internal/logging/global_logger.go
@@ -29,6 +29,9 @@ var (
 // Format: [2025-12-23 20:14:04] [debug] [manager.go:524] | a1b2c3d4 | Use API key sk-9...0RHO for model gpt-5.2
 type LogFormatter struct{}
 
+// logFieldOrder defines which structured log fields are printed and their display order.
+var logFieldOrder = []string{"provider", "model", "mode", "budget", "level", "original_mode", "original_value", "min", "max", "clamped_to", "error"}
+
 // Format renders a single log entry with custom formatting.
 func (m *LogFormatter) Format(entry *log.Entry) ([]byte, error) {
 	var buffer *bytes.Buffer
@@ -52,11 +55,25 @@ func (m *LogFormatter) Format(entry *log.Entry) ([]byte, error) {
 	}
 	levelStr := fmt.Sprintf("%-5s", level)
 
+	// Build fields string (only print fields in logFieldOrder)
+	var fieldsStr string
+	if len(entry.Data) > 0 {
+		var fields []string
+		for _, k := range logFieldOrder {
+			if v, ok := entry.Data[k]; ok {
+				fields = append(fields, fmt.Sprintf("%s=%v", k, v))
+			}
+		}
+		if len(fields) > 0 {
+			fieldsStr = " " + strings.Join(fields, " ")
+		}
+	}
+
 	var formatted string
 	if entry.Caller != nil {
-		formatted = fmt.Sprintf("[%s] [%s] [%s] [%s:%d] %s\n", timestamp, reqID, levelStr, filepath.Base(entry.Caller.File), entry.Caller.Line, message)
+		formatted = fmt.Sprintf("[%s] [%s] [%s] [%s:%d] %s%s\n", timestamp, reqID, levelStr, filepath.Base(entry.Caller.File), entry.Caller.Line, message, fieldsStr)
 	} else {
-		formatted = fmt.Sprintf("[%s] [%s] [%s] %s\n", timestamp, reqID, levelStr, message)
+		formatted = fmt.Sprintf("[%s] [%s] [%s] %s%s\n", timestamp, reqID, levelStr, message, fieldsStr)
 	}
 
 	buffer.WriteString(formatted)
@@ -104,6 +121,24 @@ func isDirWritable(dir string) bool {
 	return true
 }
 
+// ResolveLogDirectory determines the directory used for application logs.
+func ResolveLogDirectory(cfg *config.Config) string {
+	logDir := "logs"
+	if base := util.WritablePath(); base != "" {
+		return filepath.Join(base, "logs")
+	}
+	if cfg == nil {
+		return logDir
+	}
+	if !isDirWritable(logDir) {
+		authDir := strings.TrimSpace(cfg.AuthDir)
+		if authDir != "" {
+			logDir = filepath.Join(authDir, "logs")
+		}
+	}
+	return logDir
+}
+
 // ConfigureLogOutput switches the global log destination between rotating files and stdout.
 // When logsMaxTotalSizeMB > 0, a background cleaner removes the oldest log files in the logs directory
 // until the total size is within the limit.
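Note: ResolveLogDirectory lifts the directory-selection logic out of ConfigureLogOutput (next hunk) so other components can resolve the same path. A minimal usage sketch — the function, Config fields, and fallback order are taken from this diff, and internal packages are only importable from within this repository:

	// Fallback order: util.WritablePath()/logs -> ./logs (if writable) -> cfg.AuthDir/logs.
	logDir := logging.ResolveLogDirectory(cfg)
	fmt.Println("log directory:", logDir)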
@@ -113,12 +148,7 @@ func ConfigureLogOutput(cfg *config.Config) error {
 	writerMu.Lock()
 	defer writerMu.Unlock()
 
-	logDir := "logs"
-	if base := util.WritablePath(); base != "" {
-		logDir = filepath.Join(base, "logs")
-	} else if !isDirWritable(logDir) {
-		logDir = filepath.Join(cfg.AuthDir, "logs")
-	}
+	logDir := ResolveLogDirectory(cfg)
 
 	protectedPath := ""
 	if cfg.LoggingToFile {
diff --git a/internal/misc/codex_instructions.go b/internal/misc/codex_instructions.go
index 9d0971c5..d50e8cef 100644
--- a/internal/misc/codex_instructions.go
+++ b/internal/misc/codex_instructions.go
@@ -7,11 +7,27 @@ import (
 	"embed"
 	_ "embed"
 	"strings"
+	"sync/atomic"
 
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
 
+// codexInstructionsEnabled controls whether CodexInstructionsForModel returns official instructions.
+// When false (default), CodexInstructionsForModel returns (true, "") immediately.
+// Set via SetCodexInstructionsEnabled from config.
+var codexInstructionsEnabled atomic.Bool
+
+// SetCodexInstructionsEnabled sets whether codex instructions processing is enabled.
+func SetCodexInstructionsEnabled(enabled bool) {
+	codexInstructionsEnabled.Store(enabled)
+}
+
+// GetCodexInstructionsEnabled returns whether codex instructions processing is enabled.
+func GetCodexInstructionsEnabled() bool {
+	return codexInstructionsEnabled.Load()
+}
+
 //go:embed codex_instructions
 var codexInstructionsDir embed.FS
 
@@ -124,6 +140,9 @@ func codexInstructionsForCodex(modelName, systemInstructions string) (bool, stri
 }
 
 func CodexInstructionsForModel(modelName, systemInstructions, userAgent string) (bool, string) {
+	if !GetCodexInstructionsEnabled() {
+		return true, ""
+	}
 	if IsOpenCodeUserAgent(userAgent) {
 		return codexInstructionsForOpenCode(systemInstructions)
 	}
diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go
index bea2ecc3..1d29bda2 100644
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -27,7 +27,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName: "Claude 4.5 Sonnet",
 			ContextLength: 200000,
 			MaxCompletionTokens: 64000,
-			Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
 		},
 		{
 			ID: "claude-opus-4-5-20251101",
@@ -39,7 +39,7 @@ func GetClaudeModels() []*ModelInfo {
 			Description: "Premium model combining maximum intelligence with practical performance",
 			ContextLength: 200000,
 			MaxCompletionTokens: 64000,
-			Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
 		},
 		{
 			ID: "claude-opus-4-1-20250805",
@@ -50,7 +50,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName: "Claude 4.1 Opus",
 			ContextLength: 200000,
 			MaxCompletionTokens: 32000,
-			Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
 		},
 		{
 			ID: "claude-opus-4-20250514",
@@ -61,7 +61,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName: "Claude 4 Opus",
 			ContextLength: 200000,
 			MaxCompletionTokens: 32000,
-			Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
 		},
 		{
 			ID: "claude-sonnet-4-20250514",
@@ -72,7 +72,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName: "Claude 4 Sonnet",
 			ContextLength: 200000,
 			MaxCompletionTokens: 64000,
-			Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
 		},
 		{
 			ID: "claude-3-7-sonnet-20250219",
@@ -83,7 +83,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName: "Claude 3.7 Sonnet",
 			ContextLength: 128000,
 			MaxCompletionTokens: 8192,
-			Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
 		},
 		{
 			ID: "claude-3-5-haiku-20241022",
@@ -287,6 +287,67 @@ func GetGeminiVertexModels() []*ModelInfo {
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
 		},
+		// Imagen image generation models - use :predict action
+		{
+			ID: "imagen-4.0-generate-001",
+			Object: "model",
+			Created: 1750000000,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/imagen-4.0-generate-001",
+			Version: "4.0",
+			DisplayName: "Imagen 4.0 Generate",
+			Description: "Imagen 4.0 image generation model",
+			SupportedGenerationMethods: []string{"predict"},
+		},
+		{
+			ID: "imagen-4.0-ultra-generate-001",
+			Object: "model",
+			Created: 1750000000,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/imagen-4.0-ultra-generate-001",
+			Version: "4.0",
+			DisplayName: "Imagen 4.0 Ultra Generate",
+			Description: "Imagen 4.0 Ultra high-quality image generation model",
+			SupportedGenerationMethods: []string{"predict"},
+		},
+		{
+			ID: "imagen-3.0-generate-002",
+			Object: "model",
+			Created: 1740000000,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/imagen-3.0-generate-002",
+			Version: "3.0",
+			DisplayName: "Imagen 3.0 Generate",
+			Description: "Imagen 3.0 image generation model",
+			SupportedGenerationMethods: []string{"predict"},
+		},
+		{
+			ID: "imagen-3.0-fast-generate-001",
+			Object: "model",
+			Created: 1740000000,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/imagen-3.0-fast-generate-001",
+			Version: "3.0",
+			DisplayName: "Imagen 3.0 Fast Generate",
+			Description: "Imagen 3.0 fast image generation model",
+			SupportedGenerationMethods: []string{"predict"},
+		},
+		{
+			ID: "imagen-4.0-fast-generate-001",
+			Object: "model",
+			Created: 1750000000,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/imagen-4.0-fast-generate-001",
+			Version: "4.0",
+			DisplayName: "Imagen 4.0 Fast Generate",
+			Description: "Imagen 4.0 fast image generation model",
+			SupportedGenerationMethods: []string{"predict"},
+		},
 	}
 }
@@ -432,7 +493,7 @@ func GetAIStudioModels() []*ModelInfo {
 			InputTokenLimit: 1048576,
 			OutputTokenLimit: 65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
+			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
 			ID: "gemini-3-flash-preview",
@@ -447,7 +508,7 @@ func GetAIStudioModels() []*ModelInfo {
 			InputTokenLimit: 1048576,
 			OutputTokenLimit: 65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
+			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
 			ID: "gemini-pro-latest",
@@ -742,6 +803,7 @@ func GetIFlowModels() []*ModelInfo {
 		{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
 		{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
 		{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
+		{ID: "iflow-rome-30ba3b", DisplayName: "iFlow-ROME", Description: "iFlow Rome 30BA3B model", Created: 1736899200},
 	}
 	models := make([]*ModelInfo, 0, len(entries))
 	for _, entry := range entries {
@@ -764,21 +826,23 @@ func GetIFlowModels() []*ModelInfo {
 type AntigravityModelConfig struct {
 	Thinking *ThinkingSupport
 	MaxCompletionTokens int
-	Name string
 }
 
 // GetAntigravityModelConfig returns static configuration for antigravity models.
-// Keys use the ALIASED model names (after modelName2Alias conversion) for direct lookup.
+// Keys use upstream model names returned by the Antigravity models endpoint.
 func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
 	return map[string]*AntigravityModelConfig{
-		"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"},
-		"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"},
-		"gemini-2.5-computer-use-preview-10-2025": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-2.5-computer-use-preview-10-2025"},
-		"gemini-3-pro-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-preview"},
-		"gemini-3-pro-image-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image-preview"},
-		"gemini-3-flash-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash-preview"},
-		"gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
-		"gemini-claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
+		"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
+		"rev19-uic3-1p": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
+		"gemini-3-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
+		"gemini-3-pro-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
+		"gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
+		"claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		"claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		"claude-sonnet-4-5": {MaxCompletionTokens: 64000},
+		"gpt-oss-120b-medium": {},
+		"tab_flash_lite_preview": {},
 	}
 }
@@ -788,6 +852,7 @@ func LookupStaticModelInfo(modelID string) *ModelInfo {
 	if modelID == "" {
 		return nil
 	}
+
 	allModels := [][]*ModelInfo{
 		GetClaudeModels(),
 		GetGeminiModels(),
@@ -805,5 +870,15 @@ func LookupStaticModelInfo(modelID string) *ModelInfo {
 			}
 		}
 	}
+
+	// Check Antigravity static config
+	if cfg := GetAntigravityModelConfig()[modelID]; cfg != nil {
+		return &ModelInfo{
+			ID: modelID,
+			Thinking: cfg.Thinking,
+			MaxCompletionTokens: cfg.MaxCompletionTokens,
+		}
+	}
+
 	return nil
 }
diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go
index a4e9acdf..970c2dc9 100644
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -51,6 +51,11 @@ type ModelInfo struct {
 	// Thinking holds provider-specific reasoning/thinking budget capabilities.
 	// This is optional and currently used for Gemini thinking budget normalization.
 	Thinking *ThinkingSupport `json:"thinking,omitempty"`
+
+	// UserDefined indicates this model was defined through config file's models[]
+	// array (e.g., openai-compatibility.*.models[], *-api-key.models[]).
+	// UserDefined models have thinking configuration passed through without validation.
+	UserDefined bool `json:"-"`
 }
 
 // ThinkingSupport describes a model family's supported internal reasoning budget range.
@@ -127,6 +132,21 @@ func GetGlobalRegistry() *ModelRegistry {
 	return globalRegistry
 }
 
+// LookupModelInfo searches the dynamic registry first, then falls back to static model definitions.
+//
+// This helper exists because some code paths only have a model ID and still need Thinking and
+// max completion token metadata even when the dynamic registry hasn't been populated.
+func LookupModelInfo(modelID string) *ModelInfo {
+	modelID = strings.TrimSpace(modelID)
+	if modelID == "" {
+		return nil
+	}
+	if info := GetGlobalRegistry().GetModelInfo(modelID); info != nil {
+		return info
+	}
+	return LookupStaticModelInfo(modelID)
+}
+
 // SetHook sets an optional hook for observing model registration changes.
 func (r *ModelRegistry) SetHook(hook ModelRegistryHook) {
 	if r == nil {
diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go
index c3e3edb0..a020c670 100644
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -14,7 +14,7 @@ import (
 	"strings"
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/wsrelay"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -111,7 +111,8 @@ func (e *AIStudioExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.A
 
 // Execute performs a non-streaming request to the AI Studio API.
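+// The requested model ID may carry a thinking suffix; Execute resolves the base
+// name via thinking.ParseSuffix and uses it for usage reporting and the endpoint.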
 func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 
 	translatedReq, body, err := e.translateRequest(req, opts, false)
@@ -119,7 +120,7 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
 		return resp, err
 	}
 
-	endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt)
+	endpoint := e.buildEndpoint(baseModel, body.action, opts.Alt)
 	wsReq := &wsrelay.HTTPRequest{
 		Method: http.MethodPost,
 		URL: endpoint,
@@ -166,7 +167,8 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
 
 // ExecuteStream performs a streaming request to the AI Studio API.
 func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 
 	translatedReq, body, err := e.translateRequest(req, opts, true)
@@ -174,7 +176,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 		return nil, err
 	}
 
-	endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt)
+	endpoint := e.buildEndpoint(baseModel, body.action, opts.Alt)
 	wsReq := &wsrelay.HTTPRequest{
 		Method: http.MethodPost,
 		URL: endpoint,
@@ -315,6 +317,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 
 // CountTokens counts tokens for the given request using the AI Studio API.
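+// CountTokens likewise resolves the base model from any thinking suffix before
+// building the countTokens endpoint; tools and safety settings are stripped first.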
 func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	_, body, err := e.translateRequest(req, opts, false)
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
@@ -324,7 +327,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
 	body.payload, _ = sjson.DeleteBytes(body.payload, "tools")
 	body.payload, _ = sjson.DeleteBytes(body.payload, "safetySettings")
 
-	endpoint := e.buildEndpoint(req.Model, "countTokens", "")
+	endpoint := e.buildEndpoint(baseModel, "countTokens", "")
 	wsReq := &wsrelay.HTTPRequest{
 		Method: http.MethodPost,
 		URL: endpoint,
@@ -380,22 +383,22 @@ type translatedPayload struct {
 }
 
 func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool) ([]byte, translatedPayload, error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, stream)
-	payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
-	payload = ApplyThinkingMetadata(payload, req.Metadata, req.Model)
-	payload = util.ApplyGemini3ThinkingLevelFromMetadata(req.Model, req.Metadata, payload)
-	payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload)
-	payload = util.ConvertThinkingLevelToBudget(payload, req.Model, true)
-	payload = util.NormalizeGeminiThinkingBudget(req.Model, payload, true)
-	payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
-	payload = fixGeminiImageAspectRatio(req.Model, payload)
-	payload = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", payload, originalTranslated)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream)
+	payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
+	payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String())
+	if err != nil {
+		return nil, translatedPayload{}, err
+	}
+	payload = fixGeminiImageAspectRatio(baseModel, payload)
+	payload = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", payload, originalTranslated)
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens")
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseMimeType")
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseJsonSchema")
diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go
index 8d1ef23d..df26e376 100644
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -24,7 +24,9 @@ import (
 	"github.com/google/uuid"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	sdkAuth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" @@ -107,8 +109,10 @@ func (e *AntigravityExecutor) HttpRequest(ctx context.Context, auth *cliproxyaut // Execute performs a non-streaming request to the Antigravity API. func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { - isClaude := strings.Contains(strings.ToLower(req.Model), "claude") - if isClaude || strings.Contains(req.Model, "gemini-3-pro") { + baseModel := thinking.ParseSuffix(req.Model).ModelName + isClaude := strings.Contains(strings.ToLower(baseModel), "claude") + + if isClaude || strings.Contains(baseModel, "gemini-3-pro") { return e.executeClaudeNonStream(ctx, auth, req, opts) } @@ -120,23 +124,25 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au auth = updatedAuth } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("antigravity") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false) - translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) + translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model) - translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated) - translated = normalizeAntigravityThinking(req.Model, translated, isClaude) - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated) + translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String()) + if err != nil { + return resp, err + } + + translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -146,7 +152,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au var lastErr error for idx, baseURL := range baseURLs { - httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, false, opts.Alt, baseURL) + httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, false, opts.Alt, baseURL) if errReq != nil { err = errReq return resp, err @@ -227,6 +233,8 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au // executeClaudeNonStream performs a claude non-streaming request to the Antigravity API. 
 func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
 	if errToken != nil {
 		return resp, errToken
@@ -235,23 +243,25 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
 		auth = updatedAuth
 	}
 
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")
+
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
-	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 
-	translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
-	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
-	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
-	translated = normalizeAntigravityThinking(req.Model, translated, true)
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated)
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String())
+	if err != nil {
+		return resp, err
+	}
+
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated)
 
 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -261,7 +271,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
 
 	var lastErr error
 	for idx, baseURL := range baseURLs {
-		httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, true, opts.Alt, baseURL)
+		httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL)
 		if errReq != nil {
 			err = errReq
 			return resp, err
@@ -507,8 +517,8 @@ func (e *AntigravityExecutor) convertStreamToNonStream(stream []byte) []byte {
 	}
 	if usageResult := responseNode.Get("usageMetadata"); usageResult.Exists() {
 		usageRaw = usageResult.Raw
-	} else if usageResult := root.Get("usageMetadata"); usageResult.Exists() {
-		usageRaw = usageResult.Raw
+	} else if usageMetadataResult := root.Get("usageMetadata"); usageMetadataResult.Exists() {
+		usageRaw = usageMetadataResult.Raw
 	}
 
 	if partsResult := responseNode.Get("candidates.0.content.parts"); partsResult.IsArray() {
@@ -587,6 +597,8 @@ func (e *AntigravityExecutor) convertStreamToNonStream(stream []byte) []byte {
 
 // ExecuteStream performs a streaming request to the Antigravity API.
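+// Thinking configuration is applied via thinking.ApplyThinking before dispatch;
+// each base URL from antigravityBaseURLFallbackOrder is tried until one succeeds.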
 func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	ctx = context.WithValue(ctx, "alt", "")
 
 	token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
@@ -597,25 +609,25 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 		auth = updatedAuth
 	}
 
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 
-	isClaude := strings.Contains(strings.ToLower(req.Model), "claude")
-
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")
+
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
-	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 
-	translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
-	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
-	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
-	translated = normalizeAntigravityThinking(req.Model, translated, isClaude)
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated)
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String())
+	if err != nil {
+		return nil, err
+	}
+
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated)
 
 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -625,12 +637,11 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 
 	var lastErr error
 	for idx, baseURL := range baseURLs {
-		httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, true, opts.Alt, baseURL)
+		httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL)
 		if errReq != nil {
 			err = errReq
 			return nil, err
 		}
-
 		httpResp, errDo := httpClient.Do(httpReq)
 		if errDo != nil {
 			recordAPIResponseError(ctx, e.cfg, errDo)
@@ -771,6 +782,8 @@ func (e *AntigravityExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Au
 
 // CountTokens counts tokens for the given request using the Antigravity API.
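+// The translated payload is prepared once (it does not depend on the base URL)
+// and reused across fallback attempts.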
 func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
 	if errToken != nil {
 		return cliproxyexecutor.Response{}, errToken
@@ -786,7 +799,17 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
 	to := sdktranslator.FromString("antigravity")
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)
 
-	isClaude := strings.Contains(strings.ToLower(req.Model), "claude")
+	// Prepare payload once (doesn't depend on baseURL)
+	payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+
+	payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String())
+	if err != nil {
+		return cliproxyexecutor.Response{}, err
+	}
+
+	payload = deleteJSONField(payload, "project")
+	payload = deleteJSONField(payload, "model")
+	payload = deleteJSONField(payload, "request.safetySettings")
 
 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -803,14 +826,6 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
 
 	var lastErr error
 	for idx, baseURL := range baseURLs {
-		payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-		payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model)
-		payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, payload)
-		payload = normalizeAntigravityThinking(req.Model, payload, isClaude)
-		payload = deleteJSONField(payload, "project")
-		payload = deleteJSONField(payload, "model")
-		payload = deleteJSONField(payload, "request.safetySettings")
-
 		base := strings.TrimSuffix(baseURL, "/")
 		if base == "" {
 			base = buildBaseURL(auth)
@@ -980,35 +995,37 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 	modelConfig := registry.GetAntigravityModelConfig()
 	models := make([]*registry.ModelInfo, 0, len(result.Map()))
 	for originalName := range result.Map() {
-		aliasName := modelName2Alias(originalName)
-		if aliasName != "" {
-			cfg := modelConfig[aliasName]
-			modelName := aliasName
-			if cfg != nil && cfg.Name != "" {
-				modelName = cfg.Name
-			}
-			modelInfo := &registry.ModelInfo{
-				ID: aliasName,
-				Name: modelName,
-				Description: aliasName,
-				DisplayName: aliasName,
-				Version: aliasName,
-				Object: "model",
-				Created: now,
-				OwnedBy: antigravityAuthType,
-				Type: antigravityAuthType,
-			}
-			// Look up Thinking support from static config using alias name
-			if cfg != nil {
-				if cfg.Thinking != nil {
-					modelInfo.Thinking = cfg.Thinking
-				}
-				if cfg.MaxCompletionTokens > 0 {
-					modelInfo.MaxCompletionTokens = cfg.MaxCompletionTokens
-				}
-			}
-			models = append(models, modelInfo)
+		modelID := strings.TrimSpace(originalName)
+		if modelID == "" {
+			continue
 		}
+		switch modelID {
+		case "chat_20706", "chat_23310", "gemini-2.5-flash-thinking", "gemini-3-pro-low", "gemini-2.5-pro":
+			continue
+		}
+		modelCfg := modelConfig[modelID]
+		modelName := modelID
+		modelInfo := &registry.ModelInfo{
+			ID: modelID,
+			Name: modelName,
+			Description: modelID,
+			DisplayName: modelID,
+			Version: modelID,
+			Object: "model",
+			Created: now,
+			OwnedBy: antigravityAuthType,
+			Type: antigravityAuthType,
+		}
+		// Look up Thinking support from static config using upstream model name.
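+		// Models without a static config entry are still exposed, just without
+		// thinking metadata or a MaxCompletionTokens limit.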
+		if modelCfg != nil {
+			if modelCfg.Thinking != nil {
+				modelInfo.Thinking = modelCfg.Thinking
+			}
+			if modelCfg.MaxCompletionTokens > 0 {
+				modelInfo.MaxCompletionTokens = modelCfg.MaxCompletionTokens
+			}
+		}
+		models = append(models, modelInfo)
 	}
 	return models
 }
@@ -1104,12 +1121,49 @@ func (e *AntigravityExecutor) refreshToken(ctx context.Context, auth *cliproxyau
 		auth.Metadata["refresh_token"] = tokenResp.RefreshToken
 	}
 	auth.Metadata["expires_in"] = tokenResp.ExpiresIn
-	auth.Metadata["timestamp"] = time.Now().UnixMilli()
-	auth.Metadata["expired"] = time.Now().Add(time.Duration(tokenResp.ExpiresIn) * time.Second).Format(time.RFC3339)
+	now := time.Now()
+	auth.Metadata["timestamp"] = now.UnixMilli()
+	auth.Metadata["expired"] = now.Add(time.Duration(tokenResp.ExpiresIn) * time.Second).Format(time.RFC3339)
 	auth.Metadata["type"] = antigravityAuthType
+	if errProject := e.ensureAntigravityProjectID(ctx, auth, tokenResp.AccessToken); errProject != nil {
+		log.Warnf("antigravity executor: ensure project id failed: %v", errProject)
+	}
 	return auth, nil
 }
 
+func (e *AntigravityExecutor) ensureAntigravityProjectID(ctx context.Context, auth *cliproxyauth.Auth, accessToken string) error {
+	if auth == nil {
+		return nil
+	}
+
+	if auth.Metadata["project_id"] != nil {
+		return nil
+	}
+
+	token := strings.TrimSpace(accessToken)
+	if token == "" {
+		token = metaStringValue(auth.Metadata, "access_token")
+	}
+	if token == "" {
+		return nil
+	}
+
+	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	projectID, errFetch := sdkAuth.FetchAntigravityProjectID(ctx, token, httpClient)
+	if errFetch != nil {
+		return errFetch
+	}
+	if strings.TrimSpace(projectID) == "" {
+		return nil
+	}
+	if auth.Metadata == nil {
+		auth.Metadata = make(map[string]any)
+	}
+	auth.Metadata["project_id"] = strings.TrimSpace(projectID)
+
+	return nil
+}
+
 func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyauth.Auth, token, modelName string, payload []byte, stream bool, alt, baseURL string) (*http.Request, error) {
 	if token == "" {
 		return nil, statusErr{code: http.StatusUnauthorized, msg: "missing access token"}
@@ -1146,7 +1200,7 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 		}
 	}
 	payload = geminiToAntigravity(modelName, payload, projectID)
-	payload, _ = sjson.SetBytes(payload, "model", alias2ModelName(modelName))
+	payload, _ = sjson.SetBytes(payload, "model", modelName)
 
 	if strings.Contains(modelName, "claude") {
 		strJSON := string(payload)
@@ -1163,7 +1217,7 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 		payload = []byte(strJSON)
 	}
 
-	if strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro-preview") {
+	if strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro-high") {
 		systemInstructionPartsResult := gjson.GetBytes(payload, "request.systemInstruction.parts")
 		payload, _ = sjson.SetBytes(payload, "request.systemInstruction.role", "user")
 		payload, _ = sjson.SetBytes(payload, "request.systemInstruction.parts.0.text", systemInstruction)
@@ -1353,13 +1407,6 @@ func geminiToAntigravity(modelName string, payload []byte, projectID string) []b
 	template, _ = sjson.Delete(template, "request.safetySettings")
 	template, _ = sjson.Set(template, "request.toolConfig.functionCallingConfig.mode", "VALIDATED")
 
-	if !strings.HasPrefix(modelName, "gemini-3-") {
-		if thinkingLevel := gjson.Get(template, "request.generationConfig.thinkingConfig.thinkingLevel"); thinkingLevel.Exists() {
-			template, _ = sjson.Delete(template, "request.generationConfig.thinkingConfig.thinkingLevel")
-			template, _ = sjson.Set(template, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
-		}
-	}
-
 	if strings.Contains(modelName, "claude") {
 		gjson.Get(template, "request.tools").ForEach(func(key, tool gjson.Result) bool {
 			tool.Get("functionDeclarations").ForEach(func(funKey, funcDecl gjson.Result) bool {
@@ -1417,108 +1464,3 @@ func generateProjectID() string {
 	randomPart := strings.ToLower(uuid.NewString())[:5]
 	return adj + "-" + noun + "-" + randomPart
 }
-
-func modelName2Alias(modelName string) string {
-	switch modelName {
-	case "rev19-uic3-1p":
-		return "gemini-2.5-computer-use-preview-10-2025"
-	case "gemini-3-pro-image":
-		return "gemini-3-pro-image-preview"
-	case "gemini-3-pro-high":
-		return "gemini-3-pro-preview"
-	case "gemini-3-flash":
-		return "gemini-3-flash-preview"
-	case "claude-sonnet-4-5":
-		return "gemini-claude-sonnet-4-5"
-	case "claude-sonnet-4-5-thinking":
-		return "gemini-claude-sonnet-4-5-thinking"
-	case "claude-opus-4-5-thinking":
-		return "gemini-claude-opus-4-5-thinking"
-	case "chat_20706", "chat_23310", "gemini-2.5-flash-thinking", "gemini-3-pro-low", "gemini-2.5-pro":
-		return ""
-	default:
-		return modelName
-	}
-}
-
-func alias2ModelName(modelName string) string {
-	switch modelName {
-	case "gemini-2.5-computer-use-preview-10-2025":
-		return "rev19-uic3-1p"
-	case "gemini-3-pro-image-preview":
-		return "gemini-3-pro-image"
-	case "gemini-3-pro-preview":
-		return "gemini-3-pro-high"
-	case "gemini-3-flash-preview":
-		return "gemini-3-flash"
-	case "gemini-claude-sonnet-4-5":
-		return "claude-sonnet-4-5"
-	case "gemini-claude-sonnet-4-5-thinking":
-		return "claude-sonnet-4-5-thinking"
-	case "gemini-claude-opus-4-5-thinking":
-		return "claude-opus-4-5-thinking"
-	default:
-		return modelName
-	}
-}
-
-// normalizeAntigravityThinking clamps or removes thinking config based on model support.
-// For Claude models, it additionally ensures thinking budget < max_tokens.
-func normalizeAntigravityThinking(model string, payload []byte, isClaude bool) []byte {
-	payload = util.StripThinkingConfigIfUnsupported(model, payload)
-	if !util.ModelSupportsThinking(model) {
-		return payload
-	}
-	budget := gjson.GetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget")
-	if !budget.Exists() {
-		return payload
-	}
-	raw := int(budget.Int())
-	normalized := util.NormalizeThinkingBudget(model, raw)
-
-	if isClaude {
-		effectiveMax, setDefaultMax := antigravityEffectiveMaxTokens(model, payload)
-		if effectiveMax > 0 && normalized >= effectiveMax {
-			normalized = effectiveMax - 1
-		}
-		minBudget := antigravityMinThinkingBudget(model)
-		if minBudget > 0 && normalized >= 0 && normalized < minBudget {
-			// Budget is below minimum, remove thinking config entirely
-			payload, _ = sjson.DeleteBytes(payload, "request.generationConfig.thinkingConfig")
-			return payload
-		}
-		if setDefaultMax {
-			if res, errSet := sjson.SetBytes(payload, "request.generationConfig.maxOutputTokens", effectiveMax); errSet == nil {
-				payload = res
-			}
-		}
-	}
-
-	updated, err := sjson.SetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
-	if err != nil {
-		return payload
-	}
-	return updated
-}
-
-// antigravityEffectiveMaxTokens returns the max tokens to cap thinking:
-// prefer request-provided maxOutputTokens; otherwise fall back to model default.
-// The boolean indicates whether the value came from the model default (and thus should be written back).
-func antigravityEffectiveMaxTokens(model string, payload []byte) (max int, fromModel bool) {
-	if maxTok := gjson.GetBytes(payload, "request.generationConfig.maxOutputTokens"); maxTok.Exists() && maxTok.Int() > 0 {
-		return int(maxTok.Int()), false
-	}
-	if modelInfo := registry.GetGlobalRegistry().GetModelInfo(model); modelInfo != nil && modelInfo.MaxCompletionTokens > 0 {
-		return modelInfo.MaxCompletionTokens, true
-	}
-	return 0, false
-}
-
-// antigravityMinThinkingBudget returns the minimum thinking budget for a model.
-// Falls back to -1 if no model info is found.
-func antigravityMinThinkingBudget(model string) int {
-	if modelInfo := registry.GetGlobalRegistry().GetModelInfo(model); modelInfo != nil && modelInfo.Thinking != nil {
-		return modelInfo.Thinking.Min
-	}
-	return -1
-}
diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go
index 4242a244..b6d5418a 100644
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -17,7 +17,7 @@ import (
 	claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -84,17 +84,15 @@ func (e *ClaudeExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut
 }
 
 func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
-	apiKey, baseURL := claudeCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+	apiKey, baseURL := claudeCreds(auth)
 	if baseURL == "" {
 		baseURL = "https://api.anthropic.com"
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
-	model := req.Model
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
-		model = override
-	}
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("claude")
 	// Use streaming translation to preserve function calling, except for claude.
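Note: with normalizeAntigravityThinking and its helpers removed above, every executor in this diff funnels thinking setup through the internal thinking package. The shared pattern, reduced to a sketch (names and signatures exactly as they appear in the hunks; err is each executor's named error return, and error handling is abbreviated):

	baseModel := thinking.ParseSuffix(req.Model).ModelName // strip any thinking suffix from the requested model ID
	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
	if err != nil {
		return resp, err // surface invalid thinking config to the caller instead of clamping it
	}
	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)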
@@ -103,23 +101,23 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, stream) - body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream) - body, _ = sjson.SetBytes(body, "model", model) - // Inject thinking config based on model metadata for thinking variants - body = e.injectThinkingConfig(model, req.Metadata, body) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream) + body, _ = sjson.SetBytes(body, "model", baseModel) - if !strings.HasPrefix(model, "claude-3-5-haiku") { + body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String()) + if err != nil { + return resp, err + } + + if !strings.HasPrefix(baseModel, "claude-3-5-haiku") { body = checkSystemInstructions(body) } - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) - // Ensure max_tokens > thinking.budget_tokens when thinking is enabled - body = ensureMaxTokensForThinking(model, body) - // Extract betas from body and convert to header var extraBetas []string extraBetas, body = extractAndRemoveBetas(body) @@ -218,37 +216,36 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r } func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - apiKey, baseURL := claudeCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, baseURL := claudeCreds(auth) if baseURL == "" { baseURL = "https://api.anthropic.com" } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("claude") - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true) - body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true) - body, _ = sjson.SetBytes(body, "model", model) - // Inject thinking config based on model metadata for thinking variants - body = e.injectThinkingConfig(model, req.Metadata, body) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + body, _ = sjson.SetBytes(body, "model", baseModel) + + body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String()) + if err != nil { + return nil, err + } + body = checkSystemInstructions(body) - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) + body = 
applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) - // Ensure max_tokens > thinking.budget_tokens when thinking is enabled - body = ensureMaxTokensForThinking(model, body) - // Extract betas from body and convert to header var extraBetas []string extraBetas, body = extractAndRemoveBetas(body) @@ -381,8 +378,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A } func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { - apiKey, baseURL := claudeCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, baseURL := claudeCreds(auth) if baseURL == "" { baseURL = "https://api.anthropic.com" } @@ -391,14 +389,10 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut to := sdktranslator.FromString("claude") // Use streaming translation to preserve function calling, except for claude. stream := from != to - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } - body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream) - body, _ = sjson.SetBytes(body, "model", model) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream) + body, _ = sjson.SetBytes(body, "model", baseModel) - if !strings.HasPrefix(model, "claude-3-5-haiku") { + if !strings.HasPrefix(baseModel, "claude-3-5-haiku") { body = checkSystemInstructions(body) } @@ -527,17 +521,6 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) { return betas, body } -// injectThinkingConfig adds thinking configuration based on metadata using the unified flow. -// It uses util.ResolveClaudeThinkingConfig which internally calls ResolveThinkingConfigFromMetadata -// and NormalizeThinkingBudget, ensuring consistency with other executors like Gemini. -func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte { - budget, ok := util.ResolveClaudeThinkingConfig(modelName, metadata) - if !ok { - return body - } - return util.ApplyClaudeThinkingConfig(body, budget) -} - // disableThinkingIfToolChoiceForced checks if tool_choice forces tool use and disables thinking. // Anthropic API does not allow thinking when tool_choice is set to "any" or a specific tool. // See: https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations @@ -551,126 +534,6 @@ func disableThinkingIfToolChoiceForced(body []byte) []byte { return body } -// ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled. -// Anthropic API requires this constraint; violating it returns a 400 error. -// This function should be called after all thinking configuration is finalized. -// It looks up the model's MaxCompletionTokens from the registry to use as the cap. 
-func ensureMaxTokensForThinking(modelName string, body []byte) []byte { - thinkingType := gjson.GetBytes(body, "thinking.type").String() - if thinkingType != "enabled" { - return body - } - - budgetTokens := gjson.GetBytes(body, "thinking.budget_tokens").Int() - if budgetTokens <= 0 { - return body - } - - maxTokens := gjson.GetBytes(body, "max_tokens").Int() - - // Look up the model's max completion tokens from the registry - maxCompletionTokens := 0 - if modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName); modelInfo != nil { - maxCompletionTokens = modelInfo.MaxCompletionTokens - } - - // Fall back to budget + buffer if registry lookup fails or returns 0 - const fallbackBuffer = 4000 - requiredMaxTokens := budgetTokens + fallbackBuffer - if maxCompletionTokens > 0 { - requiredMaxTokens = int64(maxCompletionTokens) - } - - if maxTokens < requiredMaxTokens { - body, _ = sjson.SetBytes(body, "max_tokens", requiredMaxTokens) - } - return body -} - -func (e *ClaudeExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string { - trimmed := strings.TrimSpace(alias) - if trimmed == "" { - return "" - } - - entry := e.resolveClaudeConfig(auth) - if entry == nil { - return "" - } - - normalizedModel, metadata := util.NormalizeThinkingModel(trimmed) - - // Candidate names to match against configured aliases/names. - candidates := []string{strings.TrimSpace(normalizedModel)} - if !strings.EqualFold(normalizedModel, trimmed) { - candidates = append(candidates, trimmed) - } - if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) { - candidates = append(candidates, original) - } - - for i := range entry.Models { - model := entry.Models[i] - name := strings.TrimSpace(model.Name) - modelAlias := strings.TrimSpace(model.Alias) - - for _, candidate := range candidates { - if candidate == "" { - continue - } - if modelAlias != "" && strings.EqualFold(modelAlias, candidate) { - if name != "" { - return name - } - return candidate - } - if name != "" && strings.EqualFold(name, candidate) { - return name - } - } - } - return "" -} - -func (e *ClaudeExecutor) resolveClaudeConfig(auth *cliproxyauth.Auth) *config.ClaudeKey { - if auth == nil || e.cfg == nil { - return nil - } - var attrKey, attrBase string - if auth.Attributes != nil { - attrKey = strings.TrimSpace(auth.Attributes["api_key"]) - attrBase = strings.TrimSpace(auth.Attributes["base_url"]) - } - for i := range e.cfg.ClaudeKey { - entry := &e.cfg.ClaudeKey[i] - cfgKey := strings.TrimSpace(entry.APIKey) - cfgBase := strings.TrimSpace(entry.BaseURL) - if attrKey != "" && attrBase != "" { - if strings.EqualFold(cfgKey, attrKey) && strings.EqualFold(cfgBase, attrBase) { - return entry - } - continue - } - if attrKey != "" && strings.EqualFold(cfgKey, attrKey) { - if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) { - return entry - } - } - if attrKey == "" && attrBase != "" && strings.EqualFold(cfgBase, attrBase) { - return entry - } - } - if attrKey != "" { - for i := range e.cfg.ClaudeKey { - entry := &e.cfg.ClaudeKey[i] - if strings.EqualFold(strings.TrimSpace(entry.APIKey), attrKey) { - return entry - } - } - } - return nil -} - type compositeReadCloser struct { io.Reader closers []func() error diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 2f4c6295..cc0e32a1 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -13,6 
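Every executor in this patch follows the same new pattern: derive the base model once with thinking.ParseSuffix(req.Model).ModelName, translate and report usage against that base name, and let thinking.ApplyThinking re-apply whatever reasoning configuration the suffixed client-facing name encoded. This replaces the per-executor injectThinkingConfig/resolveUpstreamModel pairs deleted above. A self-contained sketch of the idea, assuming a "-thinking-<n>" suffix grammar for illustration only (the actual parsing rules live in internal/thinking and may differ):

    package main

    import (
        "fmt"
        "strconv"
        "strings"
    )

    type parsed struct {
        ModelName string // base model id sent upstream
        Budget    int    // thinking budget encoded in the suffix, 0 if absent
    }

    // parseSuffix splits a client-facing id such as
    // "claude-sonnet-4-5-thinking-8192" into base name and budget.
    // Hypothetical grammar, for illustration.
    func parseSuffix(model string) parsed {
        const marker = "-thinking-"
        if i := strings.LastIndex(model, marker); i >= 0 {
            if n, err := strconv.Atoi(model[i+len(marker):]); err == nil {
                return parsed{ModelName: model[:i], Budget: n}
            }
        }
        return parsed{ModelName: model}
    }

    func main() {
        p := parseSuffix("claude-sonnet-4-5-thinking-8192")
        fmt.Println(p.ModelName, p.Budget) // claude-sonnet-4-5 8192
    }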
diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go
index 2f4c6295..cc0e32a1 100644
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -13,6 +13,7 @@ import (
     codexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/codex"
     "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
     "github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
+    "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
     "github.com/router-for-me/CLIProxyAPI/v6/internal/util"
     cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
     cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -72,18 +73,15 @@ func (e *CodexExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth
 }
 
 func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
-    apiKey, baseURL := codexCreds(auth)
+    baseModel := thinking.ParseSuffix(req.Model).ModelName
+    apiKey, baseURL := codexCreds(auth)
     if baseURL == "" {
         baseURL = "https://chatgpt.com/backend-api/codex"
     }
-    reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
-    defer reporter.trackFailure(ctx, &err)
-    model := req.Model
-    if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
-        model = override
-    }
+    reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
+    defer reporter.trackFailure(ctx, &err)
 
     from := opts.SourceFormat
     to := sdktranslator.FromString("codex")
@@ -93,20 +91,25 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
         originalPayload = bytes.Clone(opts.OriginalRequest)
     }
     originalPayload = misc.InjectCodexUserAgent(originalPayload, userAgent)
-    originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
+    originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
     body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent)
-    body = sdktranslator.TranslateRequest(from, to, model, body, false)
+    body = sdktranslator.TranslateRequest(from, to, baseModel, body, false)
     body = misc.StripCodexUserAgent(body)
-    body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
-    body = NormalizeThinkingConfig(body, model, false)
-    if errValidate := ValidateThinkingConfig(body, model); errValidate != nil {
-        return resp, errValidate
+
+    body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
+    if err != nil {
+        return resp, err
     }
-    body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
-    body, _ = sjson.SetBytes(body, "model", model)
+
+    body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+    body, _ = sjson.SetBytes(body, "model", baseModel)
     body, _ = sjson.SetBytes(body, "stream", true)
     body, _ = sjson.DeleteBytes(body, "previous_response_id")
     body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
+    body, _ = sjson.DeleteBytes(body, "safety_identifier")
+    if !gjson.GetBytes(body, "instructions").Exists() {
+        body, _ = sjson.SetBytes(body, "instructions", "")
+    }
 
     url := strings.TrimSuffix(baseURL, "/") + "/responses"
     httpReq, err := e.cacheHelper(ctx, from, url, req, body)
@@ -182,18 +185,15 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 }
 
 func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
-    apiKey, baseURL := codexCreds(auth)
+    baseModel := thinking.ParseSuffix(req.Model).ModelName
+    apiKey, baseURL := codexCreds(auth)
     if baseURL == "" {
         baseURL = "https://chatgpt.com/backend-api/codex"
     }
-    reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
-    defer reporter.trackFailure(ctx, &err)
-    model := req.Model
-    if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
-        model = override
-    }
+    reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
+    defer reporter.trackFailure(ctx, &err)
 
     from := opts.SourceFormat
     to := sdktranslator.FromString("codex")
@@ -203,20 +203,24 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
         originalPayload = bytes.Clone(opts.OriginalRequest)
     }
     originalPayload = misc.InjectCodexUserAgent(originalPayload, userAgent)
-    originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
+    originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
     body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent)
-    body = sdktranslator.TranslateRequest(from, to, model, body, true)
+    body = sdktranslator.TranslateRequest(from, to, baseModel, body, true)
     body = misc.StripCodexUserAgent(body)
-    body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
-    body = NormalizeThinkingConfig(body, model, false)
-    if errValidate := ValidateThinkingConfig(body, model); errValidate != nil {
-        return nil, errValidate
+    body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
+    if err != nil {
+        return nil, err
     }
-    body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
+
+    body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
     body, _ = sjson.DeleteBytes(body, "previous_response_id")
     body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
-    body, _ = sjson.SetBytes(body, "model", model)
+    body, _ = sjson.DeleteBytes(body, "safety_identifier")
+    body, _ = sjson.SetBytes(body, "model", baseModel)
+    if !gjson.GetBytes(body, "instructions").Exists() {
+        body, _ = sjson.SetBytes(body, "instructions", "")
+    }
 
     url := strings.TrimSuffix(baseURL, "/") + "/responses"
     httpReq, err := e.cacheHelper(ctx, from, url, req, body)
@@ -303,25 +307,30 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 }
 
 func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
-    model := req.Model
-    if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
-        model = override
-    }
+    baseModel := thinking.ParseSuffix(req.Model).ModelName
 
     from := opts.SourceFormat
     to := sdktranslator.FromString("codex")
     userAgent := codexUserAgent(ctx)
     body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent)
-    body = sdktranslator.TranslateRequest(from, to, model, body, false)
+    body = sdktranslator.TranslateRequest(from, to, baseModel, body, false)
     body = misc.StripCodexUserAgent(body)
-    body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
-    body, _ = sjson.SetBytes(body, "model", model)
+    body, err := thinking.ApplyThinking(body, req.Model, from.String(), to.String())
+    if err != nil {
+        return cliproxyexecutor.Response{}, err
+    }
+
+    body, _ = sjson.SetBytes(body, "model", baseModel)
     body, _ = sjson.DeleteBytes(body, "previous_response_id")
     body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
+    body, _ = sjson.DeleteBytes(body, "safety_identifier")
     body, _ = sjson.SetBytes(body, "stream", false)
+    if !gjson.GetBytes(body, "instructions").Exists() {
+        body, _ = sjson.SetBytes(body, "instructions", "")
+    }
 
-    enc, err := tokenizerForCodexModel(model)
+    enc, err := tokenizerForCodexModel(baseModel)
     if err != nil {
         return cliproxyexecutor.Response{}, fmt.Errorf("codex executor: tokenizer init failed: %w", err)
     }
@@ -593,51 +602,6 @@ func codexCreds(a *cliproxyauth.Auth) (apiKey, baseURL string) {
     return
 }
 
-func (e *CodexExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
-    trimmed := strings.TrimSpace(alias)
-    if trimmed == "" {
-        return ""
-    }
-
-    entry := e.resolveCodexConfig(auth)
-    if entry == nil {
-        return ""
-    }
-
-    normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)
-
-    // Candidate names to match against configured aliases/names.
-    candidates := []string{strings.TrimSpace(normalizedModel)}
-    if !strings.EqualFold(normalizedModel, trimmed) {
-        candidates = append(candidates, trimmed)
-    }
-    if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
-        candidates = append(candidates, original)
-    }
-
-    for i := range entry.Models {
-        model := entry.Models[i]
-        name := strings.TrimSpace(model.Name)
-        modelAlias := strings.TrimSpace(model.Alias)
-
-        for _, candidate := range candidates {
-            if candidate == "" {
-                continue
-            }
-            if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
-                if name != "" {
-                    return name
-                }
-                return candidate
-            }
-            if name != "" && strings.EqualFold(name, candidate) {
-                return name
-            }
-        }
-    }
-    return ""
-}
-
 func (e *CodexExecutor) resolveCodexConfig(auth *cliproxyauth.Auth) *config.CodexKey {
     if auth == nil || e.cfg == nil {
         return nil
diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go
index 20b93a92..b23406af 100644
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -20,6 +20,7 @@ import (
     "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
     "github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
     "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli"
+    "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
     "github.com/router-for-me/CLIProxyAPI/v6/internal/util"
     cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
     cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -102,28 +103,33 @@ func (e *GeminiCLIExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.
 
 // Execute performs a non-streaming request to the Gemini CLI API.
 func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
+    baseModel := thinking.ParseSuffix(req.Model).ModelName
+
     tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
     if err != nil {
         return resp, err
     }
-    reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+
+    reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
     defer reporter.trackFailure(ctx, &err)
 
     from := opts.SourceFormat
     to := sdktranslator.FromString("gemini-cli")
+
     originalPayload := bytes.Clone(req.Payload)
     if len(opts.OriginalRequest) > 0 {
         originalPayload = bytes.Clone(opts.OriginalRequest)
     }
-    originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
-    basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-    basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
-    basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
-    basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload)
-    basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
-    basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
-    basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
-    basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated)
+    originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
+    basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+
+    basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String())
+    if err != nil {
+        return resp, err
+    }
+
+    basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload)
+    basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated)
 
     action := "generateContent"
     if req.Metadata != nil {
@@ -133,9 +139,9 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
     }
     projectID := resolveGeminiProjectID(auth)
 
-    models := cliPreviewFallbackOrder(req.Model)
-    if len(models) == 0 || models[0] != req.Model {
-        models = append([]string{req.Model}, models...)
+    models := cliPreviewFallbackOrder(baseModel)
+    if len(models) == 0 || models[0] != baseModel {
+        models = append([]string{baseModel}, models...)
     }
 
     httpClient := newHTTPClient(ctx, e.cfg, auth, 0)
@@ -246,34 +252,39 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 
 // ExecuteStream performs a streaming request to the Gemini CLI API.
 func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
+    baseModel := thinking.ParseSuffix(req.Model).ModelName
+
     tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
     if err != nil {
         return nil, err
     }
-    reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+
+    reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
     defer reporter.trackFailure(ctx, &err)
 
     from := opts.SourceFormat
     to := sdktranslator.FromString("gemini-cli")
+
     originalPayload := bytes.Clone(req.Payload)
     if len(opts.OriginalRequest) > 0 {
         originalPayload = bytes.Clone(opts.OriginalRequest)
     }
-    originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
-    basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-    basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
-    basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
-    basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload)
-    basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
-    basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
-    basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
-    basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated)
+    originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
+    basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+
+    basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String())
+    if err != nil {
+        return nil, err
+    }
+
+    basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload)
+    basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated)
 
     projectID := resolveGeminiProjectID(auth)
-    models := cliPreviewFallbackOrder(req.Model)
-    if len(models) == 0 || models[0] != req.Model {
-        models = append([]string{req.Model}, models...)
+    models := cliPreviewFallbackOrder(baseModel)
+    if len(models) == 0 || models[0] != baseModel {
+        models = append([]string{baseModel}, models...)
     }
 
     httpClient := newHTTPClient(ctx, e.cfg, auth, 0)
@@ -435,6 +446,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 
 // CountTokens counts tokens for the given request using the Gemini CLI API.
 func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+    baseModel := thinking.ParseSuffix(req.Model).ModelName
+
     tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
     if err != nil {
         return cliproxyexecutor.Response{}, err
@@ -443,9 +456,9 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
     from := opts.SourceFormat
     to := sdktranslator.FromString("gemini-cli")
 
-    models := cliPreviewFallbackOrder(req.Model)
-    if len(models) == 0 || models[0] != req.Model {
-        models = append([]string{req.Model}, models...)
+    models := cliPreviewFallbackOrder(baseModel)
+    if len(models) == 0 || models[0] != baseModel {
+        models = append([]string{baseModel}, models...)
     }
 
     httpClient := newHTTPClient(ctx, e.cfg, auth, 0)
@@ -463,15 +476,18 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
     // The loop variable attemptModel is only used as the concrete model id sent to the upstream
     // Gemini CLI endpoint when iterating fallback variants.
-    for _, attemptModel := range models {
-        payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false)
-        payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model)
-        payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, payload)
+    for range models {
+        payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+
+        payload, err = thinking.ApplyThinking(payload, req.Model, from.String(), to.String())
+        if err != nil {
+            return cliproxyexecutor.Response{}, err
+        }
+
         payload = deleteJSONField(payload, "project")
         payload = deleteJSONField(payload, "model")
         payload = deleteJSONField(payload, "request.safetySettings")
-        payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
-        payload = fixGeminiCLIImageAspectRatio(req.Model, payload)
+        payload = fixGeminiCLIImageAspectRatio(baseModel, payload)
 
         tok, errTok := tokenSource.Token()
         if errTok != nil {
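The Gemini CLI hunks keep the preview-fallback loop but key it on baseModel. The guard after the cliPreviewFallbackOrder call guarantees the requested model is tried first and never duplicated. A sketch with a hypothetical fallback table (cliPreviewFallbackOrder's real table is not part of this patch):

    package main

    import "fmt"

    // fallbackOrder stands in for cliPreviewFallbackOrder; entries are invented.
    func fallbackOrder(base string) []string {
        table := map[string][]string{
            "gemini-2.5-pro": {"gemini-2.5-pro", "gemini-2.5-pro-preview-06-05"},
        }
        return table[base]
    }

    // modelsToTry mirrors the prepend-if-absent guard in the diff.
    func modelsToTry(baseModel string) []string {
        models := fallbackOrder(baseModel)
        if len(models) == 0 || models[0] != baseModel {
            models = append([]string{baseModel}, models...)
        }
        return models
    }

    func main() {
        fmt.Println(modelsToTry("gemini-2.5-pro")) // [gemini-2.5-pro gemini-2.5-pro-preview-06-05]
        fmt.Println(modelsToTry("gemini-3-flash")) // [gemini-3-flash]
    }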
sjson.SetBytes(body, "model", model) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String()) + if err != nil { + return resp, err + } + + body = fixGeminiImageAspectRatio(baseModel, body) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) + body, _ = sjson.SetBytes(body, "model", baseModel) action := "generateContent" if req.Metadata != nil { @@ -136,7 +136,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r } } baseURL := resolveGeminiBaseURL(auth) - url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, action) + url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, action) if opts.Alt != "" && action != "countTokens" { url = url + fmt.Sprintf("?$alt=%s", opts.Alt) } @@ -206,34 +206,33 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r // ExecuteStream performs a streaming request to the Gemini API. func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, bearer := geminiCreds(auth) - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) - model := req.Model - if override := e.resolveUpstreamModel(model, auth); override != "" { - model = override - } - from := opts.SourceFormat to := sdktranslator.FromString("gemini") originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true) - body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true) - body = ApplyThinkingMetadata(body, req.Metadata, model) - body = util.ApplyDefaultThinkingIfNeeded(model, body) - body = util.NormalizeGeminiThinkingBudget(model, body) - body = util.StripThinkingConfigIfUnsupported(model, body) - body = fixGeminiImageAspectRatio(model, body) - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) - body, _ = sjson.SetBytes(body, "model", model) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + + body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String()) + if err != nil { + return nil, err + } + + body = fixGeminiImageAspectRatio(baseModel, body) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) + body, _ = sjson.SetBytes(body, "model", baseModel) baseURL := resolveGeminiBaseURL(auth) - url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, "streamGenerateContent") + url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, "streamGenerateContent") if opts.Alt == "" { url = url + "?alt=sse" } else { @@ -331,27 +330,28 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A // CountTokens counts tokens for the given request 
using the Gemini API. func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { - apiKey, bearer := geminiCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName - model := req.Model - if override := e.resolveUpstreamModel(model, auth); override != "" { - model = override - } + apiKey, bearer := geminiCreds(auth) from := opts.SourceFormat to := sdktranslator.FromString("gemini") - translatedReq := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false) - translatedReq = ApplyThinkingMetadata(translatedReq, req.Metadata, model) - translatedReq = util.StripThinkingConfigIfUnsupported(model, translatedReq) - translatedReq = fixGeminiImageAspectRatio(model, translatedReq) + translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String()) + if err != nil { + return cliproxyexecutor.Response{}, err + } + + translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq) respCtx := context.WithValue(ctx, "alt", opts.Alt) translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings") - translatedReq, _ = sjson.SetBytes(translatedReq, "model", model) + translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel) baseURL := resolveGeminiBaseURL(auth) - url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, "countTokens") + url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, "countTokens") requestBody := bytes.NewReader(translatedReq) @@ -450,51 +450,6 @@ func resolveGeminiBaseURL(auth *cliproxyauth.Auth) string { return base } -func (e *GeminiExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string { - trimmed := strings.TrimSpace(alias) - if trimmed == "" { - return "" - } - - entry := e.resolveGeminiConfig(auth) - if entry == nil { - return "" - } - - normalizedModel, metadata := util.NormalizeThinkingModel(trimmed) - - // Candidate names to match against configured aliases/names. 
- candidates := []string{strings.TrimSpace(normalizedModel)} - if !strings.EqualFold(normalizedModel, trimmed) { - candidates = append(candidates, trimmed) - } - if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) { - candidates = append(candidates, original) - } - - for i := range entry.Models { - model := entry.Models[i] - name := strings.TrimSpace(model.Name) - modelAlias := strings.TrimSpace(model.Alias) - - for _, candidate := range candidates { - if candidate == "" { - continue - } - if modelAlias != "" && strings.EqualFold(modelAlias, candidate) { - if name != "" { - return name - } - return candidate - } - if name != "" && strings.EqualFold(name, candidate) { - return name - } - } - } - return "" -} - func (e *GeminiExecutor) resolveGeminiConfig(auth *cliproxyauth.Auth) *config.GeminiKey { if auth == nil || e.cfg == nil { return nil diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go index eebf6b1b..1184c07e 100644 --- a/internal/runtime/executor/gemini_vertex_executor.go +++ b/internal/runtime/executor/gemini_vertex_executor.go @@ -12,10 +12,11 @@ import ( "io" "net/http" "strings" + "time" vertexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/vertex" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" @@ -31,6 +32,143 @@ const ( vertexAPIVersion = "v1" ) +// isImagenModel checks if the model name is an Imagen image generation model. +// Imagen models use the :predict action instead of :generateContent. +func isImagenModel(model string) bool { + lowerModel := strings.ToLower(model) + return strings.Contains(lowerModel, "imagen") +} + +// getVertexAction returns the appropriate action for the given model. +// Imagen models use "predict", while Gemini models use "generateContent". +func getVertexAction(model string, isStream bool) string { + if isImagenModel(model) { + return "predict" + } + if isStream { + return "streamGenerateContent" + } + return "generateContent" +} + +// convertImagenToGeminiResponse converts Imagen API response to Gemini format +// so it can be processed by the standard translation pipeline. +// This ensures Imagen models return responses in the same format as gemini-3-pro-image-preview. 
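The Gemini executor builds its endpoint from the base model and the action. A sketch of the URL shape, assuming glAPIVersion is "v1beta" (the constant's value is not shown in this patch); the pre-existing "?$alt=" literal from the diff is kept as-is:

    package main

    import "fmt"

    func geminiURL(baseURL, apiVersion, baseModel, action, alt string) string {
        url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, apiVersion, baseModel, action)
        // countTokens never takes the alt query parameter.
        if alt != "" && action != "countTokens" {
            url += fmt.Sprintf("?$alt=%s", alt)
        }
        return url
    }

    func main() {
        fmt.Println(geminiURL("https://generativelanguage.googleapis.com", "v1beta",
            "gemini-2.5-pro", "generateContent", ""))
        // https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent
    }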
+func convertImagenToGeminiResponse(data []byte, model string) []byte { + predictions := gjson.GetBytes(data, "predictions") + if !predictions.Exists() || !predictions.IsArray() { + return data + } + + // Build Gemini-compatible response with inlineData + parts := make([]map[string]any, 0) + for _, pred := range predictions.Array() { + imageData := pred.Get("bytesBase64Encoded").String() + mimeType := pred.Get("mimeType").String() + if mimeType == "" { + mimeType = "image/png" + } + if imageData != "" { + parts = append(parts, map[string]any{ + "inlineData": map[string]any{ + "mimeType": mimeType, + "data": imageData, + }, + }) + } + } + + // Generate unique response ID using timestamp + responseId := fmt.Sprintf("imagen-%d", time.Now().UnixNano()) + + response := map[string]any{ + "candidates": []map[string]any{{ + "content": map[string]any{ + "parts": parts, + "role": "model", + }, + "finishReason": "STOP", + }}, + "responseId": responseId, + "modelVersion": model, + // Imagen API doesn't return token counts, set to 0 for tracking purposes + "usageMetadata": map[string]any{ + "promptTokenCount": 0, + "candidatesTokenCount": 0, + "totalTokenCount": 0, + }, + } + + result, err := json.Marshal(response) + if err != nil { + return data + } + return result +} + +// convertToImagenRequest converts a Gemini-style request to Imagen API format. +// Imagen API uses a different structure: instances[].prompt instead of contents[]. +func convertToImagenRequest(payload []byte) ([]byte, error) { + // Extract prompt from Gemini-style contents + prompt := "" + + // Try to get prompt from contents[0].parts[0].text + contentsText := gjson.GetBytes(payload, "contents.0.parts.0.text") + if contentsText.Exists() { + prompt = contentsText.String() + } + + // If no contents, try messages format (OpenAI-compatible) + if prompt == "" { + messagesText := gjson.GetBytes(payload, "messages.#.content") + if messagesText.Exists() && messagesText.IsArray() { + for _, msg := range messagesText.Array() { + if msg.String() != "" { + prompt = msg.String() + break + } + } + } + } + + // If still no prompt, try direct prompt field + if prompt == "" { + directPrompt := gjson.GetBytes(payload, "prompt") + if directPrompt.Exists() { + prompt = directPrompt.String() + } + } + + if prompt == "" { + return nil, fmt.Errorf("imagen: no prompt found in request") + } + + // Build Imagen API request + imagenReq := map[string]any{ + "instances": []map[string]any{ + { + "prompt": prompt, + }, + }, + "parameters": map[string]any{ + "sampleCount": 1, + }, + } + + // Extract optional parameters + if aspectRatio := gjson.GetBytes(payload, "aspectRatio"); aspectRatio.Exists() { + imagenReq["parameters"].(map[string]any)["aspectRatio"] = aspectRatio.String() + } + if sampleCount := gjson.GetBytes(payload, "sampleCount"); sampleCount.Exists() { + imagenReq["parameters"].(map[string]any)["sampleCount"] = int(sampleCount.Int()) + } + if negativePrompt := gjson.GetBytes(payload, "negativePrompt"); negativePrompt.Exists() { + imagenReq["instances"].([]map[string]any)[0]["negativePrompt"] = negativePrompt.String() + } + + return json.Marshal(imagenReq) +} + // GeminiVertexExecutor sends requests to Vertex AI Gemini endpoints using service account credentials. type GeminiVertexExecutor struct { cfg *config.Config @@ -155,39 +293,50 @@ func (e *GeminiVertexExecutor) Refresh(_ context.Context, auth *cliproxyauth.Aut // executeWithServiceAccount handles authentication using service account credentials. 
// This method contains the original service account authentication logic. func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (resp cliproxyexecutor.Response, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) - from := opts.SourceFormat - to := sdktranslator.FromString("gemini") - originalPayload := bytes.Clone(req.Payload) - if len(opts.OriginalRequest) > 0 { - originalPayload = bytes.Clone(opts.OriginalRequest) - } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false) - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) - budgetOverride = &norm - } - body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) - } - body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) - body = util.NormalizeGeminiThinkingBudget(req.Model, body) - body = util.StripThinkingConfigIfUnsupported(req.Model, body) - body = fixGeminiImageAspectRatio(req.Model, body) - body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) - body, _ = sjson.SetBytes(body, "model", req.Model) + var body []byte - action := "generateContent" + // Handle Imagen models with special request format + if isImagenModel(baseModel) { + imagenBody, errImagen := convertToImagenRequest(req.Payload) + if errImagen != nil { + return resp, errImagen + } + body = imagenBody + } else { + // Standard Gemini translation flow + from := opts.SourceFormat + to := sdktranslator.FromString("gemini") + + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) + body = sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String()) + if err != nil { + return resp, err + } + + body = fixGeminiImageAspectRatio(baseModel, body) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) + body, _ = sjson.SetBytes(body, "model", baseModel) + } + + action := getVertexAction(baseModel, false) if req.Metadata != nil { if a, _ := req.Metadata["action"].(string); a == "countTokens" { action = "countTokens" } } baseURL := vertexBaseURL(location) - url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, action) + url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, action) if opts.Alt != "" && action != "countTokens" { url = url + fmt.Sprintf("?$alt=%s", opts.Alt) } @@ -250,6 +399,16 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au } appendAPIResponseChunk(ctx, e.cfg, data) reporter.publish(ctx, 
parseGeminiUsage(data)) + + // For Imagen models, convert response to Gemini format before translation + // This ensures Imagen responses use the same format as gemini-3-pro-image-preview + if isImagenModel(baseModel) { + data = convertImagenToGeminiResponse(data, baseModel) + } + + // Standard Gemini translation (works for both Gemini and converted Imagen responses) + from := opts.SourceFormat + to := sdktranslator.FromString("gemini") var param any out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, ¶m) resp = cliproxyexecutor.Response{Payload: []byte(out)} @@ -258,37 +417,31 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au // executeWithAPIKey handles authentication using API key credentials. func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (resp cliproxyexecutor.Response, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) - defer reporter.trackFailure(ctx, &err) + baseModel := thinking.ParseSuffix(req.Model).ModelName - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false) - body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) { - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(model, *budgetOverride) - budgetOverride = &norm - } - body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) - } - body = util.ApplyDefaultThinkingIfNeeded(model, body) - body = util.NormalizeGeminiThinkingBudget(model, body) - body = util.StripThinkingConfigIfUnsupported(model, body) - body = fixGeminiImageAspectRatio(model, body) - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) - body, _ = sjson.SetBytes(body, "model", model) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - action := "generateContent" + body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String()) + if err != nil { + return resp, err + } + + body = fixGeminiImageAspectRatio(baseModel, body) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) + body, _ = sjson.SetBytes(body, "model", baseModel) + + action := getVertexAction(baseModel, false) if req.Metadata != nil { if a, _ := req.Metadata["action"].(string); a == "countTokens" { action = "countTokens" @@ -299,7 +452,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip if baseURL == "" { baseURL = "https://generativelanguage.googleapis.com" } - url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, action) + url := 
fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, action) if opts.Alt != "" && action != "countTokens" { url = url + fmt.Sprintf("?$alt=%s", opts.Alt) } @@ -367,37 +520,40 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip // executeStreamWithServiceAccount handles streaming authentication using service account credentials. func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) - budgetOverride = &norm - } - body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) - } - body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) - body = util.NormalizeGeminiThinkingBudget(req.Model, body) - body = util.StripThinkingConfigIfUnsupported(req.Model, body) - body = fixGeminiImageAspectRatio(req.Model, body) - body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) - body, _ = sjson.SetBytes(body, "model", req.Model) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String()) + if err != nil { + return nil, err + } + + body = fixGeminiImageAspectRatio(baseModel, body) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) + body, _ = sjson.SetBytes(body, "model", baseModel) + + action := getVertexAction(baseModel, true) baseURL := vertexBaseURL(location) - url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "streamGenerateContent") - if opts.Alt == "" { - url = url + "?alt=sse" - } else { - url = url + fmt.Sprintf("?$alt=%s", opts.Alt) + url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, action) + // Imagen models don't support streaming, skip SSE params + if !isImagenModel(baseModel) { + if opts.Alt == "" { + url = url + "?alt=sse" + } else { + url = url + fmt.Sprintf("?$alt=%s", opts.Alt) + } } body, _ = sjson.DeleteBytes(body, "session_id") @@ -487,45 +643,43 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte // executeStreamWithAPIKey handles streaming authentication using API key credentials. 
func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) - defer reporter.trackFailure(ctx, &err) + baseModel := thinking.ParseSuffix(req.Model).ModelName - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true) - body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) { - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(model, *budgetOverride) - budgetOverride = &norm - } - body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) - } - body = util.ApplyDefaultThinkingIfNeeded(model, body) - body = util.NormalizeGeminiThinkingBudget(model, body) - body = util.StripThinkingConfigIfUnsupported(model, body) - body = fixGeminiImageAspectRatio(model, body) - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) - body, _ = sjson.SetBytes(body, "model", model) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String()) + if err != nil { + return nil, err + } + + body = fixGeminiImageAspectRatio(baseModel, body) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) + body, _ = sjson.SetBytes(body, "model", baseModel) + + action := getVertexAction(baseModel, true) // For API key auth, use simpler URL format without project/location if baseURL == "" { baseURL = "https://generativelanguage.googleapis.com" } - url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, "streamGenerateContent") - if opts.Alt == "" { - url = url + "?alt=sse" - } else { - url = url + fmt.Sprintf("?$alt=%s", opts.Alt) + url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, action) + // Imagen models don't support streaming, skip SSE params + if !isImagenModel(baseModel) { + if opts.Alt == "" { + url = url + "?alt=sse" + } else { + url = url + fmt.Sprintf("?$alt=%s", opts.Alt) + } } body, _ = sjson.DeleteBytes(body, "session_id") @@ -612,26 +766,27 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth // countTokensWithServiceAccount counts tokens using service account credentials. 
func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + from := opts.SourceFormat to := sdktranslator.FromString("gemini") - translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) - budgetOverride = &norm - } - translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) + + translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String()) + if err != nil { + return cliproxyexecutor.Response{}, err } - translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq) - translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq) - translatedReq, _ = sjson.SetBytes(translatedReq, "model", req.Model) + + translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq) + translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel) respCtx := context.WithValue(ctx, "alt", opts.Alt) translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings") baseURL := vertexBaseURL(location) - url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "countTokens") + url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, "countTokens") httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq)) if errNewReq != nil { @@ -688,10 +843,6 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context return cliproxyexecutor.Response{}, errRead } appendAPIResponseChunk(ctx, e.cfg, data) - if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { - log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data)) - return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)} - } count := gjson.GetBytes(data, "totalTokens").Int() out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data) return cliproxyexecutor.Response{Payload: []byte(out)}, nil @@ -699,24 +850,20 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context // countTokensWithAPIKey handles token counting using API key credentials. 
func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) { - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } + baseModel := thinking.ParseSuffix(req.Model).ModelName from := opts.SourceFormat to := sdktranslator.FromString("gemini") - translatedReq := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) { - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(model, *budgetOverride) - budgetOverride = &norm - } - translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) + + translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String()) + if err != nil { + return cliproxyexecutor.Response{}, err } - translatedReq = util.StripThinkingConfigIfUnsupported(model, translatedReq) - translatedReq = fixGeminiImageAspectRatio(model, translatedReq) - translatedReq, _ = sjson.SetBytes(translatedReq, "model", model) + + translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq) + translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel) respCtx := context.WithValue(ctx, "alt", opts.Alt) translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") @@ -726,7 +873,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth * if baseURL == "" { baseURL = "https://generativelanguage.googleapis.com" } - url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, "countTokens") + url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, "countTokens") httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq)) if errNewReq != nil { @@ -780,10 +927,6 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth * return cliproxyexecutor.Response{}, errRead } appendAPIResponseChunk(ctx, e.cfg, data) - if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { - log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data)) - return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)} - } count := gjson.GetBytes(data, "totalTokens").Int() out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data) return cliproxyexecutor.Response{Payload: []byte(out)}, nil @@ -870,53 +1013,6 @@ func vertexAccessToken(ctx context.Context, cfg *config.Config, auth *cliproxyau return tok.AccessToken, nil } -// resolveUpstreamModel resolves the upstream model name from vertex-api-key configuration. -// It matches the requested model alias against configured models and returns the actual upstream name. 
-func (e *GeminiVertexExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string { - trimmed := strings.TrimSpace(alias) - if trimmed == "" { - return "" - } - - entry := e.resolveVertexConfig(auth) - if entry == nil { - return "" - } - - normalizedModel, metadata := util.NormalizeThinkingModel(trimmed) - - // Candidate names to match against configured aliases/names. - candidates := []string{strings.TrimSpace(normalizedModel)} - if !strings.EqualFold(normalizedModel, trimmed) { - candidates = append(candidates, trimmed) - } - if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) { - candidates = append(candidates, original) - } - - for i := range entry.Models { - model := entry.Models[i] - name := strings.TrimSpace(model.Name) - modelAlias := strings.TrimSpace(model.Alias) - - for _, candidate := range candidates { - if candidate == "" { - continue - } - if modelAlias != "" && strings.EqualFold(modelAlias, candidate) { - if name != "" { - return name - } - return candidate - } - if name != "" && strings.EqualFold(name, candidate) { - return name - } - } - } - return "" -} - // resolveVertexConfig finds the matching vertex-api-key configuration entry for the given auth. func (e *GeminiVertexExecutor) resolveVertexConfig(auth *cliproxyauth.Auth) *config.VertexCompatKey { if auth == nil || e.cfg == nil { diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index c8b7706c..3e6ca4e5 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -12,6 +12,7 @@ import ( iflowauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/iflow" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -67,6 +68,8 @@ func (e *IFlowExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth // Execute performs a non-streaming chat completion request. 
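+// Thinking suffixes (e.g. "glm-4.6(high)") are stripped via thinking.ParseSuffix:
+// translation, usage reporting, and the upstream "model" field use the base name,
+// while ApplyThinking receives the full suffixed name so suffix config takes priority.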
func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, baseURL := iflowCreds(auth) if strings.TrimSpace(apiKey) == "" { err = fmt.Errorf("iflow executor: missing api key") @@ -76,7 +79,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re baseURL = iflowauth.DefaultAPIBaseURL } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat @@ -85,17 +88,17 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false) - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) - body, _ = sjson.SetBytes(body, "model", req.Model) - body = NormalizeThinkingConfig(body, req.Model, false) - if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil { - return resp, errValidate + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + body, _ = sjson.SetBytes(body, "model", baseModel) + + body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow") + if err != nil { + return resp, err } - body = applyIFlowThinkingConfig(body) + body = preserveReasoningContentInMessages(body) - body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint @@ -154,6 +157,8 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re reporter.ensurePublished(ctx) var param any + // Note: TranslateNonStream uses req.Model (original with suffix) to preserve + // the original model name in the response for client compatibility. out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, ¶m) resp = cliproxyexecutor.Response{Payload: []byte(out)} return resp, nil @@ -161,6 +166,8 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re // ExecuteStream performs a streaming chat completion request. 
func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, baseURL := iflowCreds(auth) if strings.TrimSpace(apiKey) == "" { err = fmt.Errorf("iflow executor: missing api key") @@ -170,7 +177,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au baseURL = iflowauth.DefaultAPIBaseURL } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat @@ -179,23 +186,22 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + body, _ = sjson.SetBytes(body, "model", baseModel) - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) - body, _ = sjson.SetBytes(body, "model", req.Model) - body = NormalizeThinkingConfig(body, req.Model, false) - if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil { - return nil, errValidate + body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow") + if err != nil { + return nil, err } - body = applyIFlowThinkingConfig(body) + body = preserveReasoningContentInMessages(body) // Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour. toolsResult := gjson.GetBytes(body, "tools") if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 { body = ensureToolsArray(body) } - body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint @@ -278,11 +284,13 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au } func (e *IFlowExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - enc, err := tokenizerForModel(req.Model) + enc, err := tokenizerForModel(baseModel) if err != nil { return cliproxyexecutor.Response{}, fmt.Errorf("iflow executor: tokenizer init failed: %w", err) } @@ -520,41 +528,3 @@ func preserveReasoningContentInMessages(body []byte) []byte { return body } - -// applyIFlowThinkingConfig converts normalized reasoning_effort to model-specific thinking configurations. -// This should be called after NormalizeThinkingConfig has processed the payload. 
-//
-// Model-specific handling:
-//   - GLM-4.6/4.7: Uses chat_template_kwargs.enable_thinking (boolean) and chat_template_kwargs.clear_thinking=false
-//   - MiniMax M2/M2.1: Uses reasoning_split=true for OpenAI-style reasoning separation
-func applyIFlowThinkingConfig(body []byte) []byte {
-	effort := gjson.GetBytes(body, "reasoning_effort")
-	if !effort.Exists() {
-		return body
-	}
-
-	model := strings.ToLower(gjson.GetBytes(body, "model").String())
-	val := strings.ToLower(strings.TrimSpace(effort.String()))
-	enableThinking := val != "none" && val != ""
-
-	// Remove reasoning_effort as we'll convert to model-specific format
-	body, _ = sjson.DeleteBytes(body, "reasoning_effort")
-	body, _ = sjson.DeleteBytes(body, "thinking")
-
-	// GLM-4.6/4.7: Use chat_template_kwargs
-	if strings.HasPrefix(model, "glm-4") {
-		body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
-		if enableThinking {
-			body, _ = sjson.SetBytes(body, "chat_template_kwargs.clear_thinking", false)
-		}
-		return body
-	}
-
-	// MiniMax M2/M2.1: Use reasoning_split
-	if strings.HasPrefix(model, "minimax-m2") {
-		body, _ = sjson.SetBytes(body, "reasoning_split", enableThinking)
-		return body
-	}
-
-	return body
-}
diff --git a/internal/runtime/executor/iflow_executor_test.go b/internal/runtime/executor/iflow_executor_test.go
new file mode 100644
index 00000000..e588548b
--- /dev/null
+++ b/internal/runtime/executor/iflow_executor_test.go
@@ -0,0 +1,70 @@
+package executor
+
+import (
+	"testing"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+)
+
+func TestIFlowExecutorParseSuffix(t *testing.T) {
+	tests := []struct {
+		name       string
+		model      string
+		wantBase   string
+		wantSuffix string
+	}{
+		{"no suffix", "glm-4", "glm-4", ""},
+		{"glm with suffix", "glm-4.1-flash(high)", "glm-4.1-flash", "high"},
+		{"minimax no suffix", "minimax-m2", "minimax-m2", ""},
+		{"minimax with suffix", "minimax-m2.1(medium)", "minimax-m2.1", "medium"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := thinking.ParseSuffix(tt.model)
+			if result.ModelName != tt.wantBase {
+				t.Errorf("ParseSuffix(%q).ModelName = %q, want %q", tt.model, result.ModelName, tt.wantBase)
+			}
+			if result.RawSuffix != tt.wantSuffix {
+				t.Errorf("ParseSuffix(%q).RawSuffix = %q, want %q", tt.model, result.RawSuffix, tt.wantSuffix)
+			}
+		})
+	}
+}
+
+func TestPreserveReasoningContentInMessages(t *testing.T) {
+	tests := []struct {
+		name  string
+		input []byte
+		want  []byte // nil means output should equal input
+	}{
+		{
+			"non-glm model passthrough",
+			[]byte(`{"model":"gpt-4","messages":[]}`),
+			nil,
+		},
+		{
+			"glm model with empty messages",
+			[]byte(`{"model":"glm-4","messages":[]}`),
+			nil,
+		},
+		{
+			"glm model preserves existing reasoning_content",
+			[]byte(`{"model":"glm-4","messages":[{"role":"assistant","content":"hi","reasoning_content":"thinking..."}]}`),
+			nil,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := preserveReasoningContentInMessages(tt.input)
+			want := tt.want
+			if want == nil {
+				want = tt.input
+			}
+			if string(got) != string(want) {
+				t.Errorf("preserveReasoningContentInMessages() = %s, want %s", got, want)
+			}
+		})
+	}
+}
diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go
index 04dbf23f..a2bef724 100644
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -11,6 +11,7 @@ import (
 	"time"
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -69,7 +70,9 @@ func (e *OpenAICompatExecutor) HttpRequest(ctx context.Context, auth *cliproxyau } func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) baseURL, apiKey := e.resolveCredentials(auth) @@ -85,18 +88,13 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, opts.Stream) - translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), opts.Stream) - modelOverride := e.resolveUpstreamModel(req.Model, auth) - if modelOverride != "" { - translated = e.overrideModel(translated, modelOverride) - } - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated) - allowCompat := e.allowCompatReasoningEffort(req.Model, auth) - translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat) - translated = NormalizeThinkingConfig(translated, req.Model, allowCompat) - if errValidate := ValidateThinkingConfig(translated, req.Model); errValidate != nil { - return resp, errValidate + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, opts.Stream) + translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), opts.Stream) + translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated) + + translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String()) + if err != nil { + return resp, err } url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" @@ -168,7 +166,9 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A } func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) baseURL, apiKey := e.resolveCredentials(auth) @@ -176,24 +176,20 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy err = statusErr{code: http.StatusUnauthorized, msg: "missing provider baseURL"} return nil, err } + from := opts.SourceFormat to := sdktranslator.FromString("openai") originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) - translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - modelOverride := 
e.resolveUpstreamModel(req.Model, auth) - if modelOverride != "" { - translated = e.overrideModel(translated, modelOverride) - } - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated) - allowCompat := e.allowCompatReasoningEffort(req.Model, auth) - translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat) - translated = NormalizeThinkingConfig(translated, req.Model, allowCompat) - if errValidate := ValidateThinkingConfig(translated, req.Model); errValidate != nil { - return nil, errValidate + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated) + + translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String()) + if err != nil { + return nil, err } url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" @@ -293,14 +289,17 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy } func (e *OpenAICompatExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + from := opts.SourceFormat to := sdktranslator.FromString("openai") - translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - modelForCounting := req.Model - if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" { - translated = e.overrideModel(translated, modelOverride) - modelForCounting = modelOverride + modelForCounting := baseModel + + translated, err := thinking.ApplyThinking(translated, req.Model, from.String(), to.String()) + if err != nil { + return cliproxyexecutor.Response{}, err } enc, err := tokenizerForModel(modelForCounting) @@ -336,53 +335,6 @@ func (e *OpenAICompatExecutor) resolveCredentials(auth *cliproxyauth.Auth) (base return } -func (e *OpenAICompatExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string { - if alias == "" || auth == nil || e.cfg == nil { - return "" - } - compat := e.resolveCompatConfig(auth) - if compat == nil { - return "" - } - for i := range compat.Models { - model := compat.Models[i] - if model.Alias != "" { - if strings.EqualFold(model.Alias, alias) { - if model.Name != "" { - return model.Name - } - return alias - } - continue - } - if strings.EqualFold(model.Name, alias) { - return model.Name - } - } - return "" -} - -func (e *OpenAICompatExecutor) allowCompatReasoningEffort(model string, auth *cliproxyauth.Auth) bool { - trimmed := strings.TrimSpace(model) - if trimmed == "" || e == nil || e.cfg == nil { - return false - } - compat := e.resolveCompatConfig(auth) - if compat == nil || len(compat.Models) == 0 { - return false - } - for i := range compat.Models { - entry := compat.Models[i] - if strings.EqualFold(strings.TrimSpace(entry.Alias), trimmed) { - return true - } - if strings.EqualFold(strings.TrimSpace(entry.Name), trimmed) { - return true - } - } - return false -} - func (e *OpenAICompatExecutor) resolveCompatConfig(auth *cliproxyauth.Auth) *config.OpenAICompatibility { if auth == nil || e.cfg == nil { return nil 
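Taken together, the executor changes above converge on one request-preparation pattern. A condensed sketch, illustrative only (streaming, payload-config rules, and credential handling elided; buildUpstreamBody is not a function in this PR):

	// buildUpstreamBody shows the shared shape of the refactored executors: strip
	// the thinking suffix, translate and address the base model, then let the
	// thinking package re-apply the suffixed name's configuration.
	func buildUpstreamBody(req cliproxyexecutor.Request, opts cliproxyexecutor.Options) ([]byte, error) {
		baseModel := thinking.ParseSuffix(req.Model).ModelName // "glm-4.6(high)" -> "glm-4.6"
		from := opts.SourceFormat
		to := sdktranslator.FromString("openai") // iFlow and Qwen speak the OpenAI wire format
		body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
		body, _ = sjson.SetBytes(body, "model", baseModel) // upstream sees only the base model
		return thinking.ApplyThinking(body, req.Model, from.String(), "iflow")
	}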
diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index e3cfc5d4..364e2ee9 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -1,109 +1,14 @@ package executor import ( - "fmt" - "net/http" + "encoding/json" "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) -// ApplyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192)) -// for standard Gemini format payloads. It normalizes the budget when the model supports thinking. -func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte { - // Use the alias from metadata if available, as it's registered in the global registry - // with thinking metadata; the upstream model name may not be registered. - lookupModel := util.ResolveOriginalModel(model, metadata) - - // Determine which model to use for thinking support check. - // If the alias (lookupModel) is not in the registry, fall back to the upstream model. - thinkingModel := lookupModel - if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) { - thinkingModel = model - } - - budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata) - if !ok || (budgetOverride == nil && includeOverride == nil) { - return payload - } - if !util.ModelSupportsThinking(thinkingModel) { - return payload - } - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride) - budgetOverride = &norm - } - return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride) -} - -// ApplyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192)) -// for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking. -func ApplyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte { - // Use the alias from metadata if available, as it's registered in the global registry - // with thinking metadata; the upstream model name may not be registered. - lookupModel := util.ResolveOriginalModel(model, metadata) - - // Determine which model to use for thinking support check. - // If the alias (lookupModel) is not in the registry, fall back to the upstream model. - thinkingModel := lookupModel - if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) { - thinkingModel = model - } - - budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata) - if !ok || (budgetOverride == nil && includeOverride == nil) { - return payload - } - if !util.ModelSupportsThinking(thinkingModel) { - return payload - } - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride) - budgetOverride = &norm - } - return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride) -} - -// ApplyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path. -// Metadata values take precedence over any existing field when the model supports thinking, intentionally -// overwriting caller-provided values to honor suffix/default metadata priority. 
-func ApplyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string, allowCompat bool) []byte { - if len(metadata) == 0 { - return payload - } - if field == "" { - return payload - } - baseModel := util.ResolveOriginalModel(model, metadata) - if baseModel == "" { - baseModel = model - } - if !util.ModelSupportsThinking(baseModel) && !allowCompat { - return payload - } - if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" { - if util.ModelUsesThinkingLevels(baseModel) || allowCompat { - if updated, err := sjson.SetBytes(payload, field, effort); err == nil { - return updated - } - } - } - // Fallback: numeric thinking_budget suffix for level-based (OpenAI-style) models. - if util.ModelUsesThinkingLevels(baseModel) || allowCompat { - if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { - if effort, ok := util.ThinkingBudgetToEffort(baseModel, *budget); ok && effort != "" { - if updated, err := sjson.SetBytes(payload, field, effort); err == nil { - return updated - } - } - } - } - return payload -} - // applyPayloadConfigWithRoot behaves like applyPayloadConfig but treats all parameter // paths as relative to the provided root path (for example, "request" for Gemini CLI) // and restricts matches to the given protocol when supplied. Defaults are checked @@ -113,13 +18,14 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string return payload } rules := cfg.Payload - if len(rules.Default) == 0 && len(rules.Override) == 0 { + if len(rules.Default) == 0 && len(rules.DefaultRaw) == 0 && len(rules.Override) == 0 && len(rules.OverrideRaw) == 0 { return payload } model = strings.TrimSpace(model) if model == "" { return payload } + candidates := payloadModelCandidates(cfg, model, protocol) out := payload source := original if len(source) == 0 { @@ -129,7 +35,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string // Apply default rules: first write wins per field across all matching rules. for i := range rules.Default { rule := &rules.Default[i] - if !payloadRuleMatchesModel(rule, model, protocol) { + if !payloadRuleMatchesModels(rule, protocol, candidates) { continue } for path, value := range rule.Params { @@ -151,10 +57,39 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string appliedDefaults[fullPath] = struct{}{} } } + // Apply default raw rules: first write wins per field across all matching rules. + for i := range rules.DefaultRaw { + rule := &rules.DefaultRaw[i] + if !payloadRuleMatchesModels(rule, protocol, candidates) { + continue + } + for path, value := range rule.Params { + fullPath := buildPayloadPath(root, path) + if fullPath == "" { + continue + } + if gjson.GetBytes(source, fullPath).Exists() { + continue + } + if _, ok := appliedDefaults[fullPath]; ok { + continue + } + rawValue, ok := payloadRawValue(value) + if !ok { + continue + } + updated, errSet := sjson.SetRawBytes(out, fullPath, rawValue) + if errSet != nil { + continue + } + out = updated + appliedDefaults[fullPath] = struct{}{} + } + } // Apply override rules: last write wins per field across all matching rules. 
for i := range rules.Override { rule := &rules.Override[i] - if !payloadRuleMatchesModel(rule, model, protocol) { + if !payloadRuleMatchesModels(rule, protocol, candidates) { continue } for path, value := range rule.Params { @@ -169,9 +104,43 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string out = updated } } + // Apply override raw rules: last write wins per field across all matching rules. + for i := range rules.OverrideRaw { + rule := &rules.OverrideRaw[i] + if !payloadRuleMatchesModels(rule, protocol, candidates) { + continue + } + for path, value := range rule.Params { + fullPath := buildPayloadPath(root, path) + if fullPath == "" { + continue + } + rawValue, ok := payloadRawValue(value) + if !ok { + continue + } + updated, errSet := sjson.SetRawBytes(out, fullPath, rawValue) + if errSet != nil { + continue + } + out = updated + } + } return out } +func payloadRuleMatchesModels(rule *config.PayloadRule, protocol string, models []string) bool { + if rule == nil || len(models) == 0 { + return false + } + for _, model := range models { + if payloadRuleMatchesModel(rule, model, protocol) { + return true + } + } + return false +} + func payloadRuleMatchesModel(rule *config.PayloadRule, model, protocol string) bool { if rule == nil { return false @@ -194,6 +163,65 @@ func payloadRuleMatchesModel(rule *config.PayloadRule, model, protocol string) b return false } +func payloadModelCandidates(cfg *config.Config, model, protocol string) []string { + model = strings.TrimSpace(model) + if model == "" { + return nil + } + candidates := []string{model} + if cfg == nil { + return candidates + } + aliases := payloadModelAliases(cfg, model, protocol) + if len(aliases) == 0 { + return candidates + } + seen := map[string]struct{}{strings.ToLower(model): struct{}{}} + for _, alias := range aliases { + alias = strings.TrimSpace(alias) + if alias == "" { + continue + } + key := strings.ToLower(alias) + if _, ok := seen[key]; ok { + continue + } + seen[key] = struct{}{} + candidates = append(candidates, alias) + } + return candidates +} + +func payloadModelAliases(cfg *config.Config, model, protocol string) []string { + if cfg == nil { + return nil + } + model = strings.TrimSpace(model) + if model == "" { + return nil + } + channel := strings.ToLower(strings.TrimSpace(protocol)) + if channel == "" { + return nil + } + entries := cfg.OAuthModelAlias[channel] + if len(entries) == 0 { + return nil + } + aliases := make([]string, 0, 2) + for _, entry := range entries { + if !strings.EqualFold(strings.TrimSpace(entry.Name), model) { + continue + } + alias := strings.TrimSpace(entry.Alias) + if alias == "" { + continue + } + aliases = append(aliases, alias) + } + return aliases +} + // buildPayloadPath combines an optional root path with a relative parameter path. // When root is empty, the parameter path is used as-is. When root is non-empty, // the parameter path is treated as relative to root. @@ -212,6 +240,24 @@ func buildPayloadPath(root, path string) string { return r + "." + p } +func payloadRawValue(value any) ([]byte, bool) { + if value == nil { + return nil, false + } + switch typed := value.(type) { + case string: + return []byte(typed), true + case []byte: + return typed, true + default: + raw, errMarshal := json.Marshal(typed) + if errMarshal != nil { + return nil, false + } + return raw, true + } +} + // matchModelPattern performs simple wildcard matching where '*' matches zero or more characters. 
// Examples: // @@ -256,102 +302,3 @@ func matchModelPattern(pattern, model string) bool { } return pi == len(pattern) } - -// NormalizeThinkingConfig normalizes thinking-related fields in the payload -// based on model capabilities. For models without thinking support, it strips -// reasoning fields. For models with level-based thinking, it validates and -// normalizes the reasoning effort level. For models with numeric budget thinking, -// it strips the effort string fields. -func NormalizeThinkingConfig(payload []byte, model string, allowCompat bool) []byte { - if len(payload) == 0 || model == "" { - return payload - } - - if !util.ModelSupportsThinking(model) { - if allowCompat { - return payload - } - return StripThinkingFields(payload, false) - } - - if util.ModelUsesThinkingLevels(model) { - return NormalizeReasoningEffortLevel(payload, model) - } - - // Model supports thinking but uses numeric budgets, not levels. - // Strip effort string fields since they are not applicable. - return StripThinkingFields(payload, true) -} - -// StripThinkingFields removes thinking-related fields from the payload for -// models that do not support thinking. If effortOnly is true, only removes -// effort string fields (for models using numeric budgets). -func StripThinkingFields(payload []byte, effortOnly bool) []byte { - fieldsToRemove := []string{ - "reasoning_effort", - "reasoning.effort", - } - if !effortOnly { - fieldsToRemove = append([]string{"reasoning", "thinking"}, fieldsToRemove...) - } - out := payload - for _, field := range fieldsToRemove { - if gjson.GetBytes(out, field).Exists() { - out, _ = sjson.DeleteBytes(out, field) - } - } - return out -} - -// NormalizeReasoningEffortLevel validates and normalizes the reasoning_effort -// or reasoning.effort field for level-based thinking models. -func NormalizeReasoningEffortLevel(payload []byte, model string) []byte { - out := payload - - if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() { - if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok { - out, _ = sjson.SetBytes(out, "reasoning_effort", normalized) - } - } - - if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() { - if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok { - out, _ = sjson.SetBytes(out, "reasoning.effort", normalized) - } - } - - return out -} - -// ValidateThinkingConfig checks for unsupported reasoning levels on level-based models. -// Returns a statusErr with 400 when an unsupported level is supplied to avoid silently -// downgrading requests. 
-func ValidateThinkingConfig(payload []byte, model string) error { - if len(payload) == 0 || model == "" { - return nil - } - if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) { - return nil - } - - levels := util.GetModelThinkingLevels(model) - checkField := func(path string) error { - if effort := gjson.GetBytes(payload, path); effort.Exists() { - if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok { - return statusErr{ - code: http.StatusBadRequest, - msg: fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")), - } - } - } - return nil - } - - if err := checkField("reasoning_effort"); err != nil { - return err - } - if err := checkField("reasoning.effort"); err != nil { - return err - } - return nil -} diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index ee014fc7..260165d9 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -12,6 +12,7 @@ import ( qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" @@ -65,12 +66,14 @@ func (e *QwenExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth, } func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { - token, baseURL := qwenCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + token, baseURL := qwenCreds(auth) if baseURL == "" { baseURL = "https://portal.qwen.ai/v1" } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat @@ -79,15 +82,16 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false) - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) - body, _ = sjson.SetBytes(body, "model", req.Model) - body = NormalizeThinkingConfig(body, req.Model, false) - if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil { - return resp, errValidate + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + body, _ = sjson.SetBytes(body, "model", baseModel) + + body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String()) + if err != nil { + return resp, err } - body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) + + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := 
http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) @@ -140,18 +144,22 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req appendAPIResponseChunk(ctx, e.cfg, data) reporter.publish(ctx, parseOpenAIUsage(data)) var param any + // Note: TranslateNonStream uses req.Model (original with suffix) to preserve + // the original model name in the response for client compatibility. out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, ¶m) resp = cliproxyexecutor.Response{Payload: []byte(out)} return resp, nil } func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - token, baseURL := qwenCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + token, baseURL := qwenCreds(auth) if baseURL == "" { baseURL = "https://portal.qwen.ai/v1" } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat @@ -160,15 +168,15 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + body, _ = sjson.SetBytes(body, "model", baseModel) - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) - body, _ = sjson.SetBytes(body, "model", req.Model) - body = NormalizeThinkingConfig(body, req.Model, false) - if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil { - return nil, errValidate + body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String()) + if err != nil { + return nil, err } + toolsResult := gjson.GetBytes(body, "tools") // I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response. // This will have no real consequences. It's just to scare Qwen3. 
@@ -176,7 +184,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 		body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
 	}
 	body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
-	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 
 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -256,13 +264,15 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 }
 
 func (e *QwenExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
 	modelName := gjson.GetBytes(body, "model").String()
 	if strings.TrimSpace(modelName) == "" {
-		modelName = req.Model
+		modelName = baseModel
 	}
 
 	enc, err := tokenizerForModel(modelName)
diff --git a/internal/runtime/executor/qwen_executor_test.go b/internal/runtime/executor/qwen_executor_test.go
new file mode 100644
index 00000000..6a777c53
--- /dev/null
+++ b/internal/runtime/executor/qwen_executor_test.go
@@ -0,0 +1,33 @@
+package executor
+
+import (
+	"testing"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+)
+
+func TestQwenExecutorParseSuffix(t *testing.T) {
+	tests := []struct {
+		name       string
+		model      string
+		wantBase   string
+		wantSuffix string
+	}{
+		{"no suffix", "qwen-max", "qwen-max", ""},
+		{"with level suffix", "qwen-max(high)", "qwen-max", "high"},
+		{"with budget suffix", "qwen-max(16384)", "qwen-max", "16384"},
+		{"complex model name", "qwen-plus-latest(medium)", "qwen-plus-latest", "medium"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := thinking.ParseSuffix(tt.model)
+			if result.ModelName != tt.wantBase {
+				t.Errorf("ParseSuffix(%q).ModelName = %q, want %q", tt.model, result.ModelName, tt.wantBase)
+			}
+			if result.RawSuffix != tt.wantSuffix {
+				t.Errorf("ParseSuffix(%q).RawSuffix = %q, want %q", tt.model, result.RawSuffix, tt.wantSuffix)
+			}
+		})
+	}
+}
diff --git a/internal/runtime/executor/thinking_providers.go b/internal/runtime/executor/thinking_providers.go
new file mode 100644
index 00000000..5a143670
--- /dev/null
+++ b/internal/runtime/executor/thinking_providers.go
@@ -0,0 +1,11 @@
+package executor
+
+import (
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/antigravity"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/claude"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/codex"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai"
+)
diff --git 
a/internal/thinking/apply.go b/internal/thinking/apply.go new file mode 100644 index 00000000..cf0e373b --- /dev/null +++ b/internal/thinking/apply.go @@ -0,0 +1,481 @@ +// Package thinking provides unified thinking configuration processing. +package thinking + +import ( + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + log "github.com/sirupsen/logrus" + "github.com/tidwall/gjson" +) + +// providerAppliers maps provider names to their ProviderApplier implementations. +var providerAppliers = map[string]ProviderApplier{ + "gemini": nil, + "gemini-cli": nil, + "claude": nil, + "openai": nil, + "codex": nil, + "iflow": nil, + "antigravity": nil, +} + +// GetProviderApplier returns the ProviderApplier for the given provider name. +// Returns nil if the provider is not registered. +func GetProviderApplier(provider string) ProviderApplier { + return providerAppliers[provider] +} + +// RegisterProvider registers a provider applier by name. +func RegisterProvider(name string, applier ProviderApplier) { + providerAppliers[name] = applier +} + +// IsUserDefinedModel reports whether the model is a user-defined model that should +// have thinking configuration passed through without validation. +// +// User-defined models are configured via config file's models[] array +// (e.g., openai-compatibility.*.models[], *-api-key.models[]). These models +// are marked with UserDefined=true at registration time. +// +// User-defined models should have their thinking configuration applied directly, +// letting the upstream service validate the configuration. +func IsUserDefinedModel(modelInfo *registry.ModelInfo) bool { + if modelInfo == nil { + return true + } + return modelInfo.UserDefined +} + +// ApplyThinking applies thinking configuration to a request body. +// +// This is the unified entry point for all providers. It follows the processing +// order defined in FR25: route check → model capability query → config extraction +// → validation → application. +// +// Suffix Priority: When the model name includes a thinking suffix (e.g., "gemini-2.5-pro(8192)"), +// the suffix configuration takes priority over any thinking parameters in the request body. +// This enables users to override thinking settings via the model name without modifying their +// request payload. +// +// Parameters: +// - body: Original request body JSON +// - model: Model name, optionally with thinking suffix (e.g., "claude-sonnet-4-5(16384)") +// - fromFormat: Source request format (e.g., openai, codex, gemini) +// - toFormat: Target provider format for the request body (gemini, gemini-cli, antigravity, claude, openai, codex, iflow) +// +// Returns: +// - Modified request body JSON with thinking configuration applied +// - Error if validation fails (ThinkingError). On error, the original body +// is returned (not nil) to enable defensive programming patterns. +// +// Passthrough behavior (returns original body without error): +// - Unknown provider (not in providerAppliers map) +// - modelInfo.Thinking is nil (model doesn't support thinking) +// +// Note: Unknown models (modelInfo is nil) are treated as user-defined models: we skip +// validation and still apply the thinking config so the upstream can validate it. 
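+//
+// When fromFormat is empty it defaults to toFormat. It feeds both ValidateConfig
+// and, for user-defined models, the level→budget conversion performed by
+// normalizeUserDefinedConfig.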
+// +// Example: +// +// // With suffix - suffix config takes priority +// result, err := thinking.ApplyThinking(body, "gemini-2.5-pro(8192)", "gemini", "gemini") +// +// // Without suffix - uses body config +// result, err := thinking.ApplyThinking(body, "gemini-2.5-pro", "gemini", "gemini") +func ApplyThinking(body []byte, model string, fromFormat string, toFormat string) ([]byte, error) { + providerFormat := strings.ToLower(strings.TrimSpace(toFormat)) + fromFormat = strings.ToLower(strings.TrimSpace(fromFormat)) + if fromFormat == "" { + fromFormat = providerFormat + } + // 1. Route check: Get provider applier + applier := GetProviderApplier(providerFormat) + if applier == nil { + log.WithFields(log.Fields{ + "provider": providerFormat, + "model": model, + }).Debug("thinking: unknown provider, passthrough |") + return body, nil + } + + // 2. Parse suffix and get modelInfo + suffixResult := ParseSuffix(model) + baseModel := suffixResult.ModelName + modelInfo := registry.LookupModelInfo(baseModel) + + // 3. Model capability check + // Unknown models are treated as user-defined so thinking config can still be applied. + // The upstream service is responsible for validating the configuration. + if IsUserDefinedModel(modelInfo) { + return applyUserDefinedModel(body, modelInfo, fromFormat, providerFormat, suffixResult) + } + if modelInfo.Thinking == nil { + config := extractThinkingConfig(body, providerFormat) + if hasThinkingConfig(config) { + log.WithFields(log.Fields{ + "model": baseModel, + "provider": providerFormat, + }).Debug("thinking: model does not support thinking, stripping config |") + return StripThinkingConfig(body, providerFormat), nil + } + log.WithFields(log.Fields{ + "provider": providerFormat, + "model": baseModel, + }).Debug("thinking: model does not support thinking, passthrough |") + return body, nil + } + + // 4. Get config: suffix priority over body + var config ThinkingConfig + if suffixResult.HasSuffix { + config = parseSuffixToConfig(suffixResult.RawSuffix, providerFormat, model) + log.WithFields(log.Fields{ + "provider": providerFormat, + "model": model, + "mode": config.Mode, + "budget": config.Budget, + "level": config.Level, + }).Debug("thinking: config from model suffix |") + } else { + config = extractThinkingConfig(body, providerFormat) + if hasThinkingConfig(config) { + log.WithFields(log.Fields{ + "provider": providerFormat, + "model": modelInfo.ID, + "mode": config.Mode, + "budget": config.Budget, + "level": config.Level, + }).Debug("thinking: original config from request |") + } + } + + if !hasThinkingConfig(config) { + log.WithFields(log.Fields{ + "provider": providerFormat, + "model": modelInfo.ID, + }).Debug("thinking: no config found, passthrough |") + return body, nil + } + + // 5. Validate and normalize configuration + validated, err := ValidateConfig(config, modelInfo, fromFormat, providerFormat, suffixResult.HasSuffix) + if err != nil { + log.WithFields(log.Fields{ + "provider": providerFormat, + "model": modelInfo.ID, + "error": err.Error(), + }).Warn("thinking: validation failed |") + // Return original body on validation failure (defensive programming). + // This ensures callers who ignore the error won't receive nil body. + // The upstream service will decide how to handle the unmodified request. 
+ return body, err + } + + // Defensive check: ValidateConfig should never return (nil, nil) + if validated == nil { + log.WithFields(log.Fields{ + "provider": providerFormat, + "model": modelInfo.ID, + }).Warn("thinking: ValidateConfig returned nil config without error, passthrough |") + return body, nil + } + + log.WithFields(log.Fields{ + "provider": providerFormat, + "model": modelInfo.ID, + "mode": validated.Mode, + "budget": validated.Budget, + "level": validated.Level, + }).Debug("thinking: processed config to apply |") + + // 6. Apply configuration using provider-specific applier + return applier.Apply(body, *validated, modelInfo) +} + +// parseSuffixToConfig converts a raw suffix string to ThinkingConfig. +// +// Parsing priority: +// 1. Special values: "none" → ModeNone, "auto"/"-1" → ModeAuto +// 2. Level names: "minimal", "low", "medium", "high", "xhigh" → ModeLevel +// 3. Numeric values: positive integers → ModeBudget, 0 → ModeNone +// +// If none of the above match, returns empty ThinkingConfig (treated as no config). +func parseSuffixToConfig(rawSuffix, provider, model string) ThinkingConfig { + // 1. Try special values first (none, auto, -1) + if mode, ok := ParseSpecialSuffix(rawSuffix); ok { + switch mode { + case ModeNone: + return ThinkingConfig{Mode: ModeNone, Budget: 0} + case ModeAuto: + return ThinkingConfig{Mode: ModeAuto, Budget: -1} + } + } + + // 2. Try level parsing (minimal, low, medium, high, xhigh) + if level, ok := ParseLevelSuffix(rawSuffix); ok { + return ThinkingConfig{Mode: ModeLevel, Level: level} + } + + // 3. Try numeric parsing + if budget, ok := ParseNumericSuffix(rawSuffix); ok { + if budget == 0 { + return ThinkingConfig{Mode: ModeNone, Budget: 0} + } + return ThinkingConfig{Mode: ModeBudget, Budget: budget} + } + + // Unknown suffix format - return empty config + log.WithFields(log.Fields{ + "provider": provider, + "model": model, + "raw_suffix": rawSuffix, + }).Debug("thinking: unknown suffix format, treating as no config |") + return ThinkingConfig{} +} + +// applyUserDefinedModel applies thinking configuration for user-defined models +// without ThinkingSupport validation. 
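+//
+// For example (hypothetical config): an openai-compatibility model exposed as
+// "my-gpt(high)" has no registry ThinkingSupport, so the suffix is parsed, range
+// and level validation are skipped, and the applier receives
+// {Mode: ModeLevel, Level: "high"} as-is for the upstream to honor or reject.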
+func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, fromFormat, toFormat string, suffixResult SuffixResult) ([]byte, error) { + // Get model ID for logging + modelID := "" + if modelInfo != nil { + modelID = modelInfo.ID + } else { + modelID = suffixResult.ModelName + } + + // Get config: suffix priority over body + var config ThinkingConfig + if suffixResult.HasSuffix { + config = parseSuffixToConfig(suffixResult.RawSuffix, toFormat, modelID) + } else { + config = extractThinkingConfig(body, toFormat) + } + + if !hasThinkingConfig(config) { + log.WithFields(log.Fields{ + "model": modelID, + "provider": toFormat, + }).Debug("thinking: user-defined model, passthrough (no config) |") + return body, nil + } + + applier := GetProviderApplier(toFormat) + if applier == nil { + log.WithFields(log.Fields{ + "model": modelID, + "provider": toFormat, + }).Debug("thinking: user-defined model, passthrough (unknown provider) |") + return body, nil + } + + log.WithFields(log.Fields{ + "provider": toFormat, + "model": modelID, + "mode": config.Mode, + "budget": config.Budget, + "level": config.Level, + }).Debug("thinking: applying config for user-defined model (skip validation)") + + config = normalizeUserDefinedConfig(config, fromFormat, toFormat) + return applier.Apply(body, config, modelInfo) +} + +func normalizeUserDefinedConfig(config ThinkingConfig, fromFormat, toFormat string) ThinkingConfig { + if config.Mode != ModeLevel { + return config + } + if !isBudgetBasedProvider(toFormat) || !isLevelBasedProvider(fromFormat) { + return config + } + budget, ok := ConvertLevelToBudget(string(config.Level)) + if !ok { + return config + } + config.Mode = ModeBudget + config.Budget = budget + config.Level = "" + return config +} + +// extractThinkingConfig extracts provider-specific thinking config from request body. +func extractThinkingConfig(body []byte, provider string) ThinkingConfig { + if len(body) == 0 || !gjson.ValidBytes(body) { + return ThinkingConfig{} + } + + switch provider { + case "claude": + return extractClaudeConfig(body) + case "gemini", "gemini-cli", "antigravity": + return extractGeminiConfig(body, provider) + case "openai": + return extractOpenAIConfig(body) + case "codex": + return extractCodexConfig(body) + case "iflow": + config := extractIFlowConfig(body) + if hasThinkingConfig(config) { + return config + } + return extractOpenAIConfig(body) + default: + return ThinkingConfig{} + } +} + +func hasThinkingConfig(config ThinkingConfig) bool { + return config.Mode != ModeBudget || config.Budget != 0 || config.Level != "" +} + +// extractClaudeConfig extracts thinking configuration from Claude format request body. +// +// Claude API format: +// - thinking.type: "enabled" or "disabled" +// - thinking.budget_tokens: integer (-1=auto, 0=disabled, >0=budget) +// +// Priority: thinking.type="disabled" takes precedence over budget_tokens. +// When type="enabled" without budget_tokens, returns ModeAuto to indicate +// the user wants thinking enabled but didn't specify a budget. 
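+//
+// Illustrative mappings (hypothetical request bodies):
+//
+//	{"thinking":{"type":"enabled","budget_tokens":4096}} → {Mode: ModeBudget, Budget: 4096}
+//	{"thinking":{"type":"enabled","budget_tokens":-1}}   → {Mode: ModeAuto, Budget: -1}
+//	{"thinking":{"type":"disabled"}}                     → {Mode: ModeNone, Budget: 0}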
+func extractClaudeConfig(body []byte) ThinkingConfig { + thinkingType := gjson.GetBytes(body, "thinking.type").String() + if thinkingType == "disabled" { + return ThinkingConfig{Mode: ModeNone, Budget: 0} + } + + // Check budget_tokens + if budget := gjson.GetBytes(body, "thinking.budget_tokens"); budget.Exists() { + value := int(budget.Int()) + switch value { + case 0: + return ThinkingConfig{Mode: ModeNone, Budget: 0} + case -1: + return ThinkingConfig{Mode: ModeAuto, Budget: -1} + default: + return ThinkingConfig{Mode: ModeBudget, Budget: value} + } + } + + // If type="enabled" but no budget_tokens, treat as auto (user wants thinking but no budget specified) + if thinkingType == "enabled" { + return ThinkingConfig{Mode: ModeAuto, Budget: -1} + } + + return ThinkingConfig{} +} + +// extractGeminiConfig extracts thinking configuration from Gemini format request body. +// +// Gemini API format: +// - generationConfig.thinkingConfig.thinkingLevel: "none", "auto", or level name (Gemini 3) +// - generationConfig.thinkingConfig.thinkingBudget: integer (Gemini 2.5) +// +// For gemini-cli and antigravity providers, the path is prefixed with "request.". +// +// Priority: thinkingLevel is checked first (Gemini 3 format), then thinkingBudget (Gemini 2.5 format). +// This allows newer Gemini 3 level-based configs to take precedence. +func extractGeminiConfig(body []byte, provider string) ThinkingConfig { + prefix := "generationConfig.thinkingConfig" + if provider == "gemini-cli" || provider == "antigravity" { + prefix = "request.generationConfig.thinkingConfig" + } + + // Check thinkingLevel first (Gemini 3 format takes precedence) + if level := gjson.GetBytes(body, prefix+".thinkingLevel"); level.Exists() { + value := level.String() + switch value { + case "none": + return ThinkingConfig{Mode: ModeNone, Budget: 0} + case "auto": + return ThinkingConfig{Mode: ModeAuto, Budget: -1} + default: + return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(value)} + } + } + + // Check thinkingBudget (Gemini 2.5 format) + if budget := gjson.GetBytes(body, prefix+".thinkingBudget"); budget.Exists() { + value := int(budget.Int()) + switch value { + case 0: + return ThinkingConfig{Mode: ModeNone, Budget: 0} + case -1: + return ThinkingConfig{Mode: ModeAuto, Budget: -1} + default: + return ThinkingConfig{Mode: ModeBudget, Budget: value} + } + } + + return ThinkingConfig{} +} + +// extractOpenAIConfig extracts thinking configuration from OpenAI format request body. +// +// OpenAI API format: +// - reasoning_effort: "none", "low", "medium", "high" (discrete levels) +// +// OpenAI uses level-based thinking configuration only, no numeric budget support. +// The "none" value is treated specially to return ModeNone. +func extractOpenAIConfig(body []byte) ThinkingConfig { + // Check reasoning_effort (OpenAI Chat Completions format) + if effort := gjson.GetBytes(body, "reasoning_effort"); effort.Exists() { + value := effort.String() + if value == "none" { + return ThinkingConfig{Mode: ModeNone, Budget: 0} + } + return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(value)} + } + + return ThinkingConfig{} +} + +// extractCodexConfig extracts thinking configuration from Codex format request body. +// +// Codex API format (OpenAI Responses API): +// - reasoning.effort: "none", "low", "medium", "high" +// +// This is similar to OpenAI but uses nested field "reasoning.effort" instead of "reasoning_effort". 
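+//
+// For instance (hypothetical bodies): {"reasoning":{"effort":"high"}} yields
+// {Mode: ModeLevel, Level: "high"}, while {"reasoning":{"effort":"none"}} yields
+// {Mode: ModeNone, Budget: 0}.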
+func extractCodexConfig(body []byte) ThinkingConfig { + // Check reasoning.effort (Codex / OpenAI Responses API format) + if effort := gjson.GetBytes(body, "reasoning.effort"); effort.Exists() { + value := effort.String() + if value == "none" { + return ThinkingConfig{Mode: ModeNone, Budget: 0} + } + return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(value)} + } + + return ThinkingConfig{} +} + +// extractIFlowConfig extracts thinking configuration from iFlow format request body. +// +// iFlow API format (supports multiple model families): +// - GLM format: chat_template_kwargs.enable_thinking (boolean) +// - MiniMax format: reasoning_split (boolean) +// +// Returns ModeBudget with Budget=1 as a sentinel value indicating "enabled". +// The actual budget/configuration is determined by the iFlow applier based on model capabilities. +// Budget=1 is used because iFlow models don't use numeric budgets; they only support on/off. +func extractIFlowConfig(body []byte) ThinkingConfig { + // GLM format: chat_template_kwargs.enable_thinking + if enabled := gjson.GetBytes(body, "chat_template_kwargs.enable_thinking"); enabled.Exists() { + if enabled.Bool() { + // Budget=1 is a sentinel meaning "enabled" (iFlow doesn't use numeric budgets) + return ThinkingConfig{Mode: ModeBudget, Budget: 1} + } + return ThinkingConfig{Mode: ModeNone, Budget: 0} + } + + // MiniMax format: reasoning_split + if split := gjson.GetBytes(body, "reasoning_split"); split.Exists() { + if split.Bool() { + // Budget=1 is a sentinel meaning "enabled" (iFlow doesn't use numeric budgets) + return ThinkingConfig{Mode: ModeBudget, Budget: 1} + } + return ThinkingConfig{Mode: ModeNone, Budget: 0} + } + + return ThinkingConfig{} +} diff --git a/internal/thinking/convert.go b/internal/thinking/convert.go new file mode 100644 index 00000000..776ccef6 --- /dev/null +++ b/internal/thinking/convert.go @@ -0,0 +1,142 @@ +package thinking + +import ( + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" +) + +// levelToBudgetMap defines the standard Level → Budget mapping. +// All keys are lowercase; lookups should use strings.ToLower. +var levelToBudgetMap = map[string]int{ + "none": 0, + "auto": -1, + "minimal": 512, + "low": 1024, + "medium": 8192, + "high": 24576, + "xhigh": 32768, +} + +// ConvertLevelToBudget converts a thinking level to a budget value. +// +// This is a semantic conversion that maps discrete levels to numeric budgets. +// Level matching is case-insensitive. +// +// Level → Budget mapping: +// - none → 0 +// - auto → -1 +// - minimal → 512 +// - low → 1024 +// - medium → 8192 +// - high → 24576 +// - xhigh → 32768 +// +// Returns: +// - budget: The converted budget value +// - ok: true if level is valid, false otherwise +func ConvertLevelToBudget(level string) (int, bool) { + budget, ok := levelToBudgetMap[strings.ToLower(level)] + return budget, ok +} + +// BudgetThreshold constants define the upper bounds for each thinking level. +// These are used by ConvertBudgetToLevel for range-based mapping. +const ( + // ThresholdMinimal is the upper bound for "minimal" level (1-512) + ThresholdMinimal = 512 + // ThresholdLow is the upper bound for "low" level (513-1024) + ThresholdLow = 1024 + // ThresholdMedium is the upper bound for "medium" level (1025-8192) + ThresholdMedium = 8192 + // ThresholdHigh is the upper bound for "high" level (8193-24576) + ThresholdHigh = 24576 +) + +// ConvertBudgetToLevel converts a budget value to the nearest thinking level. 
+// +// This is a semantic conversion that maps numeric budgets to discrete levels. +// Uses threshold-based mapping for range conversion. +// +// Budget → Level thresholds: +// - -1 → auto +// - 0 → none +// - 1-512 → minimal +// - 513-1024 → low +// - 1025-8192 → medium +// - 8193-24576 → high +// - 24577+ → xhigh +// +// Returns: +// - level: The converted thinking level string +// - ok: true if budget is valid, false for invalid negatives (< -1) +func ConvertBudgetToLevel(budget int) (string, bool) { + switch { + case budget < -1: + // Invalid negative values + return "", false + case budget == -1: + return string(LevelAuto), true + case budget == 0: + return string(LevelNone), true + case budget <= ThresholdMinimal: + return string(LevelMinimal), true + case budget <= ThresholdLow: + return string(LevelLow), true + case budget <= ThresholdMedium: + return string(LevelMedium), true + case budget <= ThresholdHigh: + return string(LevelHigh), true + default: + return string(LevelXHigh), true + } +} + +// ModelCapability describes the thinking format support of a model. +type ModelCapability int + +const ( + // CapabilityUnknown indicates modelInfo is nil (passthrough behavior, internal use). + CapabilityUnknown ModelCapability = iota - 1 + // CapabilityNone indicates model doesn't support thinking (Thinking is nil). + CapabilityNone + // CapabilityBudgetOnly indicates the model supports numeric budgets only. + CapabilityBudgetOnly + // CapabilityLevelOnly indicates the model supports discrete levels only. + CapabilityLevelOnly + // CapabilityHybrid indicates the model supports both budgets and levels. + CapabilityHybrid +) + +// detectModelCapability determines the thinking format capability of a model. +// +// This is an internal function used by validation and conversion helpers. +// It analyzes the model's ThinkingSupport configuration to classify the model: +// - CapabilityNone: modelInfo.Thinking is nil (model doesn't support thinking) +// - CapabilityBudgetOnly: Has Min/Max but no Levels (Claude, Gemini 2.5) +// - CapabilityLevelOnly: Has Levels but no Min/Max (OpenAI, iFlow) +// - CapabilityHybrid: Has both Min/Max and Levels (Gemini 3) +// +// Note: Returns a special sentinel value when modelInfo itself is nil (unknown model). +func detectModelCapability(modelInfo *registry.ModelInfo) ModelCapability { + if modelInfo == nil { + return CapabilityUnknown // sentinel for "passthrough" behavior + } + if modelInfo.Thinking == nil { + return CapabilityNone + } + support := modelInfo.Thinking + hasBudget := support.Min > 0 || support.Max > 0 + hasLevels := len(support.Levels) > 0 + + switch { + case hasBudget && hasLevels: + return CapabilityHybrid + case hasBudget: + return CapabilityBudgetOnly + case hasLevels: + return CapabilityLevelOnly + default: + return CapabilityNone + } +} diff --git a/internal/thinking/errors.go b/internal/thinking/errors.go new file mode 100644 index 00000000..5eed9381 --- /dev/null +++ b/internal/thinking/errors.go @@ -0,0 +1,82 @@ +// Package thinking provides unified thinking configuration processing logic. +package thinking + +import "net/http" + +// ErrorCode represents the type of thinking configuration error. +type ErrorCode string + +// Error codes for thinking configuration processing. +const ( + // ErrInvalidSuffix indicates the suffix format cannot be parsed. + // Example: "model(abc" (missing closing parenthesis) + ErrInvalidSuffix ErrorCode = "INVALID_SUFFIX" + + // ErrUnknownLevel indicates the level value is not in the valid list. 
+ // Example: "model(ultra)" where "ultra" is not a valid level + ErrUnknownLevel ErrorCode = "UNKNOWN_LEVEL" + + // ErrThinkingNotSupported indicates the model does not support thinking. + // Example: claude-haiku-4-5 does not have thinking capability + ErrThinkingNotSupported ErrorCode = "THINKING_NOT_SUPPORTED" + + // ErrLevelNotSupported indicates the model does not support level mode. + // Example: using level with a budget-only model + ErrLevelNotSupported ErrorCode = "LEVEL_NOT_SUPPORTED" + + // ErrBudgetOutOfRange indicates the budget value is outside model range. + // Example: budget 64000 exceeds max 20000 + ErrBudgetOutOfRange ErrorCode = "BUDGET_OUT_OF_RANGE" + + // ErrProviderMismatch indicates the provider does not match the model. + // Example: applying Claude format to a Gemini model + ErrProviderMismatch ErrorCode = "PROVIDER_MISMATCH" +) + +// ThinkingError represents an error that occurred during thinking configuration processing. +// +// This error type provides structured information about the error, including: +// - Code: A machine-readable error code for programmatic handling +// - Message: A human-readable description of the error +// - Model: The model name related to the error (optional) +// - Details: Additional context information (optional) +type ThinkingError struct { + // Code is the machine-readable error code + Code ErrorCode + // Message is the human-readable error description. + // Should be lowercase, no trailing period, with context if applicable. + Message string + // Model is the model name related to this error (optional) + Model string + // Details contains additional context information (optional) + Details map[string]interface{} +} + +// Error implements the error interface. +// Returns the message directly without code prefix. +// Use Code field for programmatic error handling. +func (e *ThinkingError) Error() string { + return e.Message +} + +// NewThinkingError creates a new ThinkingError with the given code and message. +func NewThinkingError(code ErrorCode, message string) *ThinkingError { + return &ThinkingError{ + Code: code, + Message: message, + } +} + +// NewThinkingErrorWithModel creates a new ThinkingError with model context. +func NewThinkingErrorWithModel(code ErrorCode, message, model string) *ThinkingError { + return &ThinkingError{ + Code: code, + Message: message, + Model: model, + } +} + +// StatusCode implements a portable status code interface for HTTP handlers. +func (e *ThinkingError) StatusCode() int { + return http.StatusBadRequest +} diff --git a/internal/thinking/provider/antigravity/apply.go b/internal/thinking/provider/antigravity/apply.go new file mode 100644 index 00000000..9c1c79f6 --- /dev/null +++ b/internal/thinking/provider/antigravity/apply.go @@ -0,0 +1,201 @@ +// Package antigravity implements thinking configuration for Antigravity API format. +// +// Antigravity uses request.generationConfig.thinkingConfig.* path (same as gemini-cli) +// but requires additional normalization for Claude models: +// - Ensure thinking budget < max_tokens +// - Remove thinkingConfig if budget < minimum allowed +package antigravity + +import ( + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// Applier applies thinking configuration for Antigravity API format. 
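+// It writes the request.generationConfig.thinkingConfig.* fields, choosing between
+// the thinkingLevel and thinkingBudget forms based on the model's capabilities.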
+type Applier struct{}
+
+var _ thinking.ProviderApplier = (*Applier)(nil)
+
+// NewApplier creates a new Antigravity thinking applier.
+func NewApplier() *Applier {
+	return &Applier{}
+}
+
+func init() {
+	thinking.RegisterProvider("antigravity", NewApplier())
+}
+
+// Apply applies thinking configuration to Antigravity request body.
+//
+// For Claude models, additional constraints are applied:
+// - Ensure thinking budget < max_tokens
+// - Remove thinkingConfig if budget < minimum allowed
+func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
+	if thinking.IsUserDefinedModel(modelInfo) {
+		return a.applyCompatible(body, config, modelInfo)
+	}
+	if modelInfo.Thinking == nil {
+		return body, nil
+	}
+
+	if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
+		return body, nil
+	}
+
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		body = []byte(`{}`)
+	}
+
+	isClaude := strings.Contains(strings.ToLower(modelInfo.ID), "claude")
+
+	// ModeAuto and ModeBudget both use the Budget format (ModeAuto maps to thinkingBudget=-1).
+	if config.Mode == thinking.ModeAuto || config.Mode == thinking.ModeBudget {
+		return a.applyBudgetFormat(body, config, modelInfo, isClaude)
+	}
+
+	// For ModeLevel and ModeNone, choose format based on model capabilities
+	support := modelInfo.Thinking
+	if len(support.Levels) > 0 {
+		return a.applyLevelFormat(body, config)
+	}
+	return a.applyBudgetFormat(body, config, modelInfo, isClaude)
+}
+
+func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
+	if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
+		return body, nil
+	}
+
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		body = []byte(`{}`)
+	}
+
+	isClaude := false
+	if modelInfo != nil {
+		isClaude = strings.Contains(strings.ToLower(modelInfo.ID), "claude")
+	}
+
+	if config.Mode == thinking.ModeAuto {
+		return a.applyBudgetFormat(body, config, modelInfo, isClaude)
+	}
+
+	if config.Mode == thinking.ModeLevel || (config.Mode == thinking.ModeNone && config.Level != "") {
+		return a.applyLevelFormat(body, config)
+	}
+
+	return a.applyBudgetFormat(body, config, modelInfo, isClaude)
+}
+
+func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
+	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
+	result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget")
+	// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
+ result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts") + + if config.Mode == thinking.ModeNone { + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", false) + if config.Level != "" { + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", string(config.Level)) + } + return result, nil + } + + // Only handle ModeLevel - budget conversion should be done by upper layer + if config.Mode != thinking.ModeLevel { + return body, nil + } + + level := string(config.Level) + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", level) + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", true) + return result, nil +} + +func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo, isClaude bool) ([]byte, error) { + // Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output + result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingLevel") + // Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing. + result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts") + + budget := config.Budget + includeThoughts := false + switch config.Mode { + case thinking.ModeNone: + includeThoughts = false + case thinking.ModeAuto: + includeThoughts = true + default: + includeThoughts = budget > 0 + } + + // Apply Claude-specific constraints + if isClaude && modelInfo != nil { + budget, result = a.normalizeClaudeBudget(budget, result, modelInfo) + // Check if budget was removed entirely + if budget == -2 { + return result, nil + } + } + + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget", budget) + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", includeThoughts) + return result, nil +} + +// normalizeClaudeBudget applies Claude-specific constraints to thinking budget. +// +// It handles: +// - Ensuring thinking budget < max_tokens +// - Removing thinkingConfig if budget < minimum allowed +// +// Returns the normalized budget and updated payload. +// Returns budget=-2 as a sentinel indicating thinkingConfig was removed entirely. +func (a *Applier) normalizeClaudeBudget(budget int, payload []byte, modelInfo *registry.ModelInfo) (int, []byte) { + if modelInfo == nil { + return budget, payload + } + + // Get effective max tokens + effectiveMax, setDefaultMax := a.effectiveMaxTokens(payload, modelInfo) + if effectiveMax > 0 && budget >= effectiveMax { + budget = effectiveMax - 1 + } + + // Check minimum budget + minBudget := 0 + if modelInfo.Thinking != nil { + minBudget = modelInfo.Thinking.Min + } + if minBudget > 0 && budget >= 0 && budget < minBudget { + // Budget is below minimum, remove thinking config entirely + payload, _ = sjson.DeleteBytes(payload, "request.generationConfig.thinkingConfig") + return -2, payload + } + + // Set default max tokens if needed + if setDefaultMax && effectiveMax > 0 { + payload, _ = sjson.SetBytes(payload, "request.generationConfig.maxOutputTokens", effectiveMax) + } + + return budget, payload +} + +// effectiveMaxTokens returns the max tokens to cap thinking: +// prefer request-provided maxOutputTokens; otherwise fall back to model default. 
+// The boolean indicates whether the value came from the model default (and thus should be written back).
+func (a *Applier) effectiveMaxTokens(payload []byte, modelInfo *registry.ModelInfo) (max int, fromModel bool) {
+	if maxTok := gjson.GetBytes(payload, "request.generationConfig.maxOutputTokens"); maxTok.Exists() && maxTok.Int() > 0 {
+		return int(maxTok.Int()), false
+	}
+	if modelInfo != nil && modelInfo.MaxCompletionTokens > 0 {
+		return modelInfo.MaxCompletionTokens, true
+	}
+	return 0, false
+}
diff --git a/internal/thinking/provider/claude/apply.go b/internal/thinking/provider/claude/apply.go
new file mode 100644
index 00000000..3c74d514
--- /dev/null
+++ b/internal/thinking/provider/claude/apply.go
@@ -0,0 +1,166 @@
+// Package claude implements thinking configuration for Claude models.
+//
+// Claude models use the thinking.budget_tokens format with values in the range
+// 1024-128000. Some Claude models support ZeroAllowed (sonnet-4-5, opus-4-5),
+// while older models do not.
+// See: _bmad-output/planning-artifacts/architecture.md#Epic-6
+package claude
+
+import (
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// Applier implements thinking.ProviderApplier for Claude models.
+// This applier is stateless and holds no configuration.
+type Applier struct{}
+
+// NewApplier creates a new Claude thinking applier.
+func NewApplier() *Applier {
+	return &Applier{}
+}
+
+func init() {
+	thinking.RegisterProvider("claude", NewApplier())
+}
+
+// Apply applies thinking configuration to Claude request body.
+//
+// IMPORTANT: This method expects config to be pre-validated by thinking.ValidateConfig.
+// ValidateConfig handles:
+// - Mode conversion (Level→Budget, Auto→Budget)
+// - Budget clamping to model range
+// - ZeroAllowed constraint enforcement
+//
+// Apply only processes ModeBudget and ModeNone; other modes are passed through unchanged.
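+// ModeLevel and ModeAuto are assumed to have been normalized to ModeBudget by
+// ValidateConfig before this applier runs.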
+// +// Expected output format when enabled: +// +// { +// "thinking": { +// "type": "enabled", +// "budget_tokens": 16384 +// } +// } +// +// Expected output format when disabled: +// +// { +// "thinking": { +// "type": "disabled" +// } +// } +func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { + if thinking.IsUserDefinedModel(modelInfo) { + return applyCompatibleClaude(body, config) + } + if modelInfo.Thinking == nil { + return body, nil + } + + // Only process ModeBudget and ModeNone; other modes pass through + // (caller should use ValidateConfig first to normalize modes) + if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + // Budget is expected to be pre-validated by ValidateConfig (clamped, ZeroAllowed enforced) + // Decide enabled/disabled based on budget value + if config.Budget == 0 { + result, _ := sjson.SetBytes(body, "thinking.type", "disabled") + result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens") + return result, nil + } + + result, _ := sjson.SetBytes(body, "thinking.type", "enabled") + result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget) + + // Ensure max_tokens > thinking.budget_tokens (Anthropic API constraint) + result = a.normalizeClaudeBudget(result, config.Budget, modelInfo) + return result, nil +} + +// normalizeClaudeBudget applies Claude-specific constraints to ensure max_tokens > budget_tokens. +// Anthropic API requires this constraint; violating it returns a 400 error. +func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo *registry.ModelInfo) []byte { + if budgetTokens <= 0 { + return body + } + + // Ensure the request satisfies Claude constraints: + // 1) Determine effective max_tokens (request overrides model default) + // 2) If budget_tokens >= max_tokens, reduce budget_tokens to max_tokens-1 + // 3) If the adjusted budget falls below the model minimum, leave the request unchanged + // 4) If max_tokens came from model default, write it back into the request + + effectiveMax, setDefaultMax := a.effectiveMaxTokens(body, modelInfo) + if setDefaultMax && effectiveMax > 0 { + body, _ = sjson.SetBytes(body, "max_tokens", effectiveMax) + } + + // Compute the budget we would apply after enforcing budget_tokens < max_tokens. + adjustedBudget := budgetTokens + if effectiveMax > 0 && adjustedBudget >= effectiveMax { + adjustedBudget = effectiveMax - 1 + } + + minBudget := 0 + if modelInfo != nil && modelInfo.Thinking != nil { + minBudget = modelInfo.Thinking.Min + } + if minBudget > 0 && adjustedBudget > 0 && adjustedBudget < minBudget { + // If enforcing the max_tokens constraint would push the budget below the model minimum, + // leave the request unchanged. + return body + } + + if adjustedBudget != budgetTokens { + body, _ = sjson.SetBytes(body, "thinking.budget_tokens", adjustedBudget) + } + + return body +} + +// effectiveMaxTokens returns the max tokens to cap thinking: +// prefer request-provided max_tokens; otherwise fall back to model default. +// The boolean indicates whether the value came from the model default (and thus should be written back). 
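+// For example, if the request omits max_tokens and the model's
+// MaxCompletionTokens is 64000, this returns (64000, true) and the caller
+// writes max_tokens=64000 back into the request.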
+func (a *Applier) effectiveMaxTokens(body []byte, modelInfo *registry.ModelInfo) (max int, fromModel bool) { + if maxTok := gjson.GetBytes(body, "max_tokens"); maxTok.Exists() && maxTok.Int() > 0 { + return int(maxTok.Int()), false + } + if modelInfo != nil && modelInfo.MaxCompletionTokens > 0 { + return modelInfo.MaxCompletionTokens, true + } + return 0, false +} + +func applyCompatibleClaude(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + switch config.Mode { + case thinking.ModeNone: + result, _ := sjson.SetBytes(body, "thinking.type", "disabled") + result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens") + return result, nil + case thinking.ModeAuto: + result, _ := sjson.SetBytes(body, "thinking.type", "enabled") + result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens") + return result, nil + default: + result, _ := sjson.SetBytes(body, "thinking.type", "enabled") + result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget) + return result, nil + } +} diff --git a/internal/thinking/provider/codex/apply.go b/internal/thinking/provider/codex/apply.go new file mode 100644 index 00000000..3bed318b --- /dev/null +++ b/internal/thinking/provider/codex/apply.go @@ -0,0 +1,131 @@ +// Package codex implements thinking configuration for Codex (OpenAI Responses API) models. +// +// Codex models use the reasoning.effort format with discrete levels +// (low/medium/high). This is similar to OpenAI but uses nested field +// "reasoning.effort" instead of "reasoning_effort". +// See: _bmad-output/planning-artifacts/architecture.md#Epic-8 +package codex + +import ( + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// Applier implements thinking.ProviderApplier for Codex models. +// +// Codex-specific behavior: +// - Output format: reasoning.effort (string: low/medium/high/xhigh) +// - Level-only mode: no numeric budget support +// - Some models support ZeroAllowed (gpt-5.1, gpt-5.2) +type Applier struct{} + +var _ thinking.ProviderApplier = (*Applier)(nil) + +// NewApplier creates a new Codex thinking applier. +func NewApplier() *Applier { + return &Applier{} +} + +func init() { + thinking.RegisterProvider("codex", NewApplier()) +} + +// Apply applies thinking configuration to Codex request body. +// +// Expected output format: +// +// { +// "reasoning": { +// "effort": "high" +// } +// } +func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { + if thinking.IsUserDefinedModel(modelInfo) { + return applyCompatibleCodex(body, config) + } + if modelInfo.Thinking == nil { + return body, nil + } + + // Only handle ModeLevel and ModeNone; other modes pass through unchanged. 
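+	// (For level-only models such as Codex, ValidateConfig is expected to have
+	// already converted ModeBudget into ModeLevel.)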
+ if config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + if config.Mode == thinking.ModeLevel { + result, _ := sjson.SetBytes(body, "reasoning.effort", string(config.Level)) + return result, nil + } + + effort := "" + support := modelInfo.Thinking + if config.Budget == 0 { + if support.ZeroAllowed || hasLevel(support.Levels, string(thinking.LevelNone)) { + effort = string(thinking.LevelNone) + } + } + if effort == "" && config.Level != "" { + effort = string(config.Level) + } + if effort == "" && len(support.Levels) > 0 { + effort = support.Levels[0] + } + if effort == "" { + return body, nil + } + + result, _ := sjson.SetBytes(body, "reasoning.effort", effort) + return result, nil +} + +func applyCompatibleCodex(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + var effort string + switch config.Mode { + case thinking.ModeLevel: + if config.Level == "" { + return body, nil + } + effort = string(config.Level) + case thinking.ModeNone: + effort = string(thinking.LevelNone) + if config.Level != "" { + effort = string(config.Level) + } + case thinking.ModeAuto: + // Auto mode for user-defined models: pass through as "auto" + effort = string(thinking.LevelAuto) + case thinking.ModeBudget: + // Budget mode: convert budget to level using threshold mapping + level, ok := thinking.ConvertBudgetToLevel(config.Budget) + if !ok { + return body, nil + } + effort = level + default: + return body, nil + } + + result, _ := sjson.SetBytes(body, "reasoning.effort", effort) + return result, nil +} + +func hasLevel(levels []string, target string) bool { + for _, level := range levels { + if strings.EqualFold(strings.TrimSpace(level), target) { + return true + } + } + return false +} diff --git a/internal/thinking/provider/gemini/apply.go b/internal/thinking/provider/gemini/apply.go new file mode 100644 index 00000000..c8560f19 --- /dev/null +++ b/internal/thinking/provider/gemini/apply.go @@ -0,0 +1,169 @@ +// Package gemini implements thinking configuration for Gemini models. +// +// Gemini models have two formats: +// - Gemini 2.5: Uses thinkingBudget (numeric) +// - Gemini 3.x: Uses thinkingLevel (string: minimal/low/medium/high) +// or thinkingBudget=-1 for auto/dynamic mode +// +// Output format is determined by ThinkingConfig.Mode and ThinkingSupport.Levels: +// - ModeAuto: Always uses thinkingBudget=-1 (both Gemini 2.5 and 3.x) +// - len(Levels) > 0: Uses thinkingLevel (Gemini 3.x discrete levels) +// - len(Levels) == 0: Uses thinkingBudget (Gemini 2.5) +package gemini + +import ( + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// Applier applies thinking configuration for Gemini models. +// +// Gemini-specific behavior: +// - Gemini 2.5: thinkingBudget format, flash series supports ZeroAllowed +// - Gemini 3.x: thinkingLevel format, cannot be disabled +// - Use ThinkingSupport.Levels to decide output format +type Applier struct{} + +// NewApplier creates a new Gemini thinking applier. +func NewApplier() *Applier { + return &Applier{} +} + +func init() { + thinking.RegisterProvider("gemini", NewApplier()) +} + +// Apply applies thinking configuration to Gemini request body. 
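+// The body is expected in native Gemini format, with generationConfig at the
+// top level (contrast with the request.* prefix used by gemini-cli).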
+// +// Expected output format (Gemini 2.5): +// +// { +// "generationConfig": { +// "thinkingConfig": { +// "thinkingBudget": 8192, +// "includeThoughts": true +// } +// } +// } +// +// Expected output format (Gemini 3.x): +// +// { +// "generationConfig": { +// "thinkingConfig": { +// "thinkingLevel": "high", +// "includeThoughts": true +// } +// } +// } +func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { + if thinking.IsUserDefinedModel(modelInfo) { + return a.applyCompatible(body, config) + } + if modelInfo.Thinking == nil { + return body, nil + } + + if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + // Choose format based on config.Mode and model capabilities: + // - ModeLevel: use Level format (validation will reject unsupported levels) + // - ModeNone: use Level format if model has Levels, else Budget format + // - ModeBudget/ModeAuto: use Budget format + switch config.Mode { + case thinking.ModeLevel: + return a.applyLevelFormat(body, config) + case thinking.ModeNone: + // ModeNone: route based on model capability (has Levels or not) + if len(modelInfo.Thinking.Levels) > 0 { + return a.applyLevelFormat(body, config) + } + return a.applyBudgetFormat(body, config) + default: + return a.applyBudgetFormat(body, config) + } +} + +func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + if config.Mode == thinking.ModeAuto { + return a.applyBudgetFormat(body, config) + } + + if config.Mode == thinking.ModeLevel || (config.Mode == thinking.ModeNone && config.Level != "") { + return a.applyLevelFormat(body, config) + } + + return a.applyBudgetFormat(body, config) +} + +func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + // ModeNone semantics: + // - ModeNone + Budget=0: completely disable thinking (not possible for Level-only models) + // - ModeNone + Budget>0: forced to think but hide output (includeThoughts=false) + // ValidateConfig sets config.Level to the lowest level when ModeNone + Budget > 0. + + // Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output + result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingBudget") + // Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing. 
+ result, _ = sjson.DeleteBytes(result, "generationConfig.thinkingConfig.include_thoughts") + + if config.Mode == thinking.ModeNone { + result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", false) + if config.Level != "" { + result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingLevel", string(config.Level)) + } + return result, nil + } + + // Only handle ModeLevel - budget conversion should be done by upper layer + if config.Mode != thinking.ModeLevel { + return body, nil + } + + level := string(config.Level) + result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingLevel", level) + result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", true) + return result, nil +} + +func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + // Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output + result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingLevel") + // Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing. + result, _ = sjson.DeleteBytes(result, "generationConfig.thinkingConfig.include_thoughts") + + budget := config.Budget + // ModeNone semantics: + // - ModeNone + Budget=0: completely disable thinking + // - ModeNone + Budget>0: forced to think but hide output (includeThoughts=false) + // When ZeroAllowed=false, ValidateConfig clamps Budget to Min while preserving ModeNone. + includeThoughts := false + switch config.Mode { + case thinking.ModeNone: + includeThoughts = false + case thinking.ModeAuto: + includeThoughts = true + default: + includeThoughts = budget > 0 + } + + result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingBudget", budget) + result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", includeThoughts) + return result, nil +} diff --git a/internal/thinking/provider/geminicli/apply.go b/internal/thinking/provider/geminicli/apply.go new file mode 100644 index 00000000..75d9242a --- /dev/null +++ b/internal/thinking/provider/geminicli/apply.go @@ -0,0 +1,126 @@ +// Package geminicli implements thinking configuration for Gemini CLI API format. +// +// Gemini CLI uses request.generationConfig.thinkingConfig.* path instead of +// generationConfig.thinkingConfig.* used by standard Gemini API. +package geminicli + +import ( + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// Applier applies thinking configuration for Gemini CLI API format. +type Applier struct{} + +var _ thinking.ProviderApplier = (*Applier)(nil) + +// NewApplier creates a new Gemini CLI thinking applier. +func NewApplier() *Applier { + return &Applier{} +} + +func init() { + thinking.RegisterProvider("gemini-cli", NewApplier()) +} + +// Apply applies thinking configuration to Gemini CLI request body. 
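+// Behavior mirrors the standard Gemini applier, except that all fields live
+// under the request.generationConfig.thinkingConfig.* path.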
+func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
+	if thinking.IsUserDefinedModel(modelInfo) {
+		return a.applyCompatible(body, config)
+	}
+	if modelInfo.Thinking == nil {
+		return body, nil
+	}
+
+	if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
+		return body, nil
+	}
+
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		body = []byte(`{}`)
+	}
+
+	// ModeAuto and ModeBudget both use the Budget format (ModeAuto maps to thinkingBudget=-1).
+	if config.Mode == thinking.ModeAuto || config.Mode == thinking.ModeBudget {
+		return a.applyBudgetFormat(body, config)
+	}
+
+	// For ModeLevel and ModeNone, choose format based on model capabilities
+	support := modelInfo.Thinking
+	if len(support.Levels) > 0 {
+		return a.applyLevelFormat(body, config)
+	}
+	return a.applyBudgetFormat(body, config)
+}
+
+func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
+	if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
+		return body, nil
+	}
+
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		body = []byte(`{}`)
+	}
+
+	if config.Mode == thinking.ModeAuto {
+		return a.applyBudgetFormat(body, config)
+	}
+
+	if config.Mode == thinking.ModeLevel || (config.Mode == thinking.ModeNone && config.Level != "") {
+		return a.applyLevelFormat(body, config)
+	}
+
+	return a.applyBudgetFormat(body, config)
+}
+
+func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
+	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
+	result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget")
+	// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
+	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts")
+
+	if config.Mode == thinking.ModeNone {
+		result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", false)
+		if config.Level != "" {
+			result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", string(config.Level))
+		}
+		return result, nil
+	}
+
+	// Only handle ModeLevel - budget conversion should be done by upper layer
+	if config.Mode != thinking.ModeLevel {
+		return body, nil
+	}
+
+	level := string(config.Level)
+	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", level)
+	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", true)
+	return result, nil
+}
+
+func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
+	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
+	result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingLevel")
+	// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
+ result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts") + + budget := config.Budget + includeThoughts := false + switch config.Mode { + case thinking.ModeNone: + includeThoughts = false + case thinking.ModeAuto: + includeThoughts = true + default: + includeThoughts = budget > 0 + } + + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget", budget) + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", includeThoughts) + return result, nil +} diff --git a/internal/thinking/provider/iflow/apply.go b/internal/thinking/provider/iflow/apply.go new file mode 100644 index 00000000..da986d22 --- /dev/null +++ b/internal/thinking/provider/iflow/apply.go @@ -0,0 +1,156 @@ +// Package iflow implements thinking configuration for iFlow models (GLM, MiniMax). +// +// iFlow models use boolean toggle semantics: +// - GLM models: chat_template_kwargs.enable_thinking (boolean) +// - MiniMax models: reasoning_split (boolean) +// +// Level values are converted to boolean: none=false, all others=true +// See: _bmad-output/planning-artifacts/architecture.md#Epic-9 +package iflow + +import ( + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// Applier implements thinking.ProviderApplier for iFlow models. +// +// iFlow-specific behavior: +// - GLM models: enable_thinking boolean + clear_thinking=false +// - MiniMax models: reasoning_split boolean +// - Level to boolean: none=false, others=true +// - No quantized support (only on/off) +type Applier struct{} + +var _ thinking.ProviderApplier = (*Applier)(nil) + +// NewApplier creates a new iFlow thinking applier. +func NewApplier() *Applier { + return &Applier{} +} + +func init() { + thinking.RegisterProvider("iflow", NewApplier()) +} + +// Apply applies thinking configuration to iFlow request body. +// +// Expected output format (GLM): +// +// { +// "chat_template_kwargs": { +// "enable_thinking": true, +// "clear_thinking": false +// } +// } +// +// Expected output format (MiniMax): +// +// { +// "reasoning_split": true +// } +func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { + if thinking.IsUserDefinedModel(modelInfo) { + return body, nil + } + if modelInfo.Thinking == nil { + return body, nil + } + + if isGLMModel(modelInfo.ID) { + return applyGLM(body, config), nil + } + + if isMiniMaxModel(modelInfo.ID) { + return applyMiniMax(body, config), nil + } + + return body, nil +} + +// configToBoolean converts ThinkingConfig to boolean for iFlow models. +// +// Conversion rules: +// - ModeNone: false +// - ModeAuto: true +// - ModeBudget + Budget=0: false +// - ModeBudget + Budget>0: true +// - ModeLevel + Level="none": false +// - ModeLevel + any other level: true +// - Default (unknown mode): true +func configToBoolean(config thinking.ThinkingConfig) bool { + switch config.Mode { + case thinking.ModeNone: + return false + case thinking.ModeAuto: + return true + case thinking.ModeBudget: + return config.Budget > 0 + case thinking.ModeLevel: + return config.Level != thinking.LevelNone + default: + return true + } +} + +// applyGLM applies thinking configuration for GLM models. 
+// +// Output format when enabled: +// +// {"chat_template_kwargs": {"enable_thinking": true, "clear_thinking": false}} +// +// Output format when disabled: +// +// {"chat_template_kwargs": {"enable_thinking": false}} +// +// Note: clear_thinking is only set when thinking is enabled, to preserve +// thinking output in the response. +func applyGLM(body []byte, config thinking.ThinkingConfig) []byte { + enableThinking := configToBoolean(config) + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + result, _ := sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking) + + // clear_thinking only needed when thinking is enabled + if enableThinking { + result, _ = sjson.SetBytes(result, "chat_template_kwargs.clear_thinking", false) + } + + return result +} + +// applyMiniMax applies thinking configuration for MiniMax models. +// +// Output format: +// +// {"reasoning_split": true/false} +func applyMiniMax(body []byte, config thinking.ThinkingConfig) []byte { + reasoningSplit := configToBoolean(config) + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + result, _ := sjson.SetBytes(body, "reasoning_split", reasoningSplit) + + return result +} + +// isGLMModel determines if the model is a GLM series model. +// GLM models use chat_template_kwargs.enable_thinking format. +func isGLMModel(modelID string) bool { + return strings.HasPrefix(strings.ToLower(modelID), "glm") +} + +// isMiniMaxModel determines if the model is a MiniMax series model. +// MiniMax models use reasoning_split format. +func isMiniMaxModel(modelID string) bool { + return strings.HasPrefix(strings.ToLower(modelID), "minimax") +} diff --git a/internal/thinking/provider/openai/apply.go b/internal/thinking/provider/openai/apply.go new file mode 100644 index 00000000..eaad30ee --- /dev/null +++ b/internal/thinking/provider/openai/apply.go @@ -0,0 +1,128 @@ +// Package openai implements thinking configuration for OpenAI/Codex models. +// +// OpenAI models use the reasoning_effort format with discrete levels +// (low/medium/high). Some models support xhigh and none levels. +// See: _bmad-output/planning-artifacts/architecture.md#Epic-8 +package openai + +import ( + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// Applier implements thinking.ProviderApplier for OpenAI models. +// +// OpenAI-specific behavior: +// - Output format: reasoning_effort (string: low/medium/high/xhigh) +// - Level-only mode: no numeric budget support +// - Some models support ZeroAllowed (gpt-5.1, gpt-5.2) +type Applier struct{} + +var _ thinking.ProviderApplier = (*Applier)(nil) + +// NewApplier creates a new OpenAI thinking applier. +func NewApplier() *Applier { + return &Applier{} +} + +func init() { + thinking.RegisterProvider("openai", NewApplier()) +} + +// Apply applies thinking configuration to OpenAI request body. +// +// Expected output format: +// +// { +// "reasoning_effort": "high" +// } +func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { + if thinking.IsUserDefinedModel(modelInfo) { + return applyCompatibleOpenAI(body, config) + } + if modelInfo.Thinking == nil { + return body, nil + } + + // Only handle ModeLevel and ModeNone; other modes pass through unchanged. 
+ if config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + if config.Mode == thinking.ModeLevel { + result, _ := sjson.SetBytes(body, "reasoning_effort", string(config.Level)) + return result, nil + } + + effort := "" + support := modelInfo.Thinking + if config.Budget == 0 { + if support.ZeroAllowed || hasLevel(support.Levels, string(thinking.LevelNone)) { + effort = string(thinking.LevelNone) + } + } + if effort == "" && config.Level != "" { + effort = string(config.Level) + } + if effort == "" && len(support.Levels) > 0 { + effort = support.Levels[0] + } + if effort == "" { + return body, nil + } + + result, _ := sjson.SetBytes(body, "reasoning_effort", effort) + return result, nil +} + +func applyCompatibleOpenAI(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + var effort string + switch config.Mode { + case thinking.ModeLevel: + if config.Level == "" { + return body, nil + } + effort = string(config.Level) + case thinking.ModeNone: + effort = string(thinking.LevelNone) + if config.Level != "" { + effort = string(config.Level) + } + case thinking.ModeAuto: + // Auto mode for user-defined models: pass through as "auto" + effort = string(thinking.LevelAuto) + case thinking.ModeBudget: + // Budget mode: convert budget to level using threshold mapping + level, ok := thinking.ConvertBudgetToLevel(config.Budget) + if !ok { + return body, nil + } + effort = level + default: + return body, nil + } + + result, _ := sjson.SetBytes(body, "reasoning_effort", effort) + return result, nil +} + +func hasLevel(levels []string, target string) bool { + for _, level := range levels { + if strings.EqualFold(strings.TrimSpace(level), target) { + return true + } + } + return false +} diff --git a/internal/thinking/strip.go b/internal/thinking/strip.go new file mode 100644 index 00000000..eb691715 --- /dev/null +++ b/internal/thinking/strip.go @@ -0,0 +1,58 @@ +// Package thinking provides unified thinking configuration processing. +package thinking + +import ( + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// StripThinkingConfig removes thinking configuration fields from request body. +// +// This function is used when a model doesn't support thinking but the request +// contains thinking configuration. The configuration is silently removed to +// prevent upstream API errors. 
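+// For example, stripping a Claude-format body deletes the top-level "thinking"
+// object while leaving the rest of the request intact.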
+// +// Parameters: +// - body: Original request body JSON +// - provider: Provider name (determines which fields to strip) +// +// Returns: +// - Modified request body JSON with thinking configuration removed +// - Original body is returned unchanged if: +// - body is empty or invalid JSON +// - provider is unknown +// - no thinking configuration found +func StripThinkingConfig(body []byte, provider string) []byte { + if len(body) == 0 || !gjson.ValidBytes(body) { + return body + } + + var paths []string + switch provider { + case "claude": + paths = []string{"thinking"} + case "gemini": + paths = []string{"generationConfig.thinkingConfig"} + case "gemini-cli", "antigravity": + paths = []string{"request.generationConfig.thinkingConfig"} + case "openai": + paths = []string{"reasoning_effort"} + case "codex": + paths = []string{"reasoning.effort"} + case "iflow": + paths = []string{ + "chat_template_kwargs.enable_thinking", + "chat_template_kwargs.clear_thinking", + "reasoning_split", + "reasoning_effort", + } + default: + return body + } + + result := body + for _, path := range paths { + result, _ = sjson.DeleteBytes(result, path) + } + return result +} diff --git a/internal/thinking/suffix.go b/internal/thinking/suffix.go new file mode 100644 index 00000000..275c0856 --- /dev/null +++ b/internal/thinking/suffix.go @@ -0,0 +1,146 @@ +// Package thinking provides unified thinking configuration processing. +// +// This file implements suffix parsing functionality for extracting +// thinking configuration from model names in the format model(value). +package thinking + +import ( + "strconv" + "strings" +) + +// ParseSuffix extracts thinking suffix from a model name. +// +// The suffix format is: model-name(value) +// Examples: +// - "claude-sonnet-4-5(16384)" -> ModelName="claude-sonnet-4-5", RawSuffix="16384" +// - "gpt-5.2(high)" -> ModelName="gpt-5.2", RawSuffix="high" +// - "gemini-2.5-pro" -> ModelName="gemini-2.5-pro", HasSuffix=false +// +// This function only extracts the suffix; it does not validate or interpret +// the suffix content. Use ParseNumericSuffix, ParseLevelSuffix, etc. for +// content interpretation. +func ParseSuffix(model string) SuffixResult { + // Find the last opening parenthesis + lastOpen := strings.LastIndex(model, "(") + if lastOpen == -1 { + return SuffixResult{ModelName: model, HasSuffix: false} + } + + // Check if the string ends with a closing parenthesis + if !strings.HasSuffix(model, ")") { + return SuffixResult{ModelName: model, HasSuffix: false} + } + + // Extract components + modelName := model[:lastOpen] + rawSuffix := model[lastOpen+1 : len(model)-1] + + return SuffixResult{ + ModelName: modelName, + HasSuffix: true, + RawSuffix: rawSuffix, + } +} + +// ParseNumericSuffix attempts to parse a raw suffix as a numeric budget value. +// +// This function parses the raw suffix content (from ParseSuffix.RawSuffix) as an integer. +// Only non-negative integers are considered valid numeric suffixes. +// +// Platform note: The budget value uses Go's int type, which is 32-bit on 32-bit +// systems and 64-bit on 64-bit systems. Values exceeding the platform's int range +// will return ok=false. +// +// Leading zeros are accepted: "08192" parses as 8192. 
+// +// Examples: +// - "8192" -> budget=8192, ok=true +// - "0" -> budget=0, ok=true (represents ModeNone) +// - "08192" -> budget=8192, ok=true (leading zeros accepted) +// - "-1" -> budget=0, ok=false (negative numbers are not valid numeric suffixes) +// - "high" -> budget=0, ok=false (not a number) +// - "9223372036854775808" -> budget=0, ok=false (overflow on 64-bit systems) +// +// For special handling of -1 as auto mode, use ParseSpecialSuffix instead. +func ParseNumericSuffix(rawSuffix string) (budget int, ok bool) { + if rawSuffix == "" { + return 0, false + } + + value, err := strconv.Atoi(rawSuffix) + if err != nil { + return 0, false + } + + // Negative numbers are not valid numeric suffixes + // -1 should be handled by special value parsing as "auto" + if value < 0 { + return 0, false + } + + return value, true +} + +// ParseSpecialSuffix attempts to parse a raw suffix as a special thinking mode value. +// +// This function handles special strings that represent a change in thinking mode: +// - "none" -> ModeNone (disables thinking) +// - "auto" -> ModeAuto (automatic/dynamic thinking) +// - "-1" -> ModeAuto (numeric representation of auto mode) +// +// String values are case-insensitive. +func ParseSpecialSuffix(rawSuffix string) (mode ThinkingMode, ok bool) { + if rawSuffix == "" { + return ModeBudget, false + } + + // Case-insensitive matching + switch strings.ToLower(rawSuffix) { + case "none": + return ModeNone, true + case "auto", "-1": + return ModeAuto, true + default: + return ModeBudget, false + } +} + +// ParseLevelSuffix attempts to parse a raw suffix as a discrete thinking level. +// +// This function parses the raw suffix content (from ParseSuffix.RawSuffix) as a level. +// Only discrete effort levels are valid: minimal, low, medium, high, xhigh. +// Level matching is case-insensitive. +// +// Special values (none, auto) are NOT handled by this function; use ParseSpecialSuffix +// instead. This separation allows callers to prioritize special value handling. +// +// Examples: +// - "high" -> level=LevelHigh, ok=true +// - "HIGH" -> level=LevelHigh, ok=true (case insensitive) +// - "medium" -> level=LevelMedium, ok=true +// - "none" -> level="", ok=false (special value, use ParseSpecialSuffix) +// - "auto" -> level="", ok=false (special value, use ParseSpecialSuffix) +// - "8192" -> level="", ok=false (numeric, use ParseNumericSuffix) +// - "ultra" -> level="", ok=false (unknown level) +func ParseLevelSuffix(rawSuffix string) (level ThinkingLevel, ok bool) { + if rawSuffix == "" { + return "", false + } + + // Case-insensitive matching + switch strings.ToLower(rawSuffix) { + case "minimal": + return LevelMinimal, true + case "low": + return LevelLow, true + case "medium": + return LevelMedium, true + case "high": + return LevelHigh, true + case "xhigh": + return LevelXHigh, true + default: + return "", false + } +} diff --git a/internal/thinking/text.go b/internal/thinking/text.go new file mode 100644 index 00000000..eed1ba28 --- /dev/null +++ b/internal/thinking/text.go @@ -0,0 +1,41 @@ +package thinking + +import ( + "github.com/tidwall/gjson" +) + +// GetThinkingText extracts the thinking text from a content part. +// Handles various formats: +// - Simple string: { "thinking": "text" } or { "text": "text" } +// - Wrapped object: { "thinking": { "text": "text", "cache_control": {...} } } +// - Gemini-style: { "thought": true, "text": "text" } +// Returns the extracted text string. 
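+// For example, {"thinking": "step 1"}, {"thinking": {"text": "step 1"}}, and
+// {"thought": true, "text": "step 1"} all yield "step 1".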
+func GetThinkingText(part gjson.Result) string { + // Try direct text field first (Gemini-style) + if text := part.Get("text"); text.Exists() && text.Type == gjson.String { + return text.String() + } + + // Try thinking field + thinkingField := part.Get("thinking") + if !thinkingField.Exists() { + return "" + } + + // thinking is a string + if thinkingField.Type == gjson.String { + return thinkingField.String() + } + + // thinking is an object with inner text/thinking + if thinkingField.IsObject() { + if inner := thinkingField.Get("text"); inner.Exists() && inner.Type == gjson.String { + return inner.String() + } + if inner := thinkingField.Get("thinking"); inner.Exists() && inner.Type == gjson.String { + return inner.String() + } + } + + return "" +} diff --git a/internal/thinking/types.go b/internal/thinking/types.go new file mode 100644 index 00000000..6ae1e088 --- /dev/null +++ b/internal/thinking/types.go @@ -0,0 +1,116 @@ +// Package thinking provides unified thinking configuration processing. +// +// This package offers a unified interface for parsing, validating, and applying +// thinking configurations across various AI providers (Claude, Gemini, OpenAI, iFlow). +package thinking + +import "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + +// ThinkingMode represents the type of thinking configuration mode. +type ThinkingMode int + +const ( + // ModeBudget indicates using a numeric budget (corresponds to suffix "(1000)" etc.) + ModeBudget ThinkingMode = iota + // ModeLevel indicates using a discrete level (corresponds to suffix "(high)" etc.) + ModeLevel + // ModeNone indicates thinking is disabled (corresponds to suffix "(none)" or budget=0) + ModeNone + // ModeAuto indicates automatic/dynamic thinking (corresponds to suffix "(auto)" or budget=-1) + ModeAuto +) + +// String returns the string representation of ThinkingMode. +func (m ThinkingMode) String() string { + switch m { + case ModeBudget: + return "budget" + case ModeLevel: + return "level" + case ModeNone: + return "none" + case ModeAuto: + return "auto" + default: + return "unknown" + } +} + +// ThinkingLevel represents a discrete thinking level. +type ThinkingLevel string + +const ( + // LevelNone disables thinking + LevelNone ThinkingLevel = "none" + // LevelAuto enables automatic/dynamic thinking + LevelAuto ThinkingLevel = "auto" + // LevelMinimal sets minimal thinking effort + LevelMinimal ThinkingLevel = "minimal" + // LevelLow sets low thinking effort + LevelLow ThinkingLevel = "low" + // LevelMedium sets medium thinking effort + LevelMedium ThinkingLevel = "medium" + // LevelHigh sets high thinking effort + LevelHigh ThinkingLevel = "high" + // LevelXHigh sets extra-high thinking effort + LevelXHigh ThinkingLevel = "xhigh" +) + +// ThinkingConfig represents a unified thinking configuration. +// +// This struct is used to pass thinking configuration information between components. +// Depending on Mode, either Budget or Level field is effective: +// - ModeNone: Budget=0, Level is ignored +// - ModeAuto: Budget=-1, Level is ignored +// - ModeBudget: Budget is a positive integer, Level is ignored +// - ModeLevel: Budget is ignored, Level is a valid level +type ThinkingConfig struct { + // Mode specifies the configuration mode + Mode ThinkingMode + // Budget is the thinking budget (token count), only effective when Mode is ModeBudget. 
+ // Special values: 0 means disabled, -1 means automatic + Budget int + // Level is the thinking level, only effective when Mode is ModeLevel + Level ThinkingLevel +} + +// SuffixResult represents the result of parsing a model name for thinking suffix. +// +// A thinking suffix is specified in the format model-name(value), where value +// can be a numeric budget (e.g., "16384") or a level name (e.g., "high"). +type SuffixResult struct { + // ModelName is the model name with the suffix removed. + // If no suffix was found, this equals the original input. + ModelName string + + // HasSuffix indicates whether a valid suffix was found. + HasSuffix bool + + // RawSuffix is the content inside the parentheses, without the parentheses. + // Empty string if HasSuffix is false. + RawSuffix string +} + +// ProviderApplier defines the interface for provider-specific thinking configuration application. +// +// Types implementing this interface are responsible for converting a unified ThinkingConfig +// into provider-specific format and applying it to the request body. +// +// Implementation requirements: +// - Apply method must be idempotent +// - Must not modify the input config or modelInfo +// - Returns a modified copy of the request body +// - Returns appropriate ThinkingError for unsupported configurations +type ProviderApplier interface { + // Apply applies the thinking configuration to the request body. + // + // Parameters: + // - body: Original request body JSON + // - config: Unified thinking configuration + // - modelInfo: Model registry information containing ThinkingSupport properties + // + // Returns: + // - Modified request body JSON + // - ThinkingError if the configuration is invalid or unsupported + Apply(body []byte, config ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) +} diff --git a/internal/thinking/validate.go b/internal/thinking/validate.go new file mode 100644 index 00000000..f082ad56 --- /dev/null +++ b/internal/thinking/validate.go @@ -0,0 +1,378 @@ +// Package thinking provides unified thinking configuration processing logic. +package thinking + +import ( + "fmt" + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + log "github.com/sirupsen/logrus" +) + +// ValidateConfig validates a thinking configuration against model capabilities. +// +// This function performs comprehensive validation: +// - Checks if the model supports thinking +// - Auto-converts between Budget and Level formats based on model capability +// - Validates that requested level is in the model's supported levels list +// - Clamps budget values to model's allowed range +// - When converting Budget -> Level for level-only models, clamps the derived standard level to the nearest supported level +// (special values none/auto are preserved) +// - When config comes from a model suffix, strict budget validation is disabled (we clamp instead of error) +// +// Parameters: +// - config: The thinking configuration to validate +// - support: Model's ThinkingSupport properties (nil means no thinking support) +// - fromFormat: Source provider format (used to determine strict validation rules) +// - toFormat: Target provider format +// - fromSuffix: Whether config was sourced from model suffix +// +// Returns: +// - Normalized ThinkingConfig with clamped values +// - ThinkingError if validation fails (ErrThinkingNotSupported, ErrLevelNotSupported, etc.) 
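+// For example, a budget above the model's Max is clamped when the config came
+// from a model suffix, but rejected with ErrBudgetOutOfRange when it came from
+// a same-family request body (strict validation).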
+// +// Auto-conversion behavior: +// - Budget-only model + Level config → Level converted to Budget +// - Level-only model + Budget config → Budget converted to Level +// - Hybrid model → preserve original format +func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, fromFormat, toFormat string, fromSuffix bool) (*ThinkingConfig, error) { + fromFormat, toFormat = strings.ToLower(strings.TrimSpace(fromFormat)), strings.ToLower(strings.TrimSpace(toFormat)) + model := "unknown" + support := (*registry.ThinkingSupport)(nil) + if modelInfo != nil { + if modelInfo.ID != "" { + model = modelInfo.ID + } + support = modelInfo.Thinking + } + + if support == nil { + if config.Mode != ModeNone { + return nil, NewThinkingErrorWithModel(ErrThinkingNotSupported, "thinking not supported for this model", model) + } + return &config, nil + } + + allowClampUnsupported := isBudgetBasedProvider(fromFormat) && isLevelBasedProvider(toFormat) + strictBudget := !fromSuffix && fromFormat != "" && isSameProviderFamily(fromFormat, toFormat) + budgetDerivedFromLevel := false + + capability := detectModelCapability(modelInfo) + switch capability { + case CapabilityBudgetOnly: + if config.Mode == ModeLevel { + if config.Level == LevelAuto { + break + } + budget, ok := ConvertLevelToBudget(string(config.Level)) + if !ok { + return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("unknown level: %s", config.Level)) + } + config.Mode = ModeBudget + config.Budget = budget + config.Level = "" + budgetDerivedFromLevel = true + } + case CapabilityLevelOnly: + if config.Mode == ModeBudget { + level, ok := ConvertBudgetToLevel(config.Budget) + if !ok { + return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("budget %d cannot be converted to a valid level", config.Budget)) + } + // When converting Budget -> Level for level-only models, clamp the derived standard level + // to the nearest supported level. Special values (none/auto) are preserved. 
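+ // For example, a budget that maps to "high" on a model that only lists
+ // "low" and "medium" is clamped to "medium", the nearest supported level.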
+ config.Mode = ModeLevel + config.Level = clampLevel(ThinkingLevel(level), modelInfo, toFormat) + config.Budget = 0 + } + case CapabilityHybrid: + } + + if config.Mode == ModeLevel && config.Level == LevelNone { + config.Mode = ModeNone + config.Budget = 0 + config.Level = "" + } + if config.Mode == ModeLevel && config.Level == LevelAuto { + config.Mode = ModeAuto + config.Budget = -1 + config.Level = "" + } + if config.Mode == ModeBudget && config.Budget == 0 { + config.Mode = ModeNone + config.Level = "" + } + + if len(support.Levels) > 0 && config.Mode == ModeLevel { + if !isLevelSupported(string(config.Level), support.Levels) { + if allowClampUnsupported { + config.Level = clampLevel(config.Level, modelInfo, toFormat) + } + if !isLevelSupported(string(config.Level), support.Levels) { + // User explicitly specified an unsupported level - return error + // (budget-derived levels may be clamped based on source format) + validLevels := normalizeLevels(support.Levels) + message := fmt.Sprintf("level %q not supported, valid levels: %s", strings.ToLower(string(config.Level)), strings.Join(validLevels, ", ")) + return nil, NewThinkingError(ErrLevelNotSupported, message) + } + } + } + + if strictBudget && config.Mode == ModeBudget && !budgetDerivedFromLevel { + min, max := support.Min, support.Max + if min != 0 || max != 0 { + if config.Budget < min || config.Budget > max || (config.Budget == 0 && !support.ZeroAllowed) { + message := fmt.Sprintf("budget %d out of range [%d,%d]", config.Budget, min, max) + return nil, NewThinkingError(ErrBudgetOutOfRange, message) + } + } + } + + // Convert ModeAuto to mid-range if dynamic not allowed + if config.Mode == ModeAuto && !support.DynamicAllowed { + config = convertAutoToMidRange(config, support, toFormat, model) + } + + if config.Mode == ModeNone && toFormat == "claude" { + // Claude supports explicit disable via thinking.type="disabled". + // Keep Budget=0 so applier can omit budget_tokens. + config.Budget = 0 + config.Level = "" + } else { + switch config.Mode { + case ModeBudget, ModeAuto, ModeNone: + config.Budget = clampBudget(config.Budget, modelInfo, toFormat) + } + + // ModeNone with clamped Budget > 0: set Level to lowest for Level-only/Hybrid models + // This ensures Apply layer doesn't need to access support.Levels + if config.Mode == ModeNone && config.Budget > 0 && len(support.Levels) > 0 { + config.Level = ThinkingLevel(support.Levels[0]) + } + } + + return &config, nil +} + +// convertAutoToMidRange converts ModeAuto to a mid-range value when dynamic is not allowed. +// +// This function handles the case where a model does not support dynamic/auto thinking. 
+// The auto mode is silently converted to a fixed value based on model capability:
+// - Level-only models: convert to ModeLevel with LevelMedium
+// - Budget models: convert to ModeBudget with mid = (Min + Max) / 2
+//
+// Logging:
+// - Debug level when conversion occurs
+// - Fields: provider, model, original_mode, clamped_to
+func convertAutoToMidRange(config ThinkingConfig, support *registry.ThinkingSupport, provider, model string) ThinkingConfig {
+ // For level-only models (has Levels but no Min/Max range), use ModeLevel with medium
+ if len(support.Levels) > 0 && support.Min == 0 && support.Max == 0 {
+ config.Mode = ModeLevel
+ config.Level = LevelMedium
+ config.Budget = 0
+ log.WithFields(log.Fields{
+ "provider": provider,
+ "model": model,
+ "original_mode": "auto",
+ "clamped_to": string(LevelMedium),
+ }).Debug("thinking: mode converted, dynamic not allowed, using medium level |")
+ return config
+ }
+
+ // For budget models, use mid-range budget
+ mid := (support.Min + support.Max) / 2
+ if mid <= 0 && support.ZeroAllowed {
+ config.Mode = ModeNone
+ config.Budget = 0
+ } else if mid <= 0 {
+ config.Mode = ModeBudget
+ config.Budget = support.Min
+ } else {
+ config.Mode = ModeBudget
+ config.Budget = mid
+ }
+ log.WithFields(log.Fields{
+ "provider": provider,
+ "model": model,
+ "original_mode": "auto",
+ "clamped_to": config.Budget,
+ }).Debug("thinking: mode converted, dynamic not allowed |")
+ return config
+}
+
+// standardLevelOrder defines the canonical ordering of thinking levels from lowest to highest.
+var standardLevelOrder = []ThinkingLevel{LevelMinimal, LevelLow, LevelMedium, LevelHigh, LevelXHigh}
+
+// clampLevel clamps the given level to the nearest supported level.
+// On tie, prefers the lower level.
+func clampLevel(level ThinkingLevel, modelInfo *registry.ModelInfo, provider string) ThinkingLevel {
+ model := "unknown"
+ var supported []string
+ if modelInfo != nil {
+ if modelInfo.ID != "" {
+ model = modelInfo.ID
+ }
+ if modelInfo.Thinking != nil {
+ supported = modelInfo.Thinking.Levels
+ }
+ }
+
+ if len(supported) == 0 || isLevelSupported(string(level), supported) {
+ return level
+ }
+
+ pos := levelIndex(string(level))
+ if pos == -1 {
+ return level
+ }
+ bestIdx, bestDist := -1, len(standardLevelOrder)+1
+
+ for _, s := range supported {
+ if idx := levelIndex(strings.TrimSpace(s)); idx != -1 {
+ if dist := abs(pos - idx); dist < bestDist || (dist == bestDist && idx < bestIdx) {
+ bestIdx, bestDist = idx, dist
+ }
+ }
+ }
+
+ if bestIdx >= 0 {
+ clamped := standardLevelOrder[bestIdx]
+ log.WithFields(log.Fields{
+ "provider": provider,
+ "model": model,
+ "original_value": string(level),
+ "clamped_to": string(clamped),
+ }).Debug("thinking: level clamped |")
+ return clamped
+ }
+ return level
+}
+
+// clampBudget clamps a budget value to the model's supported range.
+func clampBudget(value int, modelInfo *registry.ModelInfo, provider string) int {
+ model := "unknown"
+ support := (*registry.ThinkingSupport)(nil)
+ if modelInfo != nil {
+ if modelInfo.ID != "" {
+ model = modelInfo.ID
+ }
+ support = modelInfo.Thinking
+ }
+ if support == nil {
+ return value
+ }
+
+ // Auto value (-1) passes through without clamping.
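+ // (ValidateConfig resolves auto via convertAutoToMidRange when the model
+ // disallows dynamic thinking, so a -1 reaching this point stays dynamic.)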
+ if value == -1 { + return value + } + + min, max := support.Min, support.Max + if value == 0 && !support.ZeroAllowed { + log.WithFields(log.Fields{ + "provider": provider, + "model": model, + "original_value": value, + "clamped_to": min, + "min": min, + "max": max, + }).Warn("thinking: budget zero not allowed |") + return min + } + + // Some models are level-only and do not define numeric budget ranges. + if min == 0 && max == 0 { + return value + } + + if value < min { + if value == 0 && support.ZeroAllowed { + return 0 + } + logClamp(provider, model, value, min, min, max) + return min + } + if value > max { + logClamp(provider, model, value, max, min, max) + return max + } + return value +} + +func isLevelSupported(level string, supported []string) bool { + for _, s := range supported { + if strings.EqualFold(level, strings.TrimSpace(s)) { + return true + } + } + return false +} + +func levelIndex(level string) int { + for i, l := range standardLevelOrder { + if strings.EqualFold(level, string(l)) { + return i + } + } + return -1 +} + +func normalizeLevels(levels []string) []string { + out := make([]string, len(levels)) + for i, l := range levels { + out[i] = strings.ToLower(strings.TrimSpace(l)) + } + return out +} + +func isBudgetBasedProvider(provider string) bool { + switch provider { + case "gemini", "gemini-cli", "antigravity", "claude": + return true + default: + return false + } +} + +func isLevelBasedProvider(provider string) bool { + switch provider { + case "openai", "openai-response", "codex": + return true + default: + return false + } +} + +func isGeminiFamily(provider string) bool { + switch provider { + case "gemini", "gemini-cli", "antigravity": + return true + default: + return false + } +} + +func isSameProviderFamily(from, to string) bool { + if from == to { + return true + } + return isGeminiFamily(from) && isGeminiFamily(to) +} + +func abs(x int) int { + if x < 0 { + return -x + } + return x +} + +func logClamp(provider, model string, original, clampedTo, min, max int) { + log.WithFields(log.Fields{ + "provider": provider, + "model": model, + "original_value": original, + "min": min, + "max": max, + "clamped_to": clampedTo, + }).Debug("thinking: budget clamped |") +} diff --git a/internal/translator/antigravity/claude/antigravity_claude_request.go b/internal/translator/antigravity/claude/antigravity_claude_request.go index 13ddfe5a..4216393f 100644 --- a/internal/translator/antigravity/claude/antigravity_claude_request.go +++ b/internal/translator/antigravity/claude/antigravity_claude_request.go @@ -12,6 +12,7 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/cache" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" @@ -122,7 +123,12 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _ contentTypeResult := contentResult.Get("type") if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "thinking" { // Use GetThinkingText to handle wrapped thinking objects - thinkingText := util.GetThinkingText(contentResult) + thinkingText := thinking.GetThinkingText(contentResult) + signatureResult := contentResult.Get("signature") + clientSignature := "" + if signatureResult.Exists() && signatureResult.String() != "" { + clientSignature = signatureResult.String() + } // Always try cached signature first (more reliable than client-provided) 
// Client may send stale or invalid signatures from different sessions @@ -380,12 +386,12 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _ } // Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled - if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) { + if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() { if t.Get("type").String() == "enabled" { if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { budget := int(b.Int()) out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true) + out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true) } } } diff --git a/internal/translator/antigravity/claude/antigravity_claude_request_test.go b/internal/translator/antigravity/claude/antigravity_claude_request_test.go index 8c045620..6e1fed1f 100644 --- a/internal/translator/antigravity/claude/antigravity_claude_request_test.go +++ b/internal/translator/antigravity/claude/antigravity_claude_request_test.go @@ -380,8 +380,8 @@ func TestConvertClaudeRequestToAntigravity_ThinkingConfig(t *testing.T) { if thinkingConfig.Get("thinkingBudget").Int() != 8000 { t.Errorf("Expected thinkingBudget 8000, got %d", thinkingConfig.Get("thinkingBudget").Int()) } - if !thinkingConfig.Get("include_thoughts").Bool() { - t.Error("include_thoughts should be true") + if !thinkingConfig.Get("includeThoughts").Bool() { + t.Error("includeThoughts should be true") } } else { t.Log("thinkingConfig not present - model may not be registered in test registry") diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go index 7ca01b07..94546bda 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go @@ -35,66 +35,19 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ // Model out, _ = sjson.SetBytes(out, "model", modelName) - // Reasoning effort -> thinkingBudget/include_thoughts - // Note: OpenAI official fields take precedence over extra_body.google.thinking_config + // Apply thinking configuration: convert OpenAI reasoning_effort to Gemini CLI thinkingConfig. + // Inline translation-only mapping; capability checks happen later in ApplyThinking. 
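+ // For example, reasoning_effort "high" becomes thinkingLevel "high", while
+ // "auto" becomes thinkingBudget -1 with includeThoughts enabled.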
re := gjson.GetBytes(rawJSON, "reasoning_effort") - hasOfficialThinking := re.Exists() - if hasOfficialThinking && util.ModelSupportsThinking(modelName) { + if re.Exists() { effort := strings.ToLower(strings.TrimSpace(re.String())) - if util.IsGemini3Model(modelName) { - switch effort { - case "none": - out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig") - case "auto": - includeThoughts := true - out = util.ApplyGeminiCLIThinkingLevel(out, "", &includeThoughts) - default: - if level, ok := util.ValidateGemini3ThinkingLevel(modelName, effort); ok { - out = util.ApplyGeminiCLIThinkingLevel(out, level, nil) - } - } - } else if !util.ModelUsesThinkingLevels(modelName) { - out = util.ApplyReasoningEffortToGeminiCLI(out, effort) - } - } - - // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) - // Only apply for models that use numeric budgets, not discrete levels. - if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { - if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { - var setBudget bool - var budget int - - if v := tc.Get("thinkingBudget"); v.Exists() { - budget = int(v.Int()) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) - setBudget = true - } else if v := tc.Get("thinking_budget"); v.Exists() { - budget = int(v.Int()) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) - setBudget = true - } - - if v := tc.Get("includeThoughts"); v.Exists() { - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool()) - } else if v := tc.Get("include_thoughts"); v.Exists() { - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool()) - } else if setBudget && budget != 0 { - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) - } - } - } - - // Claude/Anthropic API format: thinking.type == "enabled" with budget_tokens - // This allows Claude Code and other Claude API clients to pass thinking configuration - if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && util.ModelSupportsThinking(modelName) { - if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() { - if t.Get("type").String() == "enabled" { - if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { - budget := int(b.Int()) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) - } + if effort != "" { + thinkingPath := "request.generationConfig.thinkingConfig" + if effort == "auto" { + out, _ = sjson.SetBytes(out, thinkingPath+".thinkingBudget", -1) + out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", true) + } else { + out, _ = sjson.SetBytes(out, thinkingPath+".thinkingLevel", effort) + out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", effort != "none") } } } @@ -179,6 +132,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ } } + systemPartIndex := 0 for i := 0; i < len(arr); i++ { m := arr[i] role := m.Get("role").String() @@ -188,16 +142,19 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ // system -> request.systemInstruction as a user message style if 
content.Type == gjson.String { out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user") - out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.String()) + out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.String()) + systemPartIndex++ } else if content.IsObject() && content.Get("type").String() == "text" { out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user") - out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.Get("text").String()) + out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.Get("text").String()) + systemPartIndex++ } else if content.IsArray() { contents := content.Array() if len(contents) > 0 { out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user") for j := 0; j < len(contents); j++ { - out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", j), contents[j].Get("text").String()) + out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), contents[j].Get("text").String()) + systemPartIndex++ } } } @@ -212,7 +169,10 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ for _, item := range items { switch item.Get("type").String() { case "text": - node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String()) + text := item.Get("text").String() + if text != "" { + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text) + } p++ case "image_url": imageURL := item.Get("image_url.url").String() @@ -256,6 +216,10 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ for _, item := range content.Array() { switch item.Get("type").String() { case "text": + text := item.Get("text").String() + if text != "" { + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text) + } p++ case "image_url": // If the assistant returned an inline data URL, preserve it for history fidelity. diff --git a/internal/translator/claude/gemini/claude_gemini_request.go b/internal/translator/claude/gemini/claude_gemini_request.go index faf1f9d1..32f2d847 100644 --- a/internal/translator/claude/gemini/claude_gemini_request.go +++ b/internal/translator/claude/gemini/claude_gemini_request.go @@ -15,6 +15,7 @@ import ( "strings" "github.com/google/uuid" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" @@ -114,15 +115,40 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream } } // Include thoughts configuration for reasoning process visibility - // Only apply for models that support thinking and use numeric budgets, not discrete levels. - if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { - // Check for thinkingBudget first - if present, enable thinking with budget - if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() && thinkingBudget.Int() > 0 { + // Translator only does format conversion, ApplyThinking handles model capability validation. 
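+ // For example, thinkingLevel "low" maps to thinking.budget_tokens via
+ // ConvertLevelToBudget, and thinkingBudget -1 enables thinking without an explicit budget.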
+ if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() { + if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() { + level := strings.ToLower(strings.TrimSpace(thinkingLevel.String())) + switch level { + case "": + case "none": + out, _ = sjson.Set(out, "thinking.type", "disabled") + out, _ = sjson.Delete(out, "thinking.budget_tokens") + case "auto": + out, _ = sjson.Set(out, "thinking.type", "enabled") + out, _ = sjson.Delete(out, "thinking.budget_tokens") + default: + if budget, ok := thinking.ConvertLevelToBudget(level); ok { + out, _ = sjson.Set(out, "thinking.type", "enabled") + out, _ = sjson.Set(out, "thinking.budget_tokens", budget) + } + } + } else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() { + budget := int(thinkingBudget.Int()) + switch budget { + case 0: + out, _ = sjson.Set(out, "thinking.type", "disabled") + out, _ = sjson.Delete(out, "thinking.budget_tokens") + case -1: + out, _ = sjson.Set(out, "thinking.type", "enabled") + out, _ = sjson.Delete(out, "thinking.budget_tokens") + default: + out, _ = sjson.Set(out, "thinking.type", "enabled") + out, _ = sjson.Set(out, "thinking.budget_tokens", budget) + } + } else if includeThoughts := thinkingConfig.Get("includeThoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True { out, _ = sjson.Set(out, "thinking.type", "enabled") - normalizedBudget := util.NormalizeThinkingBudget(modelName, int(thinkingBudget.Int())) - out, _ = sjson.Set(out, "thinking.budget_tokens", normalizedBudget) } else if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True { - // Fallback to include_thoughts if no budget specified out, _ = sjson.Set(out, "thinking.type", "enabled") } } diff --git a/internal/translator/claude/openai/chat-completions/claude_openai_request.go b/internal/translator/claude/openai/chat-completions/claude_openai_request.go index ea04a97a..79dc9c90 100644 --- a/internal/translator/claude/openai/chat-completions/claude_openai_request.go +++ b/internal/translator/claude/openai/chat-completions/claude_openai_request.go @@ -15,7 +15,7 @@ import ( "strings" "github.com/google/uuid" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -65,10 +65,11 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream root := gjson.ParseBytes(rawJSON) - if v := root.Get("reasoning_effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { + // Convert OpenAI reasoning_effort to Claude thinking config. 
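+ // For example, "medium" maps to a numeric budget via ConvertLevelToBudget;
+ // the switch below turns that budget into the Claude thinking block.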
+ if v := root.Get("reasoning_effort"); v.Exists() { effort := strings.ToLower(strings.TrimSpace(v.String())) if effort != "" { - budget, ok := util.ThinkingEffortToBudget(modelName, effort) + budget, ok := thinking.ConvertLevelToBudget(effort) if ok { switch budget { case 0: @@ -137,17 +138,35 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream // Process messages and transform them to Claude Code format if messages := root.Get("messages"); messages.Exists() && messages.IsArray() { + messageIndex := 0 + systemMessageIndex := -1 messages.ForEach(func(_, message gjson.Result) bool { role := message.Get("role").String() contentResult := message.Get("content") switch role { - case "system", "user", "assistant": - // Create Claude Code message with appropriate role mapping - if role == "system" { - role = "user" + case "system": + if systemMessageIndex == -1 { + systemMsg := `{"role":"user","content":[]}` + out, _ = sjson.SetRaw(out, "messages.-1", systemMsg) + systemMessageIndex = messageIndex + messageIndex++ } - + if contentResult.Exists() && contentResult.Type == gjson.String && contentResult.String() != "" { + textPart := `{"type":"text","text":""}` + textPart, _ = sjson.Set(textPart, "text", contentResult.String()) + out, _ = sjson.SetRaw(out, fmt.Sprintf("messages.%d.content.-1", systemMessageIndex), textPart) + } else if contentResult.Exists() && contentResult.IsArray() { + contentResult.ForEach(func(_, part gjson.Result) bool { + if part.Get("type").String() == "text" { + textPart := `{"type":"text","text":""}` + textPart, _ = sjson.Set(textPart, "text", part.Get("text").String()) + out, _ = sjson.SetRaw(out, fmt.Sprintf("messages.%d.content.-1", systemMessageIndex), textPart) + } + return true + }) + } + case "user", "assistant": msg := `{"role":"","content":[]}` msg, _ = sjson.Set(msg, "role", role) @@ -226,6 +245,7 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream } out, _ = sjson.SetRaw(out, "messages.-1", msg) + messageIndex++ case "tool": // Handle tool result messages conversion @@ -236,6 +256,7 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream msg, _ = sjson.Set(msg, "content.0.tool_use_id", toolCallID) msg, _ = sjson.Set(msg, "content.0.content", content) out, _ = sjson.SetRaw(out, "messages.-1", msg) + messageIndex++ } return true }) diff --git a/internal/translator/claude/openai/responses/claude_openai-responses_request.go b/internal/translator/claude/openai/responses/claude_openai-responses_request.go index d4b7e05f..5cbe23bf 100644 --- a/internal/translator/claude/openai/responses/claude_openai-responses_request.go +++ b/internal/translator/claude/openai/responses/claude_openai-responses_request.go @@ -10,7 +10,7 @@ import ( "strings" "github.com/google/uuid" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -53,10 +53,11 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte root := gjson.ParseBytes(rawJSON) - if v := root.Get("reasoning.effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { + // Convert OpenAI Responses reasoning.effort to Claude thinking config. 
+ if v := root.Get("reasoning.effort"); v.Exists() { effort := strings.ToLower(strings.TrimSpace(v.String())) if effort != "" { - budget, ok := util.ThinkingEffortToBudget(modelName, effort) + budget, ok := thinking.ConvertLevelToBudget(effort) if ok { switch budget { case 0: diff --git a/internal/translator/claude/openai/responses/claude_openai-responses_response.go b/internal/translator/claude/openai/responses/claude_openai-responses_response.go index 593ec287..e77b09e1 100644 --- a/internal/translator/claude/openai/responses/claude_openai-responses_response.go +++ b/internal/translator/claude/openai/responses/claude_openai-responses_response.go @@ -251,6 +251,7 @@ func ConvertClaudeResponseToOpenAIResponses(ctx context.Context, modelName strin itemDone, _ = sjson.Set(itemDone, "item.id", fmt.Sprintf("fc_%s", st.CurrentFCID)) itemDone, _ = sjson.Set(itemDone, "item.arguments", args) itemDone, _ = sjson.Set(itemDone, "item.call_id", st.CurrentFCID) + itemDone, _ = sjson.Set(itemDone, "item.name", st.FuncNames[idx]) out = append(out, emitEvent("response.output_item.done", itemDone)) st.InFuncBlock = false } else if st.ReasoningActive { diff --git a/internal/translator/codex/claude/codex_claude_request.go b/internal/translator/codex/claude/codex_claude_request.go index 59cd5ecf..f0f5d867 100644 --- a/internal/translator/codex/claude/codex_claude_request.go +++ b/internal/translator/codex/claude/codex_claude_request.go @@ -12,7 +12,7 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -51,7 +51,7 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool) systemsResult := rootResult.Get("system") if systemsResult.IsArray() { systemResults := systemsResult.Array() - message := `{"type":"message","role":"user","content":[]}` + message := `{"type":"message","role":"developer","content":[]}` for i := 0; i < len(systemResults); i++ { systemResult := systemResults[i] systemTypeResult := systemResult.Get("type") @@ -217,21 +217,19 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool) // Add additional configuration parameters for the Codex API. template, _ = sjson.Set(template, "parallel_tool_calls", true) - // Convert thinking.budget_tokens to reasoning.effort for level-based models - reasoningEffort := "medium" // default - if thinking := rootResult.Get("thinking"); thinking.Exists() && thinking.IsObject() { - switch thinking.Get("type").String() { + // Convert thinking.budget_tokens to reasoning.effort. 
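+ // Falls back to "medium" when no thinking block is present; explicit budgets
+ // are mapped through ConvertBudgetToLevel.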
+ reasoningEffort := "medium" + if thinkingConfig := rootResult.Get("thinking"); thinkingConfig.Exists() && thinkingConfig.IsObject() { + switch thinkingConfig.Get("type").String() { case "enabled": - if util.ModelUsesThinkingLevels(modelName) { - if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() { - budget := int(budgetTokens.Int()) - if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" { - reasoningEffort = effort - } + if budgetTokens := thinkingConfig.Get("budget_tokens"); budgetTokens.Exists() { + budget := int(budgetTokens.Int()) + if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" { + reasoningEffort = effort } } case "disabled": - if effort, ok := util.ThinkingBudgetToEffort(modelName, 0); ok && effort != "" { + if effort, ok := thinking.ConvertBudgetToLevel(0); ok && effort != "" { reasoningEffort = effort } } @@ -243,21 +241,23 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool) template, _ = sjson.Set(template, "include", []string{"reasoning.encrypted_content"}) // Add a first message to ignore system instructions and ensure proper execution. - inputResult := gjson.Get(template, "input") - if inputResult.Exists() && inputResult.IsArray() { - inputResults := inputResult.Array() - newInput := "[]" - for i := 0; i < len(inputResults); i++ { - if i == 0 { - firstText := inputResults[i].Get("content.0.text") - firstInstructions := "EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!" - if firstText.Exists() && firstText.String() != firstInstructions { - newInput, _ = sjson.SetRaw(newInput, "-1", `{"type":"message","role":"user","content":[{"type":"input_text","text":"EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"}]}`) + if misc.GetCodexInstructionsEnabled() { + inputResult := gjson.Get(template, "input") + if inputResult.Exists() && inputResult.IsArray() { + inputResults := inputResult.Array() + newInput := "[]" + for i := 0; i < len(inputResults); i++ { + if i == 0 { + firstText := inputResults[i].Get("content.0.text") + firstInstructions := "EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!" 
+ if firstText.Exists() && firstText.String() != firstInstructions { + newInput, _ = sjson.SetRaw(newInput, "-1", `{"type":"message","role":"user","content":[{"type":"input_text","text":"EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"}]}`) + } } + newInput, _ = sjson.SetRaw(newInput, "-1", inputResults[i].Raw) } - newInput, _ = sjson.SetRaw(newInput, "-1", inputResults[i].Raw) + template, _ = sjson.SetRaw(template, "input", newInput) } - template, _ = sjson.SetRaw(template, "input", newInput) } return []byte(template) diff --git a/internal/translator/codex/claude/codex_claude_response.go b/internal/translator/codex/claude/codex_claude_response.go index c700ef84..5223cd94 100644 --- a/internal/translator/codex/claude/codex_claude_response.go +++ b/internal/translator/codex/claude/codex_claude_response.go @@ -117,8 +117,12 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa } else { template, _ = sjson.Set(template, "delta.stop_reason", "end_turn") } - template, _ = sjson.Set(template, "usage.input_tokens", rootResult.Get("response.usage.input_tokens").Int()) - template, _ = sjson.Set(template, "usage.output_tokens", rootResult.Get("response.usage.output_tokens").Int()) + inputTokens, outputTokens, cachedTokens := extractResponsesUsage(rootResult.Get("response.usage")) + template, _ = sjson.Set(template, "usage.input_tokens", inputTokens) + template, _ = sjson.Set(template, "usage.output_tokens", outputTokens) + if cachedTokens > 0 { + template, _ = sjson.Set(template, "usage.cache_read_input_tokens", cachedTokens) + } output = "event: message_delta\n" output += fmt.Sprintf("data: %s\n\n", template) @@ -204,8 +208,12 @@ func ConvertCodexResponseToClaudeNonStream(_ context.Context, _ string, original out := `{"id":"","type":"message","role":"assistant","model":"","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":0,"output_tokens":0}}` out, _ = sjson.Set(out, "id", responseData.Get("id").String()) out, _ = sjson.Set(out, "model", responseData.Get("model").String()) - out, _ = sjson.Set(out, "usage.input_tokens", responseData.Get("usage.input_tokens").Int()) - out, _ = sjson.Set(out, "usage.output_tokens", responseData.Get("usage.output_tokens").Int()) + inputTokens, outputTokens, cachedTokens := extractResponsesUsage(responseData.Get("usage")) + out, _ = sjson.Set(out, "usage.input_tokens", inputTokens) + out, _ = sjson.Set(out, "usage.output_tokens", outputTokens) + if cachedTokens > 0 { + out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens) + } hasToolCall := false @@ -308,12 +316,27 @@ func ConvertCodexResponseToClaudeNonStream(_ context.Context, _ string, original out, _ = sjson.SetRaw(out, "stop_sequence", stopSequence.Raw) } - if responseData.Get("usage.input_tokens").Exists() || responseData.Get("usage.output_tokens").Exists() { - out, _ = sjson.Set(out, "usage.input_tokens", responseData.Get("usage.input_tokens").Int()) - out, _ = sjson.Set(out, "usage.output_tokens", responseData.Get("usage.output_tokens").Int()) + return out +} + +func extractResponsesUsage(usage gjson.Result) (int64, int64, int64) { + if !usage.Exists() || usage.Type == gjson.Null { + return 0, 0, 0 } - return out + inputTokens := usage.Get("input_tokens").Int() + outputTokens := usage.Get("output_tokens").Int() + cachedTokens := usage.Get("input_tokens_details.cached_tokens").Int() + + if cachedTokens > 0 { + if inputTokens >= cachedTokens { + inputTokens -= cachedTokens + } else { + inputTokens = 0 + } + } + + return inputTokens, 
outputTokens, cachedTokens } // buildReverseMapFromClaudeOriginalShortToOriginal builds a map[short]original from original Claude request tools. diff --git a/internal/translator/codex/gemini/codex_gemini_request.go b/internal/translator/codex/gemini/codex_gemini_request.go index 944b95f6..342c5b1a 100644 --- a/internal/translator/codex/gemini/codex_gemini_request.go +++ b/internal/translator/codex/gemini/codex_gemini_request.go @@ -14,6 +14,7 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" @@ -93,7 +94,7 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool) // System instruction -> as a user message with input_text parts sysParts := root.Get("system_instruction.parts") if sysParts.IsArray() { - msg := `{"type":"message","role":"user","content":[]}` + msg := `{"type":"message","role":"developer","content":[]}` arr := sysParts.Array() for i := 0; i < len(arr); i++ { p := arr[i] @@ -247,21 +248,28 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool) // Fixed flags aligning with Codex expectations out, _ = sjson.Set(out, "parallel_tool_calls", true) - // Convert thinkingBudget to reasoning.effort for level-based models - reasoningEffort := "medium" // default + // Convert Gemini thinkingConfig to Codex reasoning.effort. + effortSet := false if genConfig := root.Get("generationConfig"); genConfig.Exists() { if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() { - if util.ModelUsesThinkingLevels(modelName) { - if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() { - budget := int(thinkingBudget.Int()) - if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" { - reasoningEffort = effort - } + if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() { + effort := strings.ToLower(strings.TrimSpace(thinkingLevel.String())) + if effort != "" { + out, _ = sjson.Set(out, "reasoning.effort", effort) + effortSet = true + } + } else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() { + if effort, ok := thinking.ConvertBudgetToLevel(int(thinkingBudget.Int())); ok { + out, _ = sjson.Set(out, "reasoning.effort", effort) + effortSet = true } } } } - out, _ = sjson.Set(out, "reasoning.effort", reasoningEffort) + if !effortSet { + // No thinking config, set default effort + out, _ = sjson.Set(out, "reasoning.effort", "medium") + } out, _ = sjson.Set(out, "reasoning.summary", "auto") out, _ = sjson.Set(out, "stream", true) out, _ = sjson.Set(out, "store", false) diff --git a/internal/translator/codex/openai/chat-completions/codex_openai_request.go b/internal/translator/codex/openai/chat-completions/codex_openai_request.go index b68d2792..40f56f88 100644 --- a/internal/translator/codex/openai/chat-completions/codex_openai_request.go +++ b/internal/translator/codex/openai/chat-completions/codex_openai_request.go @@ -33,7 +33,7 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b rawJSON := bytes.Clone(inputRawJSON) userAgent := misc.ExtractCodexUserAgent(rawJSON) // Start with empty JSON object - out := `{}` + out := `{"instructions":""}` // Stream must be set to true out, _ = sjson.Set(out, "stream", stream) @@ -98,7 +98,9 @@ func 
ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b // Extract system instructions from first system message (string or text object) messages := gjson.GetBytes(rawJSON, "messages") _, instructions := misc.CodexInstructionsForModel(modelName, "", userAgent) - out, _ = sjson.Set(out, "instructions", instructions) + if misc.GetCodexInstructionsEnabled() { + out, _ = sjson.Set(out, "instructions", instructions) + } // if messages.IsArray() { // arr := messages.Array() // for i := 0; i < len(arr); i++ { @@ -141,7 +143,7 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b msg := `{}` msg, _ = sjson.Set(msg, "type", "message") if role == "system" { - msg, _ = sjson.Set(msg, "role", "user") + msg, _ = sjson.Set(msg, "role", "developer") } else { msg, _ = sjson.Set(msg, "role", role) } diff --git a/internal/translator/codex/openai/responses/codex_openai-responses_request.go b/internal/translator/codex/openai/responses/codex_openai-responses_request.go index e1691a5b..33dbf112 100644 --- a/internal/translator/codex/openai/responses/codex_openai-responses_request.go +++ b/internal/translator/codex/openai/responses/codex_openai-responses_request.go @@ -74,6 +74,11 @@ func ConvertOpenAIResponsesRequestToCodex(modelName string, inputRawJSON []byte, } if hasOfficialInstructions { + newInput := "[]" + for _, item := range inputResults { + newInput, _ = sjson.SetRaw(newInput, "-1", item.Raw) + } + rawJSON, _ = sjson.SetRawBytes(rawJSON, "input", []byte(newInput)) return rawJSON } // log.Debugf("instructions not matched, %s\n", originalInstructions) diff --git a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go index 66e0385f..f4a51e8b 100644 --- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go +++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go @@ -10,7 +10,6 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -160,12 +159,12 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) [] } // Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled - if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) { + if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() { if t.Get("type").String() == "enabled" { if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { budget := int(b.Int()) out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true) + out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true) } } } diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go index 98188835..af161b5c 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go @@ -35,37 +35,19 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo // Model out, _ = sjson.SetBytes(out, "model", modelName) - // Reasoning effort -> 
thinkingBudget/include_thoughts - // Note: OpenAI official fields take precedence over extra_body.google.thinking_config + // Apply thinking configuration: convert OpenAI reasoning_effort to Gemini CLI thinkingConfig. + // Inline translation-only mapping; capability checks happen later in ApplyThinking. re := gjson.GetBytes(rawJSON, "reasoning_effort") - hasOfficialThinking := re.Exists() - if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { - out = util.ApplyReasoningEffortToGeminiCLI(out, re.String()) - } - - // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) - // Only apply for models that use numeric budgets, not discrete levels. - if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { - if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { - var setBudget bool - var budget int - - if v := tc.Get("thinkingBudget"); v.Exists() { - budget = int(v.Int()) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) - setBudget = true - } else if v := tc.Get("thinking_budget"); v.Exists() { - budget = int(v.Int()) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) - setBudget = true - } - - if v := tc.Get("includeThoughts"); v.Exists() { - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool()) - } else if v := tc.Get("include_thoughts"); v.Exists() { - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool()) - } else if setBudget && budget != 0 { - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) + if re.Exists() { + effort := strings.ToLower(strings.TrimSpace(re.String())) + if effort != "" { + thinkingPath := "request.generationConfig.thinkingConfig" + if effort == "auto" { + out, _ = sjson.SetBytes(out, thinkingPath+".thinkingBudget", -1) + out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", true) + } else { + out, _ = sjson.SetBytes(out, thinkingPath+".thinkingLevel", effort) + out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", effort != "none") } } } @@ -147,6 +129,7 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo } } + systemPartIndex := 0 for i := 0; i < len(arr); i++ { m := arr[i] role := m.Get("role").String() @@ -156,16 +139,19 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo // system -> request.systemInstruction as a user message style if content.Type == gjson.String { out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user") - out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.String()) + out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.String()) + systemPartIndex++ } else if content.IsObject() && content.Get("type").String() == "text" { out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user") - out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.Get("text").String()) + out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.Get("text").String()) + systemPartIndex++ } else if content.IsArray() { contents := content.Array() if len(contents) > 0 { out, _ = 
sjson.SetBytes(out, "request.systemInstruction.role", "user") for j := 0; j < len(contents); j++ { - out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", j), contents[j].Get("text").String()) + out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), contents[j].Get("text").String()) + systemPartIndex++ } } } diff --git a/internal/translator/gemini/claude/gemini_claude_request.go b/internal/translator/gemini/claude/gemini_claude_request.go index c410aad8..0d5361a5 100644 --- a/internal/translator/gemini/claude/gemini_claude_request.go +++ b/internal/translator/gemini/claude/gemini_claude_request.go @@ -10,7 +10,6 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -153,13 +152,13 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool) } // Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when enabled - // Only apply for models that use numeric budgets, not discrete levels. - if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { + // Translator only does format conversion, ApplyThinking handles model capability validation. + if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() { if t.Get("type").String() == "enabled" { if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { budget := int(b.Int()) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget) - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.includeThoughts", true) } } } diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 57e150c1..7ad005b9 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -35,55 +35,19 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) // Model out, _ = sjson.SetBytes(out, "model", modelName) - // Reasoning effort -> thinkingBudget/include_thoughts - // Note: OpenAI official fields take precedence over extra_body.google.thinking_config - // Only apply numeric budgets for models that use budgets (not discrete levels) to avoid - // incorrectly applying thinkingBudget for level-based models like gpt-5. Gemini 3 models - // use thinkingLevel/includeThoughts instead. + // Apply thinking configuration: convert OpenAI reasoning_effort to Gemini thinkingConfig. + // Inline translation-only mapping; capability checks happen later in ApplyThinking. 
re := gjson.GetBytes(rawJSON, "reasoning_effort") - hasOfficialThinking := re.Exists() - if hasOfficialThinking && util.ModelSupportsThinking(modelName) { + if re.Exists() { effort := strings.ToLower(strings.TrimSpace(re.String())) - if util.IsGemini3Model(modelName) { - switch effort { - case "none": - out, _ = sjson.DeleteBytes(out, "generationConfig.thinkingConfig") - case "auto": - includeThoughts := true - out = util.ApplyGeminiThinkingLevel(out, "", &includeThoughts) - default: - if level, ok := util.ValidateGemini3ThinkingLevel(modelName, effort); ok { - out = util.ApplyGeminiThinkingLevel(out, level, nil) - } - } - } else if !util.ModelUsesThinkingLevels(modelName) { - out = util.ApplyReasoningEffortToGemini(out, effort) - } - } - - // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) - // Only apply for models that use numeric budgets, not discrete levels. - if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { - if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { - var setBudget bool - var budget int - - if v := tc.Get("thinkingBudget"); v.Exists() { - budget = int(v.Int()) - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget) - setBudget = true - } else if v := tc.Get("thinking_budget"); v.Exists() { - budget = int(v.Int()) - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget) - setBudget = true - } - - if v := tc.Get("includeThoughts"); v.Exists() { - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool()) - } else if v := tc.Get("include_thoughts"); v.Exists() { - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool()) - } else if setBudget && budget != 0 { - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true) + if effort != "" { + thinkingPath := "generationConfig.thinkingConfig" + if effort == "auto" { + out, _ = sjson.SetBytes(out, thinkingPath+".thinkingBudget", -1) + out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", true) + } else { + out, _ = sjson.SetBytes(out, thinkingPath+".thinkingLevel", effort) + out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", effort != "none") } } } @@ -165,6 +129,7 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) } } + systemPartIndex := 0 for i := 0; i < len(arr); i++ { m := arr[i] role := m.Get("role").String() @@ -174,16 +139,19 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) // system -> system_instruction as a user message style if content.Type == gjson.String { out, _ = sjson.SetBytes(out, "system_instruction.role", "user") - out, _ = sjson.SetBytes(out, "system_instruction.parts.0.text", content.String()) + out, _ = sjson.SetBytes(out, fmt.Sprintf("system_instruction.parts.%d.text", systemPartIndex), content.String()) + systemPartIndex++ } else if content.IsObject() && content.Get("type").String() == "text" { out, _ = sjson.SetBytes(out, "system_instruction.role", "user") - out, _ = sjson.SetBytes(out, "system_instruction.parts.0.text", content.Get("text").String()) + out, _ = sjson.SetBytes(out, fmt.Sprintf("system_instruction.parts.%d.text", systemPartIndex), content.Get("text").String()) + systemPartIndex++ } else if content.IsArray() { contents := content.Array() if len(contents) > 0 { - out, _ = 
sjson.SetBytes(out, "request.systemInstruction.role", "user") + out, _ = sjson.SetBytes(out, "system_instruction.role", "user") for j := 0; j < len(contents); j++ { - out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", j), contents[j].Get("text").String()) + out, _ = sjson.SetBytes(out, fmt.Sprintf("system_instruction.parts.%d.text", systemPartIndex), contents[j].Get("text").String()) + systemPartIndex++ } } } @@ -198,7 +166,10 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) for _, item := range items { switch item.Get("type").String() { case "text": - node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String()) + text := item.Get("text").String() + if text != "" { + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text) + } p++ case "image_url": imageURL := item.Get("image_url.url").String() @@ -243,6 +214,10 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) for _, item := range content.Array() { switch item.Get("type").String() { case "text": + text := item.Get("text").String() + if text != "" { + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text) + } p++ case "image_url": // If the assistant returned an inline data URL, preserve it for history fidelity. diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go index 1bf67e7f..41279977 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go @@ -5,7 +5,6 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -388,31 +387,19 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte out, _ = sjson.Set(out, "generationConfig.stopSequences", sequences) } - // OpenAI official reasoning fields take precedence - // Only convert for models that use numeric budgets (not discrete levels). - hasOfficialThinking := root.Get("reasoning.effort").Exists() - if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { - reasoningEffort := root.Get("reasoning.effort") - out = string(util.ApplyReasoningEffortToGemini([]byte(out), reasoningEffort.String())) - } - - // Cherry Studio extension (applies only when official fields are missing) - // Only apply for models that use numeric budgets, not discrete levels. - if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { - if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { - var setBudget bool - var budget int - if v := tc.Get("thinking_budget"); v.Exists() { - budget = int(v.Int()) - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget) - setBudget = true - } - if v := tc.Get("include_thoughts"); v.Exists() { - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool()) - } else if setBudget { - if budget != 0 { - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) - } + // Apply thinking configuration: convert OpenAI Responses API reasoning.effort to Gemini thinkingConfig. 
+ // Inline translation-only mapping; capability checks happen later in ApplyThinking. + re := root.Get("reasoning.effort") + if re.Exists() { + effort := strings.ToLower(strings.TrimSpace(re.String())) + if effort != "" { + thinkingPath := "generationConfig.thinkingConfig" + if effort == "auto" { + out, _ = sjson.Set(out, thinkingPath+".thinkingBudget", -1) + out, _ = sjson.Set(out, thinkingPath+".includeThoughts", true) + } else { + out, _ = sjson.Set(out, thinkingPath+".thinkingLevel", effort) + out, _ = sjson.Set(out, thinkingPath+".includeThoughts", effort != "none") } } } diff --git a/internal/translator/openai/claude/openai_claude_request.go b/internal/translator/openai/claude/openai_claude_request.go index cc7fd01e..c268ec62 100644 --- a/internal/translator/openai/claude/openai_claude_request.go +++ b/internal/translator/openai/claude/openai_claude_request.go @@ -9,7 +9,7 @@ import ( "bytes" "strings" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -61,23 +61,23 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream out, _ = sjson.Set(out, "stream", stream) // Thinking: Convert Claude thinking.budget_tokens to OpenAI reasoning_effort - if thinking := root.Get("thinking"); thinking.Exists() && thinking.IsObject() { - if thinkingType := thinking.Get("type"); thinkingType.Exists() { + if thinkingConfig := root.Get("thinking"); thinkingConfig.Exists() && thinkingConfig.IsObject() { + if thinkingType := thinkingConfig.Get("type"); thinkingType.Exists() { switch thinkingType.String() { case "enabled": - if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() { + if budgetTokens := thinkingConfig.Get("budget_tokens"); budgetTokens.Exists() { budget := int(budgetTokens.Int()) - if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" { + if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" { out, _ = sjson.Set(out, "reasoning_effort", effort) } } else { // No budget_tokens specified, default to "auto" for enabled thinking - if effort, ok := util.ThinkingBudgetToEffort(modelName, -1); ok && effort != "" { + if effort, ok := thinking.ConvertBudgetToLevel(-1); ok && effort != "" { out, _ = sjson.Set(out, "reasoning_effort", effort) } } case "disabled": - if effort, ok := util.ThinkingBudgetToEffort(modelName, 0); ok && effort != "" { + if effort, ok := thinking.ConvertBudgetToLevel(0); ok && effort != "" { out, _ = sjson.Set(out, "reasoning_effort", effort) } } @@ -88,7 +88,7 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream var messagesJSON = "[]" // Handle system message first - systemMsgJSON := `{"role":"system","content":[{"type":"text","text":"Use ANY tool, the parameters MUST accord with RFC 8259 (The JavaScript Object Notation (JSON) Data Interchange Format), the keys and value MUST be enclosed in double quotes."}]}` + systemMsgJSON := `{"role":"system","content":[]}` if system := root.Get("system"); system.Exists() { if system.Type == gjson.String { if system.String() != "" { @@ -129,7 +129,7 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream case "thinking": // Only map thinking to reasoning_content for assistant messages (security: prevent injection) if role == "assistant" { - thinkingText := util.GetThinkingText(part) + thinkingText := thinking.GetThinkingText(part) // Skip empty or whitespace-only 
thinking if strings.TrimSpace(thinkingText) != "" { reasoningParts = append(reasoningParts, thinkingText) diff --git a/internal/translator/openai/claude/openai_claude_response.go b/internal/translator/openai/claude/openai_claude_response.go index 1629545d..b6e0d005 100644 --- a/internal/translator/openai/claude/openai_claude_response.go +++ b/internal/translator/openai/claude/openai_claude_response.go @@ -289,21 +289,17 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI // Only process if usage has actual values (not null) if param.FinishReason != "" { usage := root.Get("usage") - var inputTokens, outputTokens int64 + var inputTokens, outputTokens, cachedTokens int64 if usage.Exists() && usage.Type != gjson.Null { - // Check if usage has actual token counts - promptTokens := usage.Get("prompt_tokens") - completionTokens := usage.Get("completion_tokens") - - if promptTokens.Exists() && completionTokens.Exists() { - inputTokens = promptTokens.Int() - outputTokens = completionTokens.Int() - } + inputTokens, outputTokens, cachedTokens = extractOpenAIUsage(usage) // Send message_delta with usage messageDeltaJSON := `{"type":"message_delta","delta":{"stop_reason":"","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}` messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "delta.stop_reason", mapOpenAIFinishReasonToAnthropic(param.FinishReason)) messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.input_tokens", inputTokens) messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.output_tokens", outputTokens) + if cachedTokens > 0 { + messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.cache_read_input_tokens", cachedTokens) + } results = append(results, "event: message_delta\ndata: "+messageDeltaJSON+"\n\n") param.MessageDeltaSent = true @@ -423,13 +419,12 @@ func convertOpenAINonStreamingToAnthropic(rawJSON []byte) []string { // Set usage information if usage := root.Get("usage"); usage.Exists() { - out, _ = sjson.Set(out, "usage.input_tokens", usage.Get("prompt_tokens").Int()) - out, _ = sjson.Set(out, "usage.output_tokens", usage.Get("completion_tokens").Int()) - reasoningTokens := int64(0) - if v := usage.Get("completion_tokens_details.reasoning_tokens"); v.Exists() { - reasoningTokens = v.Int() + inputTokens, outputTokens, cachedTokens := extractOpenAIUsage(usage) + out, _ = sjson.Set(out, "usage.input_tokens", inputTokens) + out, _ = sjson.Set(out, "usage.output_tokens", outputTokens) + if cachedTokens > 0 { + out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens) } - out, _ = sjson.Set(out, "usage.reasoning_tokens", reasoningTokens) } return []string{out} @@ -674,8 +669,12 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina } if respUsage := root.Get("usage"); respUsage.Exists() { - out, _ = sjson.Set(out, "usage.input_tokens", respUsage.Get("prompt_tokens").Int()) - out, _ = sjson.Set(out, "usage.output_tokens", respUsage.Get("completion_tokens").Int()) + inputTokens, outputTokens, cachedTokens := extractOpenAIUsage(respUsage) + out, _ = sjson.Set(out, "usage.input_tokens", inputTokens) + out, _ = sjson.Set(out, "usage.output_tokens", outputTokens) + if cachedTokens > 0 { + out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens) + } } if !stopReasonSet { @@ -692,3 +691,23 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina func ClaudeTokenCount(ctx context.Context, count int64) string { return fmt.Sprintf(`{"input_tokens":%d}`, count) } + 
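The extractOpenAIUsage helper added in the next hunk nets prompt_tokens_details.cached_tokens out of prompt_tokens, so the Anthropic-style counters report fresh input and cache reads separately. A self-contained sketch of the same accounting, using an invented usage payload (everything beyond the gjson calls is illustrative):

package main

import (
	"fmt"

	"github.com/tidwall/gjson"
)

func main() {
	// Hypothetical OpenAI usage block; cached_tokens is a subset of prompt_tokens.
	usage := gjson.Parse(`{"prompt_tokens":1200,"completion_tokens":80,"prompt_tokens_details":{"cached_tokens":1000}}`)

	input := usage.Get("prompt_tokens").Int()
	output := usage.Get("completion_tokens").Int()
	cached := usage.Get("prompt_tokens_details.cached_tokens").Int()
	if cached > 0 && input >= cached {
		input -= cached // report only non-cached tokens as Anthropic input_tokens
	}
	fmt.Println(input, output, cached) // 200 80 1000
}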
+func extractOpenAIUsage(usage gjson.Result) (int64, int64, int64) { + if !usage.Exists() || usage.Type == gjson.Null { + return 0, 0, 0 + } + + inputTokens := usage.Get("prompt_tokens").Int() + outputTokens := usage.Get("completion_tokens").Int() + cachedTokens := usage.Get("prompt_tokens_details.cached_tokens").Int() + + if cachedTokens > 0 { + if inputTokens >= cachedTokens { + inputTokens -= cachedTokens + } else { + inputTokens = 0 + } + } + + return inputTokens, outputTokens, cachedTokens +} diff --git a/internal/translator/openai/gemini/openai_gemini_request.go b/internal/translator/openai/gemini/openai_gemini_request.go index f51d914b..6e9bf637 100644 --- a/internal/translator/openai/gemini/openai_gemini_request.go +++ b/internal/translator/openai/gemini/openai_gemini_request.go @@ -12,7 +12,7 @@ import ( "math/big" "strings" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -77,12 +77,15 @@ func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream } } - // Convert thinkingBudget to reasoning_effort - // Always perform conversion to support allowCompat models that may not be in registry + // Map Gemini thinkingConfig to OpenAI reasoning_effort. if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() { - if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() { - budget := int(thinkingBudget.Int()) - if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" { + if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() { + effort := strings.ToLower(strings.TrimSpace(thinkingLevel.String())) + if effort != "" { + out, _ = sjson.Set(out, "reasoning_effort", effort) + } + } else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() { + if effort, ok := thinking.ConvertBudgetToLevel(int(thinkingBudget.Int())); ok { out, _ = sjson.Set(out, "reasoning_effort", effort) } } diff --git a/internal/translator/openai/openai/responses/openai_openai-responses_response.go b/internal/translator/openai/openai/responses/openai_openai-responses_response.go index 17233ca5..15152852 100644 --- a/internal/translator/openai/openai/responses/openai_openai-responses_response.go +++ b/internal/translator/openai/openai/responses/openai_openai-responses_response.go @@ -12,6 +12,10 @@ import ( "github.com/tidwall/sjson" ) +type oaiToResponsesStateReasoning struct { + ReasoningID string + ReasoningData string +} type oaiToResponsesState struct { Seq int ResponseID string @@ -23,6 +27,7 @@ type oaiToResponsesState struct { // Per-output message text buffers by index MsgTextBuf map[int]*strings.Builder ReasoningBuf strings.Builder + Reasonings []oaiToResponsesStateReasoning FuncArgsBuf map[int]*strings.Builder // index -> args FuncNames map[int]string // index -> name FuncCallIDs map[int]string // index -> call_id @@ -63,6 +68,7 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context, MsgItemDone: make(map[int]bool), FuncArgsDone: make(map[int]bool), FuncItemDone: make(map[int]bool), + Reasonings: make([]oaiToResponsesStateReasoning, 0), } } st := (*param).(*oaiToResponsesState) @@ -157,6 +163,31 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context, st.Started = true } + stopReasoning := func(text string) { + // Emit reasoning done events + textDone := 
`{"type":"response.reasoning_summary_text.done","sequence_number":0,"item_id":"","output_index":0,"summary_index":0,"text":""}` + textDone, _ = sjson.Set(textDone, "sequence_number", nextSeq()) + textDone, _ = sjson.Set(textDone, "item_id", st.ReasoningID) + textDone, _ = sjson.Set(textDone, "output_index", st.ReasoningIndex) + textDone, _ = sjson.Set(textDone, "text", text) + out = append(out, emitRespEvent("response.reasoning_summary_text.done", textDone)) + partDone := `{"type":"response.reasoning_summary_part.done","sequence_number":0,"item_id":"","output_index":0,"summary_index":0,"part":{"type":"summary_text","text":""}}` + partDone, _ = sjson.Set(partDone, "sequence_number", nextSeq()) + partDone, _ = sjson.Set(partDone, "item_id", st.ReasoningID) + partDone, _ = sjson.Set(partDone, "output_index", st.ReasoningIndex) + partDone, _ = sjson.Set(partDone, "part.text", text) + out = append(out, emitRespEvent("response.reasoning_summary_part.done", partDone)) + outputItemDone := `{"type":"response.output_item.done","item":{"id":"","type":"reasoning","encrypted_content":"","summary":[{"type":"summary_text","text":""}]},"output_index":0,"sequence_number":0}` + outputItemDone, _ = sjson.Set(outputItemDone, "sequence_number", nextSeq()) + outputItemDone, _ = sjson.Set(outputItemDone, "item.id", st.ReasoningID) + outputItemDone, _ = sjson.Set(outputItemDone, "output_index", st.ReasoningIndex) + outputItemDone, _ = sjson.Set(outputItemDone, "item.summary.text", text) + out = append(out, emitRespEvent("response.output_item.done", outputItemDone)) + + st.Reasonings = append(st.Reasonings, oaiToResponsesStateReasoning{ReasoningID: st.ReasoningID, ReasoningData: text}) + st.ReasoningID = "" + } + // choices[].delta content / tool_calls / reasoning_content if choices := root.Get("choices"); choices.Exists() && choices.IsArray() { choices.ForEach(func(_, choice gjson.Result) bool { @@ -165,6 +196,10 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context, if delta.Exists() { if c := delta.Get("content"); c.Exists() && c.String() != "" { // Ensure the message item and its first content part are announced before any text deltas + if st.ReasoningID != "" { + stopReasoning(st.ReasoningBuf.String()) + st.ReasoningBuf.Reset() + } if !st.MsgItemAdded[idx] { item := `{"type":"response.output_item.added","sequence_number":0,"output_index":0,"item":{"id":"","type":"message","status":"in_progress","content":[],"role":"assistant"}}` item, _ = sjson.Set(item, "sequence_number", nextSeq()) @@ -226,6 +261,10 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context, // tool calls if tcs := delta.Get("tool_calls"); tcs.Exists() && tcs.IsArray() { + if st.ReasoningID != "" { + stopReasoning(st.ReasoningBuf.String()) + st.ReasoningBuf.Reset() + } // Before emitting any function events, if a message is open for this index, // close its text/content to match Codex expected ordering. 
if st.MsgItemAdded[idx] && !st.MsgItemDone[idx] { @@ -361,17 +400,8 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context, } if st.ReasoningID != "" { - // Emit reasoning done events - textDone := `{"type":"response.reasoning_summary_text.done","sequence_number":0,"item_id":"","output_index":0,"summary_index":0,"text":""}` - textDone, _ = sjson.Set(textDone, "sequence_number", nextSeq()) - textDone, _ = sjson.Set(textDone, "item_id", st.ReasoningID) - textDone, _ = sjson.Set(textDone, "output_index", st.ReasoningIndex) - out = append(out, emitRespEvent("response.reasoning_summary_text.done", textDone)) - partDone := `{"type":"response.reasoning_summary_part.done","sequence_number":0,"item_id":"","output_index":0,"summary_index":0,"part":{"type":"summary_text","text":""}}` - partDone, _ = sjson.Set(partDone, "sequence_number", nextSeq()) - partDone, _ = sjson.Set(partDone, "item_id", st.ReasoningID) - partDone, _ = sjson.Set(partDone, "output_index", st.ReasoningIndex) - out = append(out, emitRespEvent("response.reasoning_summary_part.done", partDone)) + stopReasoning(st.ReasoningBuf.String()) + st.ReasoningBuf.Reset() } // Emit function call done events for any active function calls @@ -485,11 +515,13 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context, } // Build response.output using aggregated buffers outputsWrapper := `{"arr":[]}` - if st.ReasoningBuf.Len() > 0 { - item := `{"id":"","type":"reasoning","summary":[{"type":"summary_text","text":""}]}` - item, _ = sjson.Set(item, "id", st.ReasoningID) - item, _ = sjson.Set(item, "summary.0.text", st.ReasoningBuf.String()) - outputsWrapper, _ = sjson.SetRaw(outputsWrapper, "arr.-1", item) + if len(st.Reasonings) > 0 { + for _, r := range st.Reasonings { + item := `{"id":"","type":"reasoning","summary":[{"type":"summary_text","text":""}]}` + item, _ = sjson.Set(item, "id", r.ReasoningID) + item, _ = sjson.Set(item, "summary.0.text", r.ReasoningData) + outputsWrapper, _ = sjson.SetRaw(outputsWrapper, "arr.-1", item) + } } // Append message items in ascending index order if len(st.MsgItemAdded) > 0 { diff --git a/internal/util/claude_thinking.go b/internal/util/claude_thinking.go deleted file mode 100644 index 6176f57d..00000000 --- a/internal/util/claude_thinking.go +++ /dev/null @@ -1,49 +0,0 @@ -package util - -import ( - "github.com/tidwall/gjson" - "github.com/tidwall/sjson" -) - -// ApplyClaudeThinkingConfig applies thinking configuration to a Claude API request payload. -// It sets the thinking.type to "enabled" and thinking.budget_tokens to the specified budget. -// If budget is nil or the payload already has thinking config, it returns the payload unchanged. -func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte { - if budget == nil { - return body - } - if gjson.GetBytes(body, "thinking").Exists() { - return body - } - if *budget <= 0 { - return body - } - updated := body - updated, _ = sjson.SetBytes(updated, "thinking.type", "enabled") - updated, _ = sjson.SetBytes(updated, "thinking.budget_tokens", *budget) - return updated -} - -// ResolveClaudeThinkingConfig resolves thinking configuration from metadata for Claude models. -// It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget. -// Returns the normalized budget (nil if thinking should not be enabled) and whether it matched. 
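The guard in the deleted ApplyClaudeThinkingConfig above is worth keeping in mind when reading the replacement package: a thinking block is only injected when none exists and the budget is positive. A minimal sketch of that behavior (enableThinking is a stand-in name, not the new API):

package main

import (
	"fmt"

	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

// enableThinking mirrors the removed guard: leave the payload alone unless a
// positive budget is given and no thinking block is present yet.
func enableThinking(body []byte, budget int) []byte {
	if budget <= 0 || gjson.GetBytes(body, "thinking").Exists() {
		return body
	}
	body, _ = sjson.SetBytes(body, "thinking.type", "enabled")
	body, _ = sjson.SetBytes(body, "thinking.budget_tokens", budget)
	return body
}

func main() {
	fmt.Println(string(enableThinking([]byte(`{"model":"claude-sonnet-4-5"}`), 8192)))
}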
-func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) { - if !ModelSupportsThinking(modelName) { - return nil, false - } - budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata) - if !matched { - return nil, false - } - if include != nil && !*include { - return nil, true - } - if budget == nil { - return nil, true - } - normalized := NormalizeThinkingBudget(modelName, *budget) - if normalized <= 0 { - return nil, true - } - return &normalized, true -} diff --git a/internal/util/gemini_schema.go b/internal/util/gemini_schema.go index 38d3773e..c7cb0f40 100644 --- a/internal/util/gemini_schema.go +++ b/internal/util/gemini_schema.go @@ -19,6 +19,7 @@ func CleanJSONSchemaForAntigravity(jsonStr string) string { // Phase 1: Convert and add hints jsonStr = convertRefsToHints(jsonStr) jsonStr = convertConstToEnum(jsonStr) + jsonStr = convertEnumValuesToStrings(jsonStr) jsonStr = addEnumHints(jsonStr) jsonStr = addAdditionalPropertiesHints(jsonStr) jsonStr = moveConstraintsToDescription(jsonStr) @@ -77,6 +78,33 @@ func convertConstToEnum(jsonStr string) string { return jsonStr } +// convertEnumValuesToStrings ensures all enum values are strings. +// Gemini API requires enum values to be of type string, not numbers or booleans. +func convertEnumValuesToStrings(jsonStr string) string { + for _, p := range findPaths(jsonStr, "enum") { + arr := gjson.Get(jsonStr, p) + if !arr.IsArray() { + continue + } + + var stringVals []string + needsConversion := false + for _, item := range arr.Array() { + // Check if any value is not a string + if item.Type != gjson.String { + needsConversion = true + } + stringVals = append(stringVals, item.String()) + } + + // Only update if we found non-string values + if needsConversion { + jsonStr, _ = sjson.Set(jsonStr, p, stringVals) + } + } + return jsonStr +} + func addEnumHints(jsonStr string) string { for _, p := range findPaths(jsonStr, "enum") { arr := gjson.Get(jsonStr, p) diff --git a/internal/util/gemini_schema_test.go b/internal/util/gemini_schema_test.go index 60335f22..ca77225e 100644 --- a/internal/util/gemini_schema_test.go +++ b/internal/util/gemini_schema_test.go @@ -818,3 +818,54 @@ func TestCleanJSONSchemaForAntigravity_MultipleFormats(t *testing.T) { t.Errorf("date-time format hint should be added, got: %s", result) } } + +func TestCleanJSONSchemaForAntigravity_NumericEnumToString(t *testing.T) { + // Gemini API requires enum values to be strings, not numbers + input := `{ + "type": "object", + "properties": { + "priority": {"type": "integer", "enum": [0, 1, 2]}, + "level": {"type": "number", "enum": [1.5, 2.5, 3.5]}, + "status": {"type": "string", "enum": ["active", "inactive"]} + } + }` + + result := CleanJSONSchemaForAntigravity(input) + + // Numeric enum values should be converted to strings + if strings.Contains(result, `"enum":[0,1,2]`) { + t.Errorf("Integer enum values should be converted to strings, got: %s", result) + } + if strings.Contains(result, `"enum":[1.5,2.5,3.5]`) { + t.Errorf("Float enum values should be converted to strings, got: %s", result) + } + // Should contain string versions + if !strings.Contains(result, `"0"`) || !strings.Contains(result, `"1"`) || !strings.Contains(result, `"2"`) { + t.Errorf("Integer enum values should be converted to string format, got: %s", result) + } + // String enum values should remain unchanged + if !strings.Contains(result, `"active"`) || !strings.Contains(result, `"inactive"`) { + t.Errorf("String enum values should remain unchanged, 
got: %s", result) + } +} + +func TestCleanJSONSchemaForAntigravity_BooleanEnumToString(t *testing.T) { + // Boolean enum values should also be converted to strings + input := `{ + "type": "object", + "properties": { + "enabled": {"type": "boolean", "enum": [true, false]} + } + }` + + result := CleanJSONSchemaForAntigravity(input) + + // Boolean enum values should be converted to strings + if strings.Contains(result, `"enum":[true,false]`) { + t.Errorf("Boolean enum values should be converted to strings, got: %s", result) + } + // Should contain string versions "true" and "false" + if !strings.Contains(result, `"true"`) || !strings.Contains(result, `"false"`) { + t.Errorf("Boolean enum values should be converted to string format, got: %s", result) + } +} diff --git a/internal/util/gemini_thinking.go b/internal/util/gemini_thinking.go deleted file mode 100644 index 8e76f3bc..00000000 --- a/internal/util/gemini_thinking.go +++ /dev/null @@ -1,617 +0,0 @@ -package util - -import ( - "regexp" - "strings" - - "github.com/tidwall/gjson" - "github.com/tidwall/sjson" -) - -const ( - GeminiThinkingBudgetMetadataKey = "gemini_thinking_budget" - GeminiIncludeThoughtsMetadataKey = "gemini_include_thoughts" - GeminiOriginalModelMetadataKey = "gemini_original_model" -) - -// Gemini model family detection patterns -var ( - gemini3Pattern = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]`) - gemini3ProPattern = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]pro`) - gemini3FlashPattern = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]flash`) - gemini25Pattern = regexp.MustCompile(`(?i)^gemini[_-]?2\.5[_-]`) -) - -// IsGemini3Model returns true if the model is a Gemini 3 family model. -// Gemini 3 models should use thinkingLevel (string) instead of thinkingBudget (number). -func IsGemini3Model(model string) bool { - return gemini3Pattern.MatchString(model) -} - -// IsGemini3ProModel returns true if the model is a Gemini 3 Pro variant. -// Gemini 3 Pro supports thinkingLevel: "low", "high" (default: "high") -func IsGemini3ProModel(model string) bool { - return gemini3ProPattern.MatchString(model) -} - -// IsGemini3FlashModel returns true if the model is a Gemini 3 Flash variant. -// Gemini 3 Flash supports thinkingLevel: "minimal", "low", "medium", "high" (default: "high") -func IsGemini3FlashModel(model string) bool { - return gemini3FlashPattern.MatchString(model) -} - -// IsGemini25Model returns true if the model is a Gemini 2.5 family model. -// Gemini 2.5 models should use thinkingBudget (number). -func IsGemini25Model(model string) bool { - return gemini25Pattern.MatchString(model) -} - -// Gemini3ProThinkingLevels are the valid thinkingLevel values for Gemini 3 Pro models. -var Gemini3ProThinkingLevels = []string{"low", "high"} - -// Gemini3FlashThinkingLevels are the valid thinkingLevel values for Gemini 3 Flash models. -var Gemini3FlashThinkingLevels = []string{"minimal", "low", "medium", "high"} - -func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool) []byte { - if budget == nil && includeThoughts == nil { - return body - } - updated := body - if budget != nil { - valuePath := "generationConfig.thinkingConfig.thinkingBudget" - rewritten, err := sjson.SetBytes(updated, valuePath, *budget) - if err == nil { - updated = rewritten - } - } - // Default to including thoughts when a budget override is present but no explicit include flag is provided. 
- incl := includeThoughts - if incl == nil && budget != nil && *budget != 0 { - defaultInclude := true - incl = &defaultInclude - } - if incl != nil { - if !gjson.GetBytes(updated, "generationConfig.thinkingConfig.includeThoughts").Exists() && - !gjson.GetBytes(updated, "generationConfig.thinkingConfig.include_thoughts").Exists() { - valuePath := "generationConfig.thinkingConfig.include_thoughts" - rewritten, err := sjson.SetBytes(updated, valuePath, *incl) - if err == nil { - updated = rewritten - } - } - } - return updated -} - -func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *bool) []byte { - if budget == nil && includeThoughts == nil { - return body - } - updated := body - if budget != nil { - valuePath := "request.generationConfig.thinkingConfig.thinkingBudget" - rewritten, err := sjson.SetBytes(updated, valuePath, *budget) - if err == nil { - updated = rewritten - } - } - // Default to including thoughts when a budget override is present but no explicit include flag is provided. - incl := includeThoughts - if incl == nil && budget != nil && *budget != 0 { - defaultInclude := true - incl = &defaultInclude - } - if incl != nil { - if !gjson.GetBytes(updated, "request.generationConfig.thinkingConfig.includeThoughts").Exists() && - !gjson.GetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts").Exists() { - valuePath := "request.generationConfig.thinkingConfig.include_thoughts" - rewritten, err := sjson.SetBytes(updated, valuePath, *incl) - if err == nil { - updated = rewritten - } - } - } - return updated -} - -// ApplyGeminiThinkingLevel applies thinkingLevel config for Gemini 3 models. -// For standard Gemini API format (generationConfig.thinkingConfig path). -// Per Google's documentation, Gemini 3 models should use thinkingLevel instead of thinkingBudget. -func ApplyGeminiThinkingLevel(body []byte, level string, includeThoughts *bool) []byte { - if level == "" && includeThoughts == nil { - return body - } - updated := body - if level != "" { - valuePath := "generationConfig.thinkingConfig.thinkingLevel" - rewritten, err := sjson.SetBytes(updated, valuePath, level) - if err == nil { - updated = rewritten - } - } - // Default to including thoughts when a level is set but no explicit include flag is provided. - incl := includeThoughts - if incl == nil && level != "" { - defaultInclude := true - incl = &defaultInclude - } - if incl != nil { - if !gjson.GetBytes(updated, "generationConfig.thinkingConfig.includeThoughts").Exists() && - !gjson.GetBytes(updated, "generationConfig.thinkingConfig.include_thoughts").Exists() { - valuePath := "generationConfig.thinkingConfig.includeThoughts" - rewritten, err := sjson.SetBytes(updated, valuePath, *incl) - if err == nil { - updated = rewritten - } - } - } - if tb := gjson.GetBytes(body, "generationConfig.thinkingConfig.thinkingBudget"); tb.Exists() { - updated, _ = sjson.DeleteBytes(updated, "generationConfig.thinkingConfig.thinkingBudget") - } - return updated -} - -// ApplyGeminiCLIThinkingLevel applies thinkingLevel config for Gemini 3 models. -// For Gemini CLI API format (request.generationConfig.thinkingConfig path). -// Per Google's documentation, Gemini 3 models should use thinkingLevel instead of thinkingBudget. 
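All of these deleted appliers share one sjson idiom: write the override, then set the include-thoughts flag only when neither key spelling is already present, so an explicit client value wins. Condensed for the CLI envelope (the paths are the ones used above; the surrounding program is illustrative):

package main

import (
	"fmt"

	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

func main() {
	body := []byte(`{"request":{"contents":[]}}`)
	const cfg = "request.generationConfig.thinkingConfig"

	body, _ = sjson.SetBytes(body, cfg+".thinkingLevel", "low")
	// Only default includeThoughts when the client has not set either spelling.
	if !gjson.GetBytes(body, cfg+".includeThoughts").Exists() &&
		!gjson.GetBytes(body, cfg+".include_thoughts").Exists() {
		body, _ = sjson.SetBytes(body, cfg+".includeThoughts", true)
	}
	fmt.Println(string(body))
}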
-func ApplyGeminiCLIThinkingLevel(body []byte, level string, includeThoughts *bool) []byte { - if level == "" && includeThoughts == nil { - return body - } - updated := body - if level != "" { - valuePath := "request.generationConfig.thinkingConfig.thinkingLevel" - rewritten, err := sjson.SetBytes(updated, valuePath, level) - if err == nil { - updated = rewritten - } - } - // Default to including thoughts when a level is set but no explicit include flag is provided. - incl := includeThoughts - if incl == nil && level != "" { - defaultInclude := true - incl = &defaultInclude - } - if incl != nil { - if !gjson.GetBytes(updated, "request.generationConfig.thinkingConfig.includeThoughts").Exists() && - !gjson.GetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts").Exists() { - valuePath := "request.generationConfig.thinkingConfig.includeThoughts" - rewritten, err := sjson.SetBytes(updated, valuePath, *incl) - if err == nil { - updated = rewritten - } - } - } - if tb := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget"); tb.Exists() { - updated, _ = sjson.DeleteBytes(updated, "request.generationConfig.thinkingConfig.thinkingBudget") - } - return updated -} - -// ValidateGemini3ThinkingLevel validates that the thinkingLevel is valid for the Gemini 3 model variant. -// Returns the validated level (normalized to lowercase) and true if valid, or empty string and false if invalid. -func ValidateGemini3ThinkingLevel(model, level string) (string, bool) { - if level == "" { - return "", false - } - normalized := strings.ToLower(strings.TrimSpace(level)) - - var validLevels []string - if IsGemini3ProModel(model) { - validLevels = Gemini3ProThinkingLevels - } else if IsGemini3FlashModel(model) { - validLevels = Gemini3FlashThinkingLevels - } else if IsGemini3Model(model) { - // Unknown Gemini 3 variant - allow all levels as fallback - validLevels = Gemini3FlashThinkingLevels - } else { - return "", false - } - - for _, valid := range validLevels { - if normalized == valid { - return normalized, true - } - } - return "", false -} - -// ThinkingBudgetToGemini3Level converts a thinkingBudget to a thinkingLevel for Gemini 3 models. -// This provides backward compatibility when thinkingBudget is provided for Gemini 3 models. -// Returns the appropriate thinkingLevel and true if conversion is possible. -func ThinkingBudgetToGemini3Level(model string, budget int) (string, bool) { - if !IsGemini3Model(model) { - return "", false - } - - // Map budget to level based on Google's documentation - // Gemini 3 Pro: "low", "high" (default: "high") - // Gemini 3 Flash: "minimal", "low", "medium", "high" (default: "high") - switch { - case budget == -1: - // Dynamic budget maps to "high" (API default) - return "high", true - case budget == 0: - // Zero budget - Gemini 3 doesn't support disabling thinking - // Map to lowest available level - if IsGemini3FlashModel(model) { - return "minimal", true - } - return "low", true - case budget > 0 && budget <= 512: - if IsGemini3FlashModel(model) { - return "minimal", true - } - return "low", true - case budget <= 1024: - return "low", true - case budget <= 8192: - if IsGemini3FlashModel(model) { - return "medium", true - } - return "low", true // Pro doesn't have medium, use low - default: - return "high", true - } -} - -// modelsWithDefaultThinking lists models that should have thinking enabled by default -// when no explicit thinkingConfig is provided. 
-// Note: Gemini 3 models are NOT included here because per Google's official documentation: -// - thinkingLevel defaults to "high" (dynamic thinking) -// - includeThoughts defaults to false -// -// We should not override these API defaults; let users explicitly configure if needed. -var modelsWithDefaultThinking = map[string]bool{ - // "gemini-3-pro-preview": true, - // "gemini-3-pro-image-preview": true, - // "gemini-3-flash-preview": true, -} - -// ModelHasDefaultThinking returns true if the model should have thinking enabled by default. -func ModelHasDefaultThinking(model string) bool { - return modelsWithDefaultThinking[model] -} - -// ApplyDefaultThinkingIfNeeded injects default thinkingConfig for models that require it. -// For standard Gemini API format (generationConfig.thinkingConfig path). -// Returns the modified body if thinkingConfig was added, otherwise returns the original. -// For Gemini 3 models, uses thinkingLevel instead of thinkingBudget per Google's documentation. -func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte { - if !ModelHasDefaultThinking(model) { - return body - } - if gjson.GetBytes(body, "generationConfig.thinkingConfig").Exists() { - return body - } - // Gemini 3 models use thinkingLevel instead of thinkingBudget - if IsGemini3Model(model) { - // Don't set a default - let the API use its dynamic default ("high") - // Only set includeThoughts - updated, _ := sjson.SetBytes(body, "generationConfig.thinkingConfig.includeThoughts", true) - return updated - } - // Gemini 2.5 and other models use thinkingBudget - updated, _ := sjson.SetBytes(body, "generationConfig.thinkingConfig.thinkingBudget", -1) - updated, _ = sjson.SetBytes(updated, "generationConfig.thinkingConfig.include_thoughts", true) - return updated -} - -// ApplyGemini3ThinkingLevelFromMetadata applies thinkingLevel from metadata for Gemini 3 models. -// For standard Gemini API format (generationConfig.thinkingConfig path). -// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)) -// or numeric budget suffix (e.g., model(1000)) which gets converted to a thinkingLevel. -func ApplyGemini3ThinkingLevelFromMetadata(model string, metadata map[string]any, body []byte) []byte { - // Use the alias from metadata if available for model type detection - lookupModel := ResolveOriginalModel(model, metadata) - if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) { - return body - } - - // Determine which model to use for validation - checkModel := model - if IsGemini3Model(lookupModel) { - checkModel = lookupModel - } - - // First try to get effort string from metadata - effort, ok := ReasoningEffortFromMetadata(metadata) - if ok && effort != "" { - if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid { - return ApplyGeminiThinkingLevel(body, level, nil) - } - } - - // Fallback: check for numeric budget and convert to thinkingLevel - budget, _, _, matched := ThinkingFromMetadata(metadata) - if matched && budget != nil { - if level, valid := ThinkingBudgetToGemini3Level(checkModel, *budget); valid { - return ApplyGeminiThinkingLevel(body, level, nil) - } - } - - return body -} - -// ApplyGemini3ThinkingLevelFromMetadataCLI applies thinkingLevel from metadata for Gemini 3 models. -// For Gemini CLI API format (request.generationConfig.thinkingConfig path). 
-// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)) -// or numeric budget suffix (e.g., model(1000)) which gets converted to a thinkingLevel. -func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string]any, body []byte) []byte { - // Use the alias from metadata if available for model type detection - lookupModel := ResolveOriginalModel(model, metadata) - if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) { - return body - } - - // Determine which model to use for validation - checkModel := model - if IsGemini3Model(lookupModel) { - checkModel = lookupModel - } - - // First try to get effort string from metadata - effort, ok := ReasoningEffortFromMetadata(metadata) - if ok && effort != "" { - if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid { - return ApplyGeminiCLIThinkingLevel(body, level, nil) - } - } - - // Fallback: check for numeric budget and convert to thinkingLevel - budget, _, _, matched := ThinkingFromMetadata(metadata) - if matched && budget != nil { - if level, valid := ThinkingBudgetToGemini3Level(checkModel, *budget); valid { - return ApplyGeminiCLIThinkingLevel(body, level, nil) - } - } - - return body -} - -// ApplyDefaultThinkingIfNeededCLI injects default thinkingConfig for models that require it. -// For Gemini CLI API format (request.generationConfig.thinkingConfig path). -// Returns the modified body if thinkingConfig was added, otherwise returns the original. -// For Gemini 3 models, uses thinkingLevel instead of thinkingBudget per Google's documentation. -func ApplyDefaultThinkingIfNeededCLI(model string, metadata map[string]any, body []byte) []byte { - // Use the alias from metadata if available for model property lookup - lookupModel := ResolveOriginalModel(model, metadata) - if !ModelHasDefaultThinking(lookupModel) && !ModelHasDefaultThinking(model) { - return body - } - if gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() { - return body - } - // Gemini 3 models use thinkingLevel instead of thinkingBudget - if IsGemini3Model(lookupModel) || IsGemini3Model(model) { - // Don't set a default - let the API use its dynamic default ("high") - // Only set includeThoughts - updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts", true) - return updated - } - // Gemini 2.5 and other models use thinkingBudget - updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget", -1) - updated, _ = sjson.SetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts", true) - return updated -} - -// StripThinkingConfigIfUnsupported removes thinkingConfig from the request body -// when the target model does not advertise Thinking capability. It cleans both -// standard Gemini and Gemini CLI JSON envelopes. This acts as a final safety net -// in case upstream injected thinking for an unsupported model. -func StripThinkingConfigIfUnsupported(model string, body []byte) []byte { - if ModelSupportsThinking(model) || len(body) == 0 { - return body - } - updated := body - // Gemini CLI path - updated, _ = sjson.DeleteBytes(updated, "request.generationConfig.thinkingConfig") - // Standard Gemini path - updated, _ = sjson.DeleteBytes(updated, "generationConfig.thinkingConfig") - return updated -} - -// NormalizeGeminiThinkingBudget normalizes the thinkingBudget value in a standard Gemini -// request body (generationConfig.thinkingConfig.thinkingBudget path). 
-// For Gemini 3 models, converts thinkingBudget to thinkingLevel per Google's documentation, -// unless skipGemini3Check is provided and true. -func NormalizeGeminiThinkingBudget(model string, body []byte, skipGemini3Check ...bool) []byte { - const budgetPath = "generationConfig.thinkingConfig.thinkingBudget" - const levelPath = "generationConfig.thinkingConfig.thinkingLevel" - - budget := gjson.GetBytes(body, budgetPath) - if !budget.Exists() { - return body - } - - // For Gemini 3 models, convert thinkingBudget to thinkingLevel - skipGemini3 := len(skipGemini3Check) > 0 && skipGemini3Check[0] - if IsGemini3Model(model) && !skipGemini3 { - if level, ok := ThinkingBudgetToGemini3Level(model, int(budget.Int())); ok { - updated, _ := sjson.SetBytes(body, levelPath, level) - updated, _ = sjson.DeleteBytes(updated, budgetPath) - return updated - } - // If conversion fails, just remove the budget (let API use default) - updated, _ := sjson.DeleteBytes(body, budgetPath) - return updated - } - - // For Gemini 2.5 and other models, normalize the budget value - normalized := NormalizeThinkingBudget(model, int(budget.Int())) - updated, _ := sjson.SetBytes(body, budgetPath, normalized) - return updated -} - -// NormalizeGeminiCLIThinkingBudget normalizes the thinkingBudget value in a Gemini CLI -// request body (request.generationConfig.thinkingConfig.thinkingBudget path). -// For Gemini 3 models, converts thinkingBudget to thinkingLevel per Google's documentation, -// unless skipGemini3Check is provided and true. -func NormalizeGeminiCLIThinkingBudget(model string, body []byte, skipGemini3Check ...bool) []byte { - const budgetPath = "request.generationConfig.thinkingConfig.thinkingBudget" - const levelPath = "request.generationConfig.thinkingConfig.thinkingLevel" - - budget := gjson.GetBytes(body, budgetPath) - if !budget.Exists() { - return body - } - - // For Gemini 3 models, convert thinkingBudget to thinkingLevel - skipGemini3 := len(skipGemini3Check) > 0 && skipGemini3Check[0] - if IsGemini3Model(model) && !skipGemini3 { - if level, ok := ThinkingBudgetToGemini3Level(model, int(budget.Int())); ok { - updated, _ := sjson.SetBytes(body, levelPath, level) - updated, _ = sjson.DeleteBytes(updated, budgetPath) - return updated - } - // If conversion fails, just remove the budget (let API use default) - updated, _ := sjson.DeleteBytes(body, budgetPath) - return updated - } - - // For Gemini 2.5 and other models, normalize the budget value - normalized := NormalizeThinkingBudget(model, int(budget.Int())) - updated, _ := sjson.SetBytes(body, budgetPath, normalized) - return updated -} - -// ReasoningEffortBudgetMapping defines the thinkingBudget values for each reasoning effort level. -var ReasoningEffortBudgetMapping = map[string]int{ - "none": 0, - "auto": -1, - "minimal": 512, - "low": 1024, - "medium": 8192, - "high": 24576, - "xhigh": 32768, -} - -// ApplyReasoningEffortToGemini applies OpenAI reasoning_effort to Gemini thinkingConfig -// for standard Gemini API format (generationConfig.thinkingConfig path). -// Returns the modified body with thinkingBudget and include_thoughts set. 
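ApplyReasoningEffortToGemini, deleted below, consults this table and writes the budget into the request, while "none" removes the whole thinkingConfig instead. A compressed sketch of that flow (the map literal repeats the table above; everything else is illustrative):

package main

import (
	"fmt"
	"strings"

	"github.com/tidwall/sjson"
)

// effortBudgets repeats ReasoningEffortBudgetMapping minus "none", which is
// handled as a deletion rather than a budget.
var effortBudgets = map[string]int{
	"auto": -1, "minimal": 512, "low": 1024,
	"medium": 8192, "high": 24576, "xhigh": 32768,
}

func main() {
	body := `{"contents":[]}`
	effort := strings.ToLower(strings.TrimSpace("High"))
	if effort == "none" {
		body, _ = sjson.Delete(body, "generationConfig.thinkingConfig")
	} else if budget, ok := effortBudgets[effort]; ok {
		body, _ = sjson.Set(body, "generationConfig.thinkingConfig.thinkingBudget", budget)
		body, _ = sjson.Set(body, "generationConfig.thinkingConfig.include_thoughts", true)
	}
	fmt.Println(body) // thinkingBudget=24576, include_thoughts=true
}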
-func ApplyReasoningEffortToGemini(body []byte, effort string) []byte { - normalized := strings.ToLower(strings.TrimSpace(effort)) - if normalized == "" { - return body - } - - budgetPath := "generationConfig.thinkingConfig.thinkingBudget" - includePath := "generationConfig.thinkingConfig.include_thoughts" - - if normalized == "none" { - body, _ = sjson.DeleteBytes(body, "generationConfig.thinkingConfig") - return body - } - - budget, ok := ReasoningEffortBudgetMapping[normalized] - if !ok { - return body - } - - body, _ = sjson.SetBytes(body, budgetPath, budget) - body, _ = sjson.SetBytes(body, includePath, true) - return body -} - -// ApplyReasoningEffortToGeminiCLI applies OpenAI reasoning_effort to Gemini CLI thinkingConfig -// for Gemini CLI API format (request.generationConfig.thinkingConfig path). -// Returns the modified body with thinkingBudget and include_thoughts set. -func ApplyReasoningEffortToGeminiCLI(body []byte, effort string) []byte { - normalized := strings.ToLower(strings.TrimSpace(effort)) - if normalized == "" { - return body - } - - budgetPath := "request.generationConfig.thinkingConfig.thinkingBudget" - includePath := "request.generationConfig.thinkingConfig.include_thoughts" - - if normalized == "none" { - body, _ = sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig") - return body - } - - budget, ok := ReasoningEffortBudgetMapping[normalized] - if !ok { - return body - } - - body, _ = sjson.SetBytes(body, budgetPath, budget) - body, _ = sjson.SetBytes(body, includePath, true) - return body -} - -// ConvertThinkingLevelToBudget checks for "generationConfig.thinkingConfig.thinkingLevel" -// and converts it to "thinkingBudget" for Gemini 2.5 models. -// For Gemini 3 models, preserves thinkingLevel unless skipGemini3Check is provided and true. -// Mappings for Gemini 2.5: -// - "high" -> 32768 -// - "medium" -> 8192 -// - "low" -> 1024 -// - "minimal" -> 512 -// -// It removes "thinkingLevel" after conversion (for Gemini 2.5 only). -func ConvertThinkingLevelToBudget(body []byte, model string, skipGemini3Check ...bool) []byte { - levelPath := "generationConfig.thinkingConfig.thinkingLevel" - res := gjson.GetBytes(body, levelPath) - if !res.Exists() { - return body - } - - // For Gemini 3 models, preserve thinkingLevel unless explicitly skipped - skipGemini3 := len(skipGemini3Check) > 0 && skipGemini3Check[0] - if IsGemini3Model(model) && !skipGemini3 { - return body - } - - budget, ok := ThinkingLevelToBudget(res.String()) - if !ok { - updated, _ := sjson.DeleteBytes(body, levelPath) - return updated - } - - budgetPath := "generationConfig.thinkingConfig.thinkingBudget" - updated, err := sjson.SetBytes(body, budgetPath, budget) - if err != nil { - return body - } - - updated, err = sjson.DeleteBytes(updated, levelPath) - if err != nil { - return body - } - return updated -} - -// ConvertThinkingLevelToBudgetCLI checks for "request.generationConfig.thinkingConfig.thinkingLevel" -// and converts it to "thinkingBudget" for Gemini 2.5 models. -// For Gemini 3 models, preserves thinkingLevel as-is (does not convert). 
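Both ConvertThinkingLevelToBudget variants apply the same table (minimal→512, low→1024, medium→8192, high→32768) and only rewrite Gemini 2.5 requests, leaving thinkingLevel intact for Gemini 3. A sketch of the 2.5 branch in isolation:

package main

import (
	"fmt"

	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

var levelBudgets = map[string]int{"minimal": 512, "low": 1024, "medium": 8192, "high": 32768}

func main() {
	body := `{"generationConfig":{"thinkingConfig":{"thinkingLevel":"medium"}}}`
	const levelPath = "generationConfig.thinkingConfig.thinkingLevel"

	if level := gjson.Get(body, levelPath); level.Exists() {
		if budget, ok := levelBudgets[level.String()]; ok {
			body, _ = sjson.Set(body, "generationConfig.thinkingConfig.thinkingBudget", budget)
		}
		body, _ = sjson.Delete(body, levelPath) // unknown levels are dropped without a budget
	}
	fmt.Println(body) // {"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}
}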
-func ConvertThinkingLevelToBudgetCLI(body []byte, model string) []byte { - levelPath := "request.generationConfig.thinkingConfig.thinkingLevel" - res := gjson.GetBytes(body, levelPath) - if !res.Exists() { - return body - } - - // For Gemini 3 models, preserve thinkingLevel - don't convert to budget - if IsGemini3Model(model) { - return body - } - - budget, ok := ThinkingLevelToBudget(res.String()) - if !ok { - updated, _ := sjson.DeleteBytes(body, levelPath) - return updated - } - - budgetPath := "request.generationConfig.thinkingConfig.thinkingBudget" - updated, err := sjson.SetBytes(body, budgetPath, budget) - if err != nil { - return body - } - - updated, err = sjson.DeleteBytes(updated, levelPath) - if err != nil { - return body - } - return updated -} diff --git a/internal/util/thinking.go b/internal/util/thinking.go deleted file mode 100644 index 3ce1bb0d..00000000 --- a/internal/util/thinking.go +++ /dev/null @@ -1,245 +0,0 @@ -package util - -import ( - "strings" - - "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" -) - -// ModelSupportsThinking reports whether the given model has Thinking capability -// according to the model registry metadata (provider-agnostic). -func ModelSupportsThinking(model string) bool { - if model == "" { - return false - } - // First check the global dynamic registry - if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil { - return info.Thinking != nil - } - // Fallback: check static model definitions - if info := registry.LookupStaticModelInfo(model); info != nil { - return info.Thinking != nil - } - // Fallback: check Antigravity static config - if cfg := registry.GetAntigravityModelConfig()[model]; cfg != nil { - return cfg.Thinking != nil - } - return false -} - -// NormalizeThinkingBudget clamps the requested thinking budget to the -// supported range for the specified model using registry metadata only. -// If the model is unknown or has no Thinking metadata, returns the original budget. -// For dynamic (-1), returns -1 if DynamicAllowed; otherwise approximates mid-range -// or min (0 if zero is allowed and mid <= 0). -func NormalizeThinkingBudget(model string, budget int) int { - if budget == -1 { // dynamic - if found, minBudget, maxBudget, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found { - if dynamicAllowed { - return -1 - } - mid := (minBudget + maxBudget) / 2 - if mid <= 0 && zeroAllowed { - return 0 - } - if mid <= 0 { - return minBudget - } - return mid - } - return -1 - } - if found, minBudget, maxBudget, zeroAllowed, _ := thinkingRangeFromRegistry(model); found { - if budget == 0 { - if zeroAllowed { - return 0 - } - return minBudget - } - if budget < minBudget { - return minBudget - } - if budget > maxBudget { - return maxBudget - } - return budget - } - return budget -} - -// thinkingRangeFromRegistry attempts to read thinking ranges from the model registry. 
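Once thinkingRangeFromRegistry (below) has produced a range, NormalizeThinkingBudget above reduces to a clamp. A simplified version with a hypothetical range standing in for the registry (the real helper also mid-points dynamic budgets when -1 is not allowed):

package main

import "fmt"

// clamp approximates the removed normalization for a model that allows
// 128..32768 tokens, an explicit zero, and dynamic (-1).
func clamp(budget, lo, hi int, zeroAllowed, dynamicAllowed bool) int {
	switch {
	case budget == -1 && dynamicAllowed:
		return -1
	case budget == 0 && zeroAllowed:
		return 0
	case budget < lo:
		return lo
	case budget > hi:
		return hi
	default:
		return budget
	}
}

func main() {
	fmt.Println(clamp(50, 128, 32768, true, true))     // 128
	fmt.Println(clamp(100000, 128, 32768, true, true)) // 32768
}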
-func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zeroAllowed bool, dynamicAllowed bool) { - if model == "" { - return false, 0, 0, false, false - } - // First check global dynamic registry - if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil && info.Thinking != nil { - return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed - } - // Fallback: check static model definitions - if info := registry.LookupStaticModelInfo(model); info != nil && info.Thinking != nil { - return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed - } - // Fallback: check Antigravity static config - if cfg := registry.GetAntigravityModelConfig()[model]; cfg != nil && cfg.Thinking != nil { - return true, cfg.Thinking.Min, cfg.Thinking.Max, cfg.Thinking.ZeroAllowed, cfg.Thinking.DynamicAllowed - } - return false, 0, 0, false, false -} - -// GetModelThinkingLevels returns the discrete reasoning effort levels for the model. -// Returns nil if the model has no thinking support or no levels defined. -func GetModelThinkingLevels(model string) []string { - if model == "" { - return nil - } - info := registry.GetGlobalRegistry().GetModelInfo(model) - if info == nil || info.Thinking == nil { - return nil - } - return info.Thinking.Levels -} - -// ModelUsesThinkingLevels reports whether the model uses discrete reasoning -// effort levels instead of numeric budgets. -func ModelUsesThinkingLevels(model string) bool { - levels := GetModelThinkingLevels(model) - return len(levels) > 0 -} - -// NormalizeReasoningEffortLevel validates and normalizes a reasoning effort -// level for the given model. Returns false when the level is not supported. -func NormalizeReasoningEffortLevel(model, effort string) (string, bool) { - levels := GetModelThinkingLevels(model) - if len(levels) == 0 { - return "", false - } - loweredEffort := strings.ToLower(strings.TrimSpace(effort)) - for _, lvl := range levels { - if strings.ToLower(lvl) == loweredEffort { - return lvl, true - } - } - return "", false -} - -// IsOpenAICompatibilityModel reports whether the model is registered as an OpenAI-compatibility model. -// These models may not advertise Thinking metadata in the registry. -func IsOpenAICompatibilityModel(model string) bool { - if model == "" { - return false - } - info := registry.GetGlobalRegistry().GetModelInfo(model) - if info == nil { - return false - } - return strings.EqualFold(strings.TrimSpace(info.Type), "openai-compatibility") -} - -// ThinkingEffortToBudget maps a reasoning effort level to a numeric thinking budget (tokens), -// clamping the result to the model's supported range. -// -// Mappings (values are normalized to model's supported range): -// - "none" -> 0 -// - "auto" -> -1 -// - "minimal" -> 512 -// - "low" -> 1024 -// - "medium" -> 8192 -// - "high" -> 24576 -// - "xhigh" -> 32768 -// -// Returns false when the effort level is empty or unsupported. 
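The lookup order used throughout these helpers is the same three-tier chain: the dynamic registry first, then static model definitions, then the Antigravity config. The pattern, with stand-in lookup functions rather than the real registry API:

package main

import "fmt"

type thinkingRange struct {
	Min, Max                    int
	ZeroAllowed, DynamicAllowed bool
}

// firstHit returns the first non-nil result in priority order.
func firstHit(model string, sources ...func(string) *thinkingRange) *thinkingRange {
	for _, src := range sources {
		if r := src(model); r != nil {
			return r
		}
	}
	return nil
}

func main() {
	dynamic := func(string) *thinkingRange { return nil } // nothing registered yet
	static := func(m string) *thinkingRange {
		if m == "gemini-2.5-pro" {
			return &thinkingRange{Min: 128, Max: 32768, DynamicAllowed: true}
		}
		return nil
	}
	antigravity := func(string) *thinkingRange { return nil }

	fmt.Printf("%+v\n", firstHit("gemini-2.5-pro", dynamic, static, antigravity))
}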
-func ThinkingEffortToBudget(model, effort string) (int, bool) { - if effort == "" { - return 0, false - } - normalized, ok := NormalizeReasoningEffortLevel(model, effort) - if !ok { - normalized = strings.ToLower(strings.TrimSpace(effort)) - } - switch normalized { - case "none": - return 0, true - case "auto": - return NormalizeThinkingBudget(model, -1), true - case "minimal": - return NormalizeThinkingBudget(model, 512), true - case "low": - return NormalizeThinkingBudget(model, 1024), true - case "medium": - return NormalizeThinkingBudget(model, 8192), true - case "high": - return NormalizeThinkingBudget(model, 24576), true - case "xhigh": - return NormalizeThinkingBudget(model, 32768), true - default: - return 0, false - } -} - -// ThinkingLevelToBudget maps a Gemini thinkingLevel to a numeric thinking budget (tokens). -// -// Mappings: -// - "minimal" -> 512 -// - "low" -> 1024 -// - "medium" -> 8192 -// - "high" -> 32768 -// -// Returns false when the level is empty or unsupported. -func ThinkingLevelToBudget(level string) (int, bool) { - if level == "" { - return 0, false - } - normalized := strings.ToLower(strings.TrimSpace(level)) - switch normalized { - case "minimal": - return 512, true - case "low": - return 1024, true - case "medium": - return 8192, true - case "high": - return 32768, true - default: - return 0, false - } -} - -// ThinkingBudgetToEffort maps a numeric thinking budget (tokens) -// to a reasoning effort level for level-based models. -// -// Mappings: -// - 0 -> "none" (or lowest supported level if model doesn't support "none") -// - -1 -> "auto" -// - 1..1024 -> "low" -// - 1025..8192 -> "medium" -// - 8193..24576 -> "high" -// - 24577.. -> highest supported level for the model (defaults to "xhigh") -// -// Returns false when the budget is unsupported (negative values other than -1). -func ThinkingBudgetToEffort(model string, budget int) (string, bool) { - switch { - case budget == -1: - return "auto", true - case budget < -1: - return "", false - case budget == 0: - if levels := GetModelThinkingLevels(model); len(levels) > 0 { - return levels[0], true - } - return "none", true - case budget > 0 && budget <= 1024: - return "low", true - case budget <= 8192: - return "medium", true - case budget <= 24576: - return "high", true - case budget > 24576: - if levels := GetModelThinkingLevels(model); len(levels) > 0 { - return levels[len(levels)-1], true - } - return "xhigh", true - default: - return "", false - } -} diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go deleted file mode 100644 index 0a72b4c5..00000000 --- a/internal/util/thinking_suffix.go +++ /dev/null @@ -1,296 +0,0 @@ -package util - -import ( - "encoding/json" - "strconv" - "strings" -) - -const ( - ThinkingBudgetMetadataKey = "thinking_budget" - ThinkingIncludeThoughtsMetadataKey = "thinking_include_thoughts" - ReasoningEffortMetadataKey = "reasoning_effort" - ThinkingOriginalModelMetadataKey = "thinking_original_model" - ModelMappingOriginalModelMetadataKey = "model_mapping_original_model" -) - -// NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns -// the normalized base model with extracted metadata. 
Supported pattern:
-//   - "<model>(<value>)" where value can be:
-//   - A numeric budget (e.g., "(8192)", "(16384)")
-//   - A reasoning effort level (e.g., "(high)", "(medium)", "(low)")
-//
-// Examples:
-//   - "claude-sonnet-4-5-20250929(16384)" → budget=16384
-//   - "gpt-5.1(high)" → reasoning_effort="high"
-//   - "gemini-2.5-pro(32768)" → budget=32768
-//
-// Note: Empty parentheses "()" are not supported and will be ignored.
-func NormalizeThinkingModel(modelName string) (string, map[string]any) {
- if modelName == "" {
- return modelName, nil
- }
-
- baseModel := modelName
-
- var (
- budgetOverride *int
- reasoningEffort *string
- matched bool
- )
-
- // Match "<model>(<value>)" pattern at the end of the model name
- if idx := strings.LastIndex(modelName, "("); idx != -1 {
- if !strings.HasSuffix(modelName, ")") {
- // Incomplete parenthesis, ignore
- return baseModel, nil
- }
-
- value := modelName[idx+1 : len(modelName)-1] // Extract content between ( and )
- if value == "" {
- // Empty parentheses not supported
- return baseModel, nil
- }
-
- candidateBase := modelName[:idx]
-
- // Auto-detect: pure numeric → budget, string → reasoning effort level
- if parsed, ok := parseIntPrefix(value); ok {
- // Numeric value: treat as thinking budget
- baseModel = candidateBase
- budgetOverride = &parsed
- matched = true
- } else {
- // String value: treat as reasoning effort level
- baseModel = candidateBase
- raw := strings.ToLower(strings.TrimSpace(value))
- if raw != "" {
- reasoningEffort = &raw
- matched = true
- }
- }
- }
-
- if !matched {
- return baseModel, nil
- }
-
- metadata := map[string]any{
- ThinkingOriginalModelMetadataKey: modelName,
- }
- if budgetOverride != nil {
- metadata[ThinkingBudgetMetadataKey] = *budgetOverride
- }
- if reasoningEffort != nil {
- metadata[ReasoningEffortMetadataKey] = *reasoningEffort
- }
- return baseModel, metadata
-}
-
-// ThinkingFromMetadata extracts thinking overrides from metadata produced by NormalizeThinkingModel.
-// It accepts both the new generic keys and legacy Gemini-specific keys.
-func ThinkingFromMetadata(metadata map[string]any) (*int, *bool, *string, bool) {
- if len(metadata) == 0 {
- return nil, nil, nil, false
- }
-
- var (
- budgetPtr *int
- includePtr *bool
- effortPtr *string
- matched bool
- )
-
- readBudget := func(key string) {
- if budgetPtr != nil {
- return
- }
- if raw, ok := metadata[key]; ok {
- if v, okNumber := parseNumberToInt(raw); okNumber {
- budget := v
- budgetPtr = &budget
- matched = true
- }
- }
- }
-
- readInclude := func(key string) {
- if includePtr != nil {
- return
- }
- if raw, ok := metadata[key]; ok {
- switch v := raw.(type) {
- case bool:
- val := v
- includePtr = &val
- matched = true
- case *bool:
- if v != nil {
- val := *v
- includePtr = &val
- matched = true
- }
- }
- }
- }
-
- readEffort := func(key string) {
- if effortPtr != nil {
- return
- }
- if raw, ok := metadata[key]; ok {
- if val, okStr := raw.(string); okStr && strings.TrimSpace(val) != "" {
- normalized := strings.ToLower(strings.TrimSpace(val))
- effortPtr = &normalized
- matched = true
- }
- }
- }
-
- readBudget(ThinkingBudgetMetadataKey)
- readBudget(GeminiThinkingBudgetMetadataKey)
- readInclude(ThinkingIncludeThoughtsMetadataKey)
- readInclude(GeminiIncludeThoughtsMetadataKey)
- readEffort(ReasoningEffortMetadataKey)
- readEffort("reasoning.effort")
-
- return budgetPtr, includePtr, effortPtr, matched
-}
-
-// ResolveThinkingConfigFromMetadata derives thinking budget/include overrides,
-// converting reasoning effort strings into budgets when possible.
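The suffix grammar is easy to exercise on its own; a standalone re-implementation of the parse step (simplified: the original tolerates a numeric prefix via parseIntPrefix, while this sketch requires the whole value to be numeric):

package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseSuffix splits a trailing "(...)" into either a numeric budget or a
// lower-cased reasoning effort level, as NormalizeThinkingModel did.
func parseSuffix(model string) (base string, budget int, effort string, ok bool) {
	idx := strings.LastIndex(model, "(")
	if idx == -1 || !strings.HasSuffix(model, ")") {
		return model, 0, "", false
	}
	val := model[idx+1 : len(model)-1]
	if val == "" {
		return model, 0, "", false // empty parentheses are ignored
	}
	if n, err := strconv.Atoi(val); err == nil {
		return model[:idx], n, "", true
	}
	return model[:idx], 0, strings.ToLower(strings.TrimSpace(val)), true
}

func main() {
	fmt.Println(parseSuffix("claude-sonnet-4-5-20250929(16384)")) // budget=16384
	fmt.Println(parseSuffix("gpt-5.1(high)"))                     // effort="high"
}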
-func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (*int, *bool, bool) { - budget, include, effort, matched := ThinkingFromMetadata(metadata) - if !matched { - return nil, nil, false - } - // Level-based models (OpenAI-style) do not accept numeric thinking budgets in - // Claude/Gemini-style protocols, so we don't derive budgets for them here. - if ModelUsesThinkingLevels(model) { - return nil, nil, false - } - - if budget == nil && effort != nil { - if derived, ok := ThinkingEffortToBudget(model, *effort); ok { - budget = &derived - } - } - return budget, include, budget != nil || include != nil || effort != nil -} - -// ReasoningEffortFromMetadata resolves a reasoning effort string from metadata, -// inferring "auto" and "none" when budgets request dynamic or disabled thinking. -func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) { - budget, include, effort, matched := ThinkingFromMetadata(metadata) - if !matched { - return "", false - } - if effort != nil && *effort != "" { - return strings.ToLower(strings.TrimSpace(*effort)), true - } - if budget != nil { - switch *budget { - case -1: - return "auto", true - case 0: - return "none", true - } - } - if include != nil && !*include { - return "none", true - } - return "", true -} - -// ResolveOriginalModel returns the original model name stored in metadata (if present), -// otherwise falls back to the provided model. -func ResolveOriginalModel(model string, metadata map[string]any) string { - normalize := func(name string) string { - if name == "" { - return "" - } - if base, _ := NormalizeThinkingModel(name); base != "" { - return base - } - return strings.TrimSpace(name) - } - - if metadata != nil { - if v, ok := metadata[ModelMappingOriginalModelMetadataKey]; ok { - if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" { - if base := normalize(s); base != "" { - return base - } - } - } - if v, ok := metadata[ThinkingOriginalModelMetadataKey]; ok { - if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" { - if base := normalize(s); base != "" { - return base - } - } - } - if v, ok := metadata[GeminiOriginalModelMetadataKey]; ok { - if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" { - if base := normalize(s); base != "" { - return base - } - } - } - } - // Fallback: try to re-normalize the model name when metadata was dropped. 
- if base := normalize(model); base != "" { - return base - } - return model -} - -func parseIntPrefix(value string) (int, bool) { - if value == "" { - return 0, false - } - digits := strings.TrimLeft(value, "-") - if digits == "" { - return 0, false - } - end := len(digits) - for i := 0; i < len(digits); i++ { - if digits[i] < '0' || digits[i] > '9' { - end = i - break - } - } - if end == 0 { - return 0, false - } - val, err := strconv.Atoi(digits[:end]) - if err != nil { - return 0, false - } - return val, true -} - -func parseNumberToInt(raw any) (int, bool) { - switch v := raw.(type) { - case int: - return v, true - case int32: - return int(v), true - case int64: - return int(v), true - case float64: - return int(v), true - case json.Number: - if val, err := v.Int64(); err == nil { - return int(val), true - } - case string: - if strings.TrimSpace(v) == "" { - return 0, false - } - if parsed, err := strconv.Atoi(strings.TrimSpace(v)); err == nil { - return parsed, true - } - } - return 0, false -} diff --git a/internal/util/thinking_text.go b/internal/util/thinking_text.go deleted file mode 100644 index c36d202d..00000000 --- a/internal/util/thinking_text.go +++ /dev/null @@ -1,87 +0,0 @@ -package util - -import ( - "github.com/tidwall/gjson" - "github.com/tidwall/sjson" -) - -// GetThinkingText extracts the thinking text from a content part. -// Handles various formats: -// - Simple string: { "thinking": "text" } or { "text": "text" } -// - Wrapped object: { "thinking": { "text": "text", "cache_control": {...} } } -// - Gemini-style: { "thought": true, "text": "text" } -// Returns the extracted text string. -func GetThinkingText(part gjson.Result) string { - // Try direct text field first (Gemini-style) - if text := part.Get("text"); text.Exists() && text.Type == gjson.String { - return text.String() - } - - // Try thinking field - thinkingField := part.Get("thinking") - if !thinkingField.Exists() { - return "" - } - - // thinking is a string - if thinkingField.Type == gjson.String { - return thinkingField.String() - } - - // thinking is an object with inner text/thinking - if thinkingField.IsObject() { - if inner := thinkingField.Get("text"); inner.Exists() && inner.Type == gjson.String { - return inner.String() - } - if inner := thinkingField.Get("thinking"); inner.Exists() && inner.Type == gjson.String { - return inner.String() - } - } - - return "" -} - -// GetThinkingTextFromJSON extracts thinking text from a raw JSON string. -func GetThinkingTextFromJSON(jsonStr string) string { - return GetThinkingText(gjson.Parse(jsonStr)) -} - -// SanitizeThinkingPart normalizes a thinking part to a canonical form. -// Strips cache_control and other non-essential fields. -// Returns the sanitized part as JSON string. 
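GetThinkingText above accepts three shapes for the same payload. A condensed version showing the shape handling (it skips the inner "thinking" fallback of the original):

package main

import (
	"fmt"

	"github.com/tidwall/gjson"
)

// thinkingText handles Gemini-style {"text":...}, a bare {"thinking":"..."}
// string, and the wrapped {"thinking":{"text":...}} object form.
func thinkingText(part gjson.Result) string {
	if t := part.Get("text"); t.Type == gjson.String {
		return t.String()
	}
	th := part.Get("thinking")
	switch {
	case th.Type == gjson.String:
		return th.String()
	case th.IsObject():
		return th.Get("text").String()
	}
	return ""
}

func main() {
	fmt.Println(thinkingText(gjson.Parse(`{"thinking":{"text":"step 1","cache_control":{}}}`))) // step 1
}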
-func SanitizeThinkingPart(part gjson.Result) string {
-	// Gemini-style: { thought: true, text, thoughtSignature }
-	if part.Get("thought").Bool() {
-		result := `{"thought":true}`
-		if text := GetThinkingText(part); text != "" {
-			result, _ = sjson.Set(result, "text", text)
-		}
-		if sig := part.Get("thoughtSignature"); sig.Exists() && sig.Type == gjson.String {
-			result, _ = sjson.Set(result, "thoughtSignature", sig.String())
-		}
-		return result
-	}
-
-	// Anthropic-style: { type: "thinking", thinking, signature }
-	if part.Get("type").String() == "thinking" || part.Get("thinking").Exists() {
-		result := `{"type":"thinking"}`
-		if text := GetThinkingText(part); text != "" {
-			result, _ = sjson.Set(result, "thinking", text)
-		}
-		if sig := part.Get("signature"); sig.Exists() && sig.Type == gjson.String {
-			result, _ = sjson.Set(result, "signature", sig.String())
-		}
-		return result
-	}
-
-	// Not a thinking part, return as-is but strip cache_control
-	return StripCacheControl(part.Raw)
-}
-
-// StripCacheControl removes cache_control and providerOptions from a JSON object.
-func StripCacheControl(jsonStr string) string {
-	result := jsonStr
-	result, _ = sjson.Delete(result, "cache_control")
-	result, _ = sjson.Delete(result, "providerOptions")
-	return result
-}
diff --git a/internal/watcher/config_reload.go b/internal/watcher/config_reload.go
index 370ee4e1..edac3474 100644
--- a/internal/watcher/config_reload.go
+++ b/internal/watcher/config_reload.go
@@ -127,7 +127,7 @@ func (w *Watcher) reloadConfig() bool {
 	}
 
 	authDirChanged := oldConfig == nil || oldConfig.AuthDir != newConfig.AuthDir
-	forceAuthRefresh := oldConfig != nil && (oldConfig.ForceModelPrefix != newConfig.ForceModelPrefix || !reflect.DeepEqual(oldConfig.OAuthModelMappings, newConfig.OAuthModelMappings))
+	forceAuthRefresh := oldConfig != nil && (oldConfig.ForceModelPrefix != newConfig.ForceModelPrefix || !reflect.DeepEqual(oldConfig.OAuthModelAlias, newConfig.OAuthModelAlias))
 
 	log.Infof("config successfully reloaded, triggering client reload")
 	w.reloadClients(authDirChanged, affectedOAuthProviders, forceAuthRefresh)
diff --git a/internal/watcher/diff/config_diff.go b/internal/watcher/diff/config_diff.go
index fecbc242..2620f4ee 100644
--- a/internal/watcher/diff/config_diff.go
+++ b/internal/watcher/diff/config_diff.go
@@ -212,7 +212,7 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
 	if entries, _ := DiffOAuthExcludedModelChanges(oldCfg.OAuthExcludedModels, newCfg.OAuthExcludedModels); len(entries) > 0 {
 		changes = append(changes, entries...)
 	}
-	if entries, _ := DiffOAuthModelMappingChanges(oldCfg.OAuthModelMappings, newCfg.OAuthModelMappings); len(entries) > 0 {
+	if entries, _ := DiffOAuthModelAliasChanges(oldCfg.OAuthModelAlias, newCfg.OAuthModelAlias); len(entries) > 0 {
 		changes = append(changes, entries...)
 	}
 
diff --git a/internal/watcher/diff/oauth_model_mappings.go b/internal/watcher/diff/oauth_model_alias.go
similarity index 51%
rename from internal/watcher/diff/oauth_model_mappings.go
rename to internal/watcher/diff/oauth_model_alias.go
index c002855c..c5a17d29 100644
--- a/internal/watcher/diff/oauth_model_mappings.go
+++ b/internal/watcher/diff/oauth_model_alias.go
@@ -10,23 +10,23 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 )
 
-type OAuthModelMappingsSummary struct {
+type OAuthModelAliasSummary struct {
 	hash  string
 	count int
 }
 
-// SummarizeOAuthModelMappings summarizes OAuth model mappings per channel.
-func SummarizeOAuthModelMappings(entries map[string][]config.ModelNameMapping) map[string]OAuthModelMappingsSummary {
+// SummarizeOAuthModelAlias summarizes OAuth model aliases per channel.
+func SummarizeOAuthModelAlias(entries map[string][]config.OAuthModelAlias) map[string]OAuthModelAliasSummary {
 	if len(entries) == 0 {
 		return nil
 	}
-	out := make(map[string]OAuthModelMappingsSummary, len(entries))
+	out := make(map[string]OAuthModelAliasSummary, len(entries))
 	for k, v := range entries {
 		key := strings.ToLower(strings.TrimSpace(k))
 		if key == "" {
 			continue
 		}
-		out[key] = summarizeOAuthModelMappingList(v)
+		out[key] = summarizeOAuthModelAliasList(v)
 	}
 	if len(out) == 0 {
 		return nil
@@ -34,10 +34,10 @@ func SummarizeOAuthModelMappings(entries map[string][]config.ModelNameMapping) m
 	return out
 }
 
-// DiffOAuthModelMappingChanges compares OAuth model mappings maps.
-func DiffOAuthModelMappingChanges(oldMap, newMap map[string][]config.ModelNameMapping) ([]string, []string) {
-	oldSummary := SummarizeOAuthModelMappings(oldMap)
-	newSummary := SummarizeOAuthModelMappings(newMap)
+// DiffOAuthModelAliasChanges compares OAuth model alias maps.
+func DiffOAuthModelAliasChanges(oldMap, newMap map[string][]config.OAuthModelAlias) ([]string, []string) {
+	oldSummary := SummarizeOAuthModelAlias(oldMap)
+	newSummary := SummarizeOAuthModelAlias(newMap)
 	keys := make(map[string]struct{}, len(oldSummary)+len(newSummary))
 	for k := range oldSummary {
 		keys[k] = struct{}{}
@@ -52,13 +52,13 @@ func DiffOAuthModelMappingChanges(oldMap, newMap map[string][]config.ModelNameMa
 		newInfo, okNew := newSummary[key]
 		switch {
 		case okOld && !okNew:
-			changes = append(changes, fmt.Sprintf("oauth-model-mappings[%s]: removed", key))
+			changes = append(changes, fmt.Sprintf("oauth-model-alias[%s]: removed", key))
 			affected = append(affected, key)
 		case !okOld && okNew:
-			changes = append(changes, fmt.Sprintf("oauth-model-mappings[%s]: added (%d entries)", key, newInfo.count))
+			changes = append(changes, fmt.Sprintf("oauth-model-alias[%s]: added (%d entries)", key, newInfo.count))
 			affected = append(affected, key)
 		case okOld && okNew && oldInfo.hash != newInfo.hash:
-			changes = append(changes, fmt.Sprintf("oauth-model-mappings[%s]: updated (%d -> %d entries)", key, oldInfo.count, newInfo.count))
+			changes = append(changes, fmt.Sprintf("oauth-model-alias[%s]: updated (%d -> %d entries)", key, oldInfo.count, newInfo.count))
 			affected = append(affected, key)
 		}
 	}
@@ -67,20 +67,20 @@ func DiffOAuthModelMappingChanges(oldMap, newMap map[string][]config.ModelNameMa
 	return changes, affected
 }
 
-func summarizeOAuthModelMappingList(list []config.ModelNameMapping) OAuthModelMappingsSummary {
+func summarizeOAuthModelAliasList(list []config.OAuthModelAlias) OAuthModelAliasSummary {
 	if len(list) == 0 {
-		return OAuthModelMappingsSummary{}
+		return OAuthModelAliasSummary{}
 	}
 	seen := make(map[string]struct{}, len(list))
 	normalized := make([]string, 0, len(list))
-	for _, mapping := range list {
-		name := strings.ToLower(strings.TrimSpace(mapping.Name))
-		alias := strings.ToLower(strings.TrimSpace(mapping.Alias))
-		if name == "" || alias == "" {
+	for _, alias := range list {
+		name := strings.ToLower(strings.TrimSpace(alias.Name))
+		aliasVal := strings.ToLower(strings.TrimSpace(alias.Alias))
+		if name == "" || aliasVal == "" {
 			continue
 		}
-		key := name + "->" + alias
-		if mapping.Fork {
+		key := name + "->" + aliasVal
+		if alias.Fork {
 			key += "|fork"
 		}
 		if _, exists := seen[key]; exists {
@@ -90,11 +90,11 @@ func summarizeOAuthModelMappingList(list []config.ModelNameMapping) OAuthModelMa
 		normalized = append(normalized, key)
 	}
 	if len(normalized) == 0 {
-		return OAuthModelMappingsSummary{}
+		return OAuthModelAliasSummary{}
 	}
 	sort.Strings(normalized)
 	sum := sha256.Sum256([]byte(strings.Join(normalized, "|")))
-	return OAuthModelMappingsSummary{
+	return OAuthModelAliasSummary{
 		hash:  hex.EncodeToString(sum[:]),
 		count: len(normalized),
 	}
diff --git a/internal/watcher/synthesizer/config.go b/internal/watcher/synthesizer/config.go
index 2f2b2690..b1ae5885 100644
--- a/internal/watcher/synthesizer/config.go
+++ b/internal/watcher/synthesizer/config.go
@@ -2,6 +2,7 @@ package synthesizer
 
 import (
 	"fmt"
+	"strconv"
 	"strings"
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/watcher/diff"
@@ -59,6 +60,9 @@ func (s *ConfigSynthesizer) synthesizeGeminiKeys(ctx *SynthesisContext) []*corea
 			"source":  fmt.Sprintf("config:gemini[%s]", token),
 			"api_key": key,
 		}
+		if entry.Priority != 0 {
+			attrs["priority"] = strconv.Itoa(entry.Priority)
+		}
 		if base != "" {
 			attrs["base_url"] = base
 		}
@@ -103,6 +107,9 @@ func (s *ConfigSynthesizer) synthesizeClaudeKeys(ctx *SynthesisContext) []*corea
 			"source":  fmt.Sprintf("config:claude[%s]", token),
 			"api_key": key,
 		}
+		if ck.Priority != 0 {
+			attrs["priority"] = strconv.Itoa(ck.Priority)
+		}
 		if base != "" {
 			attrs["base_url"] = base
 		}
@@ -147,6 +154,9 @@ func (s *ConfigSynthesizer) synthesizeCodexKeys(ctx *SynthesisContext) []*coreau
 			"source":  fmt.Sprintf("config:codex[%s]", token),
 			"api_key": key,
 		}
+		if ck.Priority != 0 {
+			attrs["priority"] = strconv.Itoa(ck.Priority)
+		}
 		if ck.BaseURL != "" {
 			attrs["base_url"] = ck.BaseURL
 		}
@@ -202,6 +212,9 @@ func (s *ConfigSynthesizer) synthesizeOpenAICompat(ctx *SynthesisContext) []*cor
 			"compat_name":  compat.Name,
 			"provider_key": providerName,
 		}
+		if compat.Priority != 0 {
+			attrs["priority"] = strconv.Itoa(compat.Priority)
+		}
 		if key != "" {
 			attrs["api_key"] = key
 		}
@@ -233,6 +246,9 @@ func (s *ConfigSynthesizer) synthesizeOpenAICompat(ctx *SynthesisContext) []*cor
 			"compat_name":  compat.Name,
 			"provider_key": providerName,
 		}
+		if compat.Priority != 0 {
+			attrs["priority"] = strconv.Itoa(compat.Priority)
+		}
 		if hash := diff.ComputeOpenAICompatModelsHash(compat.Models); hash != "" {
 			attrs["models_hash"] = hash
 		}
@@ -275,6 +291,9 @@ func (s *ConfigSynthesizer) synthesizeVertexCompat(ctx *SynthesisContext) []*cor
 			"base_url":     base,
 			"provider_key": providerName,
 		}
+		if compat.Priority != 0 {
+			attrs["priority"] = strconv.Itoa(compat.Priority)
+		}
 		if key != "" {
 			attrs["api_key"] = key
 		}
diff --git a/sdk/api/handlers/gemini/gemini_handlers.go b/sdk/api/handlers/gemini/gemini_handlers.go
index f2bdb058..27d8d1f5 100644
--- a/sdk/api/handlers/gemini/gemini_handlers.go
+++ b/sdk/api/handlers/gemini/gemini_handlers.go
@@ -56,8 +56,12 @@ func (h *GeminiAPIHandler) GeminiModels(c *gin.Context) {
 		for k, v := range model {
 			normalizedModel[k] = v
 		}
-		if name, ok := normalizedModel["name"].(string); ok && name != "" && !strings.HasPrefix(name, "models/") {
-			normalizedModel["name"] = "models/" + name
+		if name, ok := normalizedModel["name"].(string); ok && name != "" {
+			if !strings.HasPrefix(name, "models/") {
+				normalizedModel["name"] = "models/" + name
+			}
+			normalizedModel["displayName"] = name
+			normalizedModel["description"] = name
 		}
 		if _, ok := normalizedModel["supportedGenerationMethods"]; !ok {
 			normalizedModel["supportedGenerationMethods"] = defaultMethods
@@ -85,94 +89,35 @@ func (h *GeminiAPIHandler) GeminiGetHandler(c *gin.Context) {
 		return
 	}
 	action := strings.TrimPrefix(request.Action, "/")
-	switch action {
-	case "gemini-3-pro-preview":
-		c.JSON(http.StatusOK, gin.H{
-			"name":             "models/gemini-3-pro-preview",
-			"version":          "3",
-			"displayName":      "Gemini 3 Pro Preview",
-			"description":      "Gemini 3 Pro Preview",
-			"inputTokenLimit":  1048576,
-			"outputTokenLimit": 65536,
-			"supportedGenerationMethods": []string{
-				"generateContent",
-				"countTokens",
-				"createCachedContent",
-				"batchGenerateContent",
-			},
-			"temperature":    1,
-			"topP":           0.95,
-			"topK":           64,
-			"maxTemperature": 2,
-			"thinking":       true,
-		},
-		)
-	case "gemini-2.5-pro":
-		c.JSON(http.StatusOK, gin.H{
-			"name":             "models/gemini-2.5-pro",
-			"version":          "2.5",
-			"displayName":      "Gemini 2.5 Pro",
-			"description":      "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
-			"inputTokenLimit":  1048576,
-			"outputTokenLimit": 65536,
-			"supportedGenerationMethods": []string{
-				"generateContent",
-				"countTokens",
-				"createCachedContent",
-				"batchGenerateContent",
-			},
-			"temperature":    1,
-			"topP":           0.95,
-			"topK":           64,
-			"maxTemperature": 2,
-			"thinking":       true,
-		},
-		)
-	case "gemini-2.5-flash":
-		c.JSON(http.StatusOK, gin.H{
-			"name":             "models/gemini-2.5-flash",
-			"version":          "001",
-			"displayName":      "Gemini 2.5 Flash",
-			"description":      "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			"inputTokenLimit":  1048576,
-			"outputTokenLimit": 65536,
-			"supportedGenerationMethods": []string{
-				"generateContent",
-				"countTokens",
-				"createCachedContent",
-				"batchGenerateContent",
-			},
-			"temperature":    1,
-			"topP":           0.95,
-			"topK":           64,
-			"maxTemperature": 2,
-			"thinking":       true,
-		})
-	case "gpt-5":
-		c.JSON(http.StatusOK, gin.H{
-			"name":             "gpt-5",
-			"version":          "001",
-			"displayName":      "GPT 5",
-			"description":      "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			"inputTokenLimit":  400000,
-			"outputTokenLimit": 128000,
-			"supportedGenerationMethods": []string{
-				"generateContent",
-			},
-			"temperature":    1,
-			"topP":           0.95,
-			"topK":           64,
-			"maxTemperature": 2,
-			"thinking":       true,
-		})
-	default:
-		c.JSON(http.StatusNotFound, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: "Not Found",
-				Type:    "not_found",
-			},
-		})
+
+	// Get dynamic models from the global registry and find the matching one
+	availableModels := h.Models()
+	var targetModel map[string]any
+
+	for _, model := range availableModels {
+		name, _ := model["name"].(string)
+		// Match name with or without 'models/' prefix
+		if name == action || name == "models/"+action {
+			targetModel = model
+			break
+		}
 	}
+
+	if targetModel != nil {
+		// Ensure the name has 'models/' prefix in the output if it's a Gemini model
+		if name, ok := targetModel["name"].(string); ok && name != "" && !strings.HasPrefix(name, "models/") {
+			targetModel["name"] = "models/" + name
+		}
+		c.JSON(http.StatusOK, targetModel)
+		return
+	}
+
+	c.JSON(http.StatusNotFound, handlers.ErrorResponse{
+		Error: handlers.ErrorDetail{
+			Message: "Not Found",
+			Type:    "not_found",
+		},
+	})
 }
 
 // GeminiHandler handles POST requests for Gemini API operations.
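One caveat on the dynamic lookup introduced above: GeminiGetHandler assigns targetModel = model and then rewrites targetModel["name"] in place, so unless h.Models() hands back per-call copies (not shown in this diff), the "models/" prefix is written into the registry-owned map. The GeminiModels handler in the same file copies each map before normalizing. Below is a minimal standalone sketch of that copy-then-normalize pattern; findModel is an illustrative helper, not a function from the patch.

package main

import (
	"fmt"
	"strings"
)

// findModel matches the requested action with or without the "models/"
// prefix, then copies the entry before normalizing the name so the caller
// never mutates the map owned by the model registry.
func findModel(models []map[string]any, action string) map[string]any {
	for _, model := range models {
		name, _ := model["name"].(string)
		if name != action && name != "models/"+action {
			continue
		}
		out := make(map[string]any, len(model))
		for k, v := range model {
			out[k] = v
		}
		if n, ok := out["name"].(string); ok && n != "" && !strings.HasPrefix(n, "models/") {
			out["name"] = "models/" + n
		}
		return out
	}
	return nil
}

func main() {
	registry := []map[string]any{{"name": "gemini-2.5-pro"}}
	fmt.Println(findModel(registry, "gemini-2.5-pro")["name"]) // models/gemini-2.5-pro
	fmt.Println(registry[0]["name"])                           // gemini-2.5-pro: source map untouched
}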
diff --git a/sdk/api/handlers/handlers.go b/sdk/api/handlers/handlers.go
index 6160b9bd..232f0b95 100644
--- a/sdk/api/handlers/handlers.go
+++ b/sdk/api/handlers/handlers.go
@@ -16,6 +16,7 @@ import (
 	"github.com/google/uuid"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	coreexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -379,7 +380,7 @@ func appendAPIResponse(c *gin.Context, data []byte) {
 // ExecuteWithAuthManager executes a non-streaming request via the core auth manager.
 // This path is the only supported execution route.
 func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) {
-	providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName)
+	providers, normalizedModel, errMsg := h.getRequestDetails(modelName)
 	if errMsg != nil {
 		return nil, errMsg
 	}
@@ -388,16 +389,13 @@ func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType
 		Model:   normalizedModel,
 		Payload: cloneBytes(rawJSON),
 	}
-	if cloned := cloneMetadata(metadata); cloned != nil {
-		req.Metadata = cloned
-	}
 	opts := coreexecutor.Options{
 		Stream:          false,
 		Alt:             alt,
 		OriginalRequest: cloneBytes(rawJSON),
 		SourceFormat:    sdktranslator.FromString(handlerType),
 	}
-	opts.Metadata = mergeMetadata(cloneMetadata(metadata), reqMeta)
+	opts.Metadata = reqMeta
 	resp, err := h.AuthManager.Execute(ctx, providers, req, opts)
 	if err != nil {
 		status := http.StatusInternalServerError
@@ -420,7 +418,7 @@ func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType
 // ExecuteCountWithAuthManager executes a non-streaming request via the core auth manager.
 // This path is the only supported execution route.
 func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) {
-	providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName)
+	providers, normalizedModel, errMsg := h.getRequestDetails(modelName)
 	if errMsg != nil {
 		return nil, errMsg
 	}
@@ -429,16 +427,13 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
 		Model:   normalizedModel,
 		Payload: cloneBytes(rawJSON),
 	}
-	if cloned := cloneMetadata(metadata); cloned != nil {
-		req.Metadata = cloned
-	}
 	opts := coreexecutor.Options{
 		Stream:          false,
 		Alt:             alt,
 		OriginalRequest: cloneBytes(rawJSON),
 		SourceFormat:    sdktranslator.FromString(handlerType),
 	}
-	opts.Metadata = mergeMetadata(cloneMetadata(metadata), reqMeta)
+	opts.Metadata = reqMeta
 	resp, err := h.AuthManager.ExecuteCount(ctx, providers, req, opts)
 	if err != nil {
 		status := http.StatusInternalServerError
@@ -461,7 +456,7 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
 // ExecuteStreamWithAuthManager executes a streaming request via the core auth manager.
 // This path is the only supported execution route.
 func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) (<-chan []byte, <-chan *interfaces.ErrorMessage) {
-	providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName)
+	providers, normalizedModel, errMsg := h.getRequestDetails(modelName)
 	if errMsg != nil {
 		errChan := make(chan *interfaces.ErrorMessage, 1)
 		errChan <- errMsg
@@ -473,16 +468,13 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
 		Model:   normalizedModel,
 		Payload: cloneBytes(rawJSON),
 	}
-	if cloned := cloneMetadata(metadata); cloned != nil {
-		req.Metadata = cloned
-	}
 	opts := coreexecutor.Options{
 		Stream:          true,
 		Alt:             alt,
 		OriginalRequest: cloneBytes(rawJSON),
 		SourceFormat:    sdktranslator.FromString(handlerType),
 	}
-	opts.Metadata = mergeMetadata(cloneMetadata(metadata), reqMeta)
+	opts.Metadata = reqMeta
 	chunks, err := h.AuthManager.ExecuteStream(ctx, providers, req, opts)
 	if err != nil {
 		errChan := make(chan *interfaces.ErrorMessage, 1)
@@ -595,38 +587,40 @@ func statusFromError(err error) int {
 	return 0
 }
 
-func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string, normalizedModel string, metadata map[string]any, err *interfaces.ErrorMessage) {
-	// Resolve "auto" model to an actual available model first
-	resolvedModelName := util.ResolveAutoModel(modelName)
-
-	// Normalize the model name to handle dynamic thinking suffixes before determining the provider.
-	normalizedModel, metadata = normalizeModelMetadata(resolvedModelName)
-
-	// Use the normalizedModel to get the provider name.
-	providers = util.GetProviderName(normalizedModel)
-	if len(providers) == 0 && metadata != nil {
-		if originalRaw, ok := metadata[util.ThinkingOriginalModelMetadataKey]; ok {
-			if originalModel, okStr := originalRaw.(string); okStr {
-				originalModel = strings.TrimSpace(originalModel)
-				if originalModel != "" && !strings.EqualFold(originalModel, normalizedModel) {
-					if altProviders := util.GetProviderName(originalModel); len(altProviders) > 0 {
-						providers = altProviders
-						normalizedModel = originalModel
-					}
-				}
-			}
+func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string, normalizedModel string, err *interfaces.ErrorMessage) {
+	resolvedModelName := modelName
+	initialSuffix := thinking.ParseSuffix(modelName)
+	if initialSuffix.ModelName == "auto" {
+		resolvedBase := util.ResolveAutoModel(initialSuffix.ModelName)
+		if initialSuffix.HasSuffix {
+			resolvedModelName = fmt.Sprintf("%s(%s)", resolvedBase, initialSuffix.RawSuffix)
+		} else {
+			resolvedModelName = resolvedBase
 		}
+	} else {
+		resolvedModelName = util.ResolveAutoModel(modelName)
+	}
+
+	parsed := thinking.ParseSuffix(resolvedModelName)
+	baseModel := strings.TrimSpace(parsed.ModelName)
+
+	providers = util.GetProviderName(baseModel)
+	// Fallback: if baseModel has no provider but differs from resolvedModelName,
+	// try using the full model name. This handles edge cases where custom models
+	// may be registered with their full suffixed name (e.g., "my-model(8192)").
+	// Evaluated in Story 11.8: This fallback is intentionally preserved to support
+	// custom model registrations that include thinking suffixes.
+	if len(providers) == 0 && baseModel != resolvedModelName {
+		providers = util.GetProviderName(resolvedModelName)
 	}
 	if len(providers) == 0 {
-		return nil, "", nil, &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)}
+		return nil, "", &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)}
 	}
 
-	// If it's a dynamic model, the normalizedModel was already set to extractedModelName.
-	// If it's a non-dynamic model, normalizedModel was set by normalizeModelMetadata.
-	// So, normalizedModel is already correctly set at this point.
-
-	return providers, normalizedModel, metadata, nil
+	// The thinking suffix is preserved in the model name itself, so no
+	// metadata-based configuration passing is needed.
+	return providers, resolvedModelName, nil
 }
 
 func cloneBytes(src []byte) []byte {
@@ -638,10 +632,6 @@ func cloneBytes(src []byte) []byte {
 	return dst
 }
 
-func normalizeModelMetadata(modelName string) (string, map[string]any) {
-	return util.NormalizeThinkingModel(modelName)
-}
-
 func cloneMetadata(src map[string]any) map[string]any {
 	if len(src) == 0 {
 		return nil
diff --git a/sdk/api/handlers/handlers_request_details_test.go b/sdk/api/handlers/handlers_request_details_test.go
new file mode 100644
index 00000000..b0f6b132
--- /dev/null
+++ b/sdk/api/handlers/handlers_request_details_test.go
@@ -0,0 +1,118 @@
+package handlers
+
+import (
+	"reflect"
+	"testing"
+	"time"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+	sdkconfig "github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
+)
+
+func TestGetRequestDetails_PreservesSuffix(t *testing.T) {
+	modelRegistry := registry.GetGlobalRegistry()
+	now := time.Now().Unix()
+
+	modelRegistry.RegisterClient("test-request-details-gemini", "gemini", []*registry.ModelInfo{
+		{ID: "gemini-2.5-pro", Created: now + 30},
+		{ID: "gemini-2.5-flash", Created: now + 25},
+	})
+	modelRegistry.RegisterClient("test-request-details-openai", "openai", []*registry.ModelInfo{
+		{ID: "gpt-5.2", Created: now + 20},
+	})
+	modelRegistry.RegisterClient("test-request-details-claude", "claude", []*registry.ModelInfo{
+		{ID: "claude-sonnet-4-5", Created: now + 5},
+	})
+
+	// Ensure cleanup of all test registrations.
+	clientIDs := []string{
+		"test-request-details-gemini",
+		"test-request-details-openai",
+		"test-request-details-claude",
+	}
+	for _, clientID := range clientIDs {
+		id := clientID
+		t.Cleanup(func() {
+			modelRegistry.UnregisterClient(id)
+		})
+	}
+
+	handler := NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, coreauth.NewManager(nil, nil, nil))
+
+	tests := []struct {
+		name          string
+		inputModel    string
+		wantProviders []string
+		wantModel     string
+		wantErr       bool
+	}{
+		{
+			name:          "numeric suffix preserved",
+			inputModel:    "gemini-2.5-pro(8192)",
+			wantProviders: []string{"gemini"},
+			wantModel:     "gemini-2.5-pro(8192)",
+			wantErr:       false,
+		},
+		{
+			name:          "level suffix preserved",
+			inputModel:    "gpt-5.2(high)",
+			wantProviders: []string{"openai"},
+			wantModel:     "gpt-5.2(high)",
+			wantErr:       false,
+		},
+		{
+			name:          "no suffix unchanged",
+			inputModel:    "claude-sonnet-4-5",
+			wantProviders: []string{"claude"},
+			wantModel:     "claude-sonnet-4-5",
+			wantErr:       false,
+		},
+		{
+			name:          "unknown model with suffix",
+			inputModel:    "unknown-model(8192)",
+			wantProviders: nil,
+			wantModel:     "",
+			wantErr:       true,
+		},
+		{
+			name:          "auto suffix resolved",
+			inputModel:    "auto(high)",
+			wantProviders: []string{"gemini"},
+			wantModel:     "gemini-2.5-pro(high)",
+			wantErr:       false,
+		},
+		{
+			name:          "special suffix none preserved",
+			inputModel:    "gemini-2.5-flash(none)",
+			wantProviders: []string{"gemini"},
+			wantModel:     "gemini-2.5-flash(none)",
+			wantErr:       false,
+		},
+		{
+			name:          "special suffix auto preserved",
+			inputModel:    "claude-sonnet-4-5(auto)",
+			wantProviders: []string{"claude"},
+			wantModel:     "claude-sonnet-4-5(auto)",
+			wantErr:       false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			providers, model, errMsg := handler.getRequestDetails(tt.inputModel)
+			if (errMsg != nil) != tt.wantErr {
+				t.Fatalf("getRequestDetails() error = %v, wantErr %v", errMsg, tt.wantErr)
+			}
+			if errMsg != nil {
+				return
+			}
+			if !reflect.DeepEqual(providers, tt.wantProviders) {
+				t.Fatalf("getRequestDetails() providers = %v, want %v", providers, tt.wantProviders)
+			}
+			if model != tt.wantModel {
+				t.Fatalf("getRequestDetails() model = %v, want %v", model, tt.wantModel)
+			}
+		})
+	}
+}
diff --git a/sdk/auth/antigravity.go b/sdk/auth/antigravity.go
index ae22f772..210da57f 100644
--- a/sdk/auth/antigravity.go
+++ b/sdk/auth/antigravity.go
@@ -60,6 +60,11 @@ func (AntigravityAuthenticator) Login(ctx context.Context, cfg *config.Config, o
 		opts = &LoginOptions{}
 	}
 
+	callbackPort := antigravityCallbackPort
+	if opts.CallbackPort > 0 {
+		callbackPort = opts.CallbackPort
+	}
+
 	httpClient := util.SetProxy(&cfg.SDKConfig, &http.Client{})
 
 	state, err := misc.GenerateRandomState()
@@ -67,7 +72,7 @@ func (AntigravityAuthenticator) Login(ctx context.Context, cfg *config.Config, o
 		return nil, fmt.Errorf("antigravity: failed to generate state: %w", err)
 	}
 
-	srv, port, cbChan, errServer := startAntigravityCallbackServer()
+	srv, port, cbChan, errServer := startAntigravityCallbackServer(callbackPort)
 	if errServer != nil {
 		return nil, fmt.Errorf("antigravity: failed to start callback server: %w", errServer)
 	}
@@ -224,13 +229,16 @@ type callbackResult struct {
 	State string
 }
 
-func startAntigravityCallbackServer() (*http.Server, int, <-chan callbackResult, error) {
-	addr := fmt.Sprintf(":%d", antigravityCallbackPort)
+func startAntigravityCallbackServer(port int) (*http.Server, int, <-chan callbackResult, error) {
+	if port <= 0 {
+		port = antigravityCallbackPort
+	}
+	addr := fmt.Sprintf(":%d", port)
 	listener, err := net.Listen("tcp", addr)
 	if err != nil {
 		return nil, 0, nil, err
 	}
-	port := listener.Addr().(*net.TCPAddr).Port
+	port = listener.Addr().(*net.TCPAddr).Port
 	resultCh := make(chan callbackResult, 1)
 
 	mux := http.NewServeMux()
@@ -374,7 +382,7 @@ func fetchAntigravityProjectID(ctx context.Context, accessToken string, httpClie
 	// Call loadCodeAssist to get the project
 	loadReqBody := map[string]any{
 		"metadata": map[string]string{
-			"ideType":    "IDE_UNSPECIFIED",
+			"ideType":    "ANTIGRAVITY",
 			"platform":   "PLATFORM_UNSPECIFIED",
 			"pluginType": "GEMINI",
 		},
@@ -434,8 +442,134 @@ func fetchAntigravityProjectID(ctx context.Context, accessToken string, httpClie
 	}
 
 	if projectID == "" {
-		return "", fmt.Errorf("no cloudaicompanionProject in response")
+		tierID := "legacy-tier"
+		if tiers, okTiers := loadResp["allowedTiers"].([]any); okTiers {
+			for _, rawTier := range tiers {
+				tier, okTier := rawTier.(map[string]any)
+				if !okTier {
+					continue
+				}
+				if isDefault, okDefault := tier["isDefault"].(bool); okDefault && isDefault {
+					if id, okID := tier["id"].(string); okID && strings.TrimSpace(id) != "" {
+						tierID = strings.TrimSpace(id)
+						break
+					}
+				}
+			}
+		}
+
+		projectID, err = antigravityOnboardUser(ctx, accessToken, tierID, httpClient)
+		if err != nil {
+			return "", err
+		}
+		return projectID, nil
 	}
 
 	return projectID, nil
 }
+
+// antigravityOnboardUser attempts to fetch the project ID via onboardUser by polling for completion.
+// It returns an empty string when the operation times out or completes without a project ID.
+func antigravityOnboardUser(ctx context.Context, accessToken, tierID string, httpClient *http.Client) (string, error) {
+	if httpClient == nil {
+		httpClient = http.DefaultClient
+	}
+	fmt.Println("Antigravity: onboarding user...", tierID)
+	requestBody := map[string]any{
+		"tierId": tierID,
+		"metadata": map[string]string{
+			"ideType":    "ANTIGRAVITY",
+			"platform":   "PLATFORM_UNSPECIFIED",
+			"pluginType": "GEMINI",
+		},
+	}
+
+	rawBody, errMarshal := json.Marshal(requestBody)
+	if errMarshal != nil {
+		return "", fmt.Errorf("marshal request body: %w", errMarshal)
+	}
+
+	maxAttempts := 5
+	for attempt := 1; attempt <= maxAttempts; attempt++ {
+		log.Debugf("Polling attempt %d/%d", attempt, maxAttempts)
+
+		reqCtx := ctx
+		var cancel context.CancelFunc
+		if reqCtx == nil {
+			reqCtx = context.Background()
+		}
+		reqCtx, cancel = context.WithTimeout(reqCtx, 30*time.Second)
+
+		endpointURL := fmt.Sprintf("%s/%s:onboardUser", antigravityAPIEndpoint, antigravityAPIVersion)
+		req, errRequest := http.NewRequestWithContext(reqCtx, http.MethodPost, endpointURL, strings.NewReader(string(rawBody)))
+		if errRequest != nil {
+			cancel()
+			return "", fmt.Errorf("create request: %w", errRequest)
+		}
+		req.Header.Set("Authorization", "Bearer "+accessToken)
+		req.Header.Set("Content-Type", "application/json")
+		req.Header.Set("User-Agent", antigravityAPIUserAgent)
+		req.Header.Set("X-Goog-Api-Client", antigravityAPIClient)
+		req.Header.Set("Client-Metadata", antigravityClientMetadata)
+
+		resp, errDo := httpClient.Do(req)
+		if errDo != nil {
+			cancel()
+			return "", fmt.Errorf("execute request: %w", errDo)
+		}
+
+		bodyBytes, errRead := io.ReadAll(resp.Body)
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("close body error: %v", errClose)
+		}
+		cancel()
+
+		if errRead != nil {
+			return "", fmt.Errorf("read response: %w", errRead)
+		}
+
+		if resp.StatusCode == http.StatusOK {
+			var data map[string]any
+			if errDecode := json.Unmarshal(bodyBytes, &data); errDecode != nil {
+				return "", fmt.Errorf("decode response: %w", errDecode)
+			}
+
+			if done, okDone := data["done"].(bool); okDone && done {
+				projectID := ""
+				if responseData, okResp := data["response"].(map[string]any); okResp {
+					switch projectValue := responseData["cloudaicompanionProject"].(type) {
+					case map[string]any:
+						if id, okID := projectValue["id"].(string); okID {
+							projectID = strings.TrimSpace(id)
+						}
+					case string:
+						projectID = strings.TrimSpace(projectValue)
+					}
+				}
+
+				if projectID != "" {
+					log.Infof("Successfully fetched project_id: %s", projectID)
+					return projectID, nil
+				}
+
+				return "", fmt.Errorf("no project_id in response")
+			}
+
+			time.Sleep(2 * time.Second)
+			continue
+		}
+
+		responsePreview := strings.TrimSpace(string(bodyBytes))
+		if len(responsePreview) > 500 {
+			responsePreview = responsePreview[:500]
+		}
+
+		responseErr := responsePreview
+		if len(responseErr) > 200 {
+			responseErr = responseErr[:200]
+		}
+		return "", fmt.Errorf("http %d: %s", resp.StatusCode, responseErr)
+	}
+
+	return "", nil
+}
diff --git a/sdk/auth/claude.go b/sdk/auth/claude.go
index c43b78cd..2c7a8988 100644
--- a/sdk/auth/claude.go
+++ b/sdk/auth/claude.go
@@ -47,6 +47,11 @@ func (a *ClaudeAuthenticator) Login(ctx context.Context, cfg *config.Config, opt
 		opts = &LoginOptions{}
 	}
 
+	callbackPort := a.CallbackPort
+	if opts.CallbackPort > 0 {
+		callbackPort = opts.CallbackPort
+	}
+
 	pkceCodes, err := claude.GeneratePKCECodes()
 	if err != nil {
 		return nil, fmt.Errorf("claude pkce generation failed: %w", err)
@@ -57,7 +62,7 @@ func (a *ClaudeAuthenticator) Login(ctx context.Context, cfg *config.Config, opt
 		return nil, fmt.Errorf("claude state generation failed: %w", err)
 	}
 
-	oauthServer := claude.NewOAuthServer(a.CallbackPort)
+	oauthServer := claude.NewOAuthServer(callbackPort)
 	if err = oauthServer.Start(); err != nil {
 		if strings.Contains(err.Error(), "already in use") {
 			return nil, claude.NewAuthenticationError(claude.ErrPortInUse, err)
@@ -84,15 +89,15 @@ func (a *ClaudeAuthenticator) Login(ctx context.Context, cfg *config.Config, opt
 		fmt.Println("Opening browser for Claude authentication")
 		if !browser.IsAvailable() {
 			log.Warn("No browser available; please open the URL manually")
-			util.PrintSSHTunnelInstructions(a.CallbackPort)
+			util.PrintSSHTunnelInstructions(callbackPort)
 			fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL)
 		} else if err = browser.OpenURL(authURL); err != nil {
 			log.Warnf("Failed to open browser automatically: %v", err)
-			util.PrintSSHTunnelInstructions(a.CallbackPort)
+			util.PrintSSHTunnelInstructions(callbackPort)
 			fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL)
 		}
 	} else {
-		util.PrintSSHTunnelInstructions(a.CallbackPort)
+		util.PrintSSHTunnelInstructions(callbackPort)
 		fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL)
 	}
 
diff --git a/sdk/auth/codex.go b/sdk/auth/codex.go
index 99992525..b3104b4e 100644
--- a/sdk/auth/codex.go
+++ b/sdk/auth/codex.go
@@ -47,6 +47,11 @@ func (a *CodexAuthenticator) Login(ctx context.Context, cfg *config.Config, opts
 		opts = &LoginOptions{}
 	}
 
+	callbackPort := a.CallbackPort
+	if opts.CallbackPort > 0 {
+		callbackPort = opts.CallbackPort
+	}
+
 	pkceCodes, err := codex.GeneratePKCECodes()
 	if err != nil {
 		return nil, fmt.Errorf("codex pkce generation failed: %w", err)
@@ -57,7 +62,7 @@ func (a *CodexAuthenticator) Login(ctx context.Context, cfg *config.Config, opts
 		return nil, fmt.Errorf("codex state generation failed: %w", err)
 	}
 
-	oauthServer := codex.NewOAuthServer(a.CallbackPort)
+	oauthServer := codex.NewOAuthServer(callbackPort)
 	if err = oauthServer.Start(); err != nil {
 		if strings.Contains(err.Error(), "already in use") {
 			return nil, codex.NewAuthenticationError(codex.ErrPortInUse, err)
@@ -83,15 +88,15 @@ func (a *CodexAuthenticator) Login(ctx context.Context, cfg *config.Config, opts
 		fmt.Println("Opening browser for Codex authentication")
 		if !browser.IsAvailable() {
 			log.Warn("No browser available; please open the URL manually")
-			util.PrintSSHTunnelInstructions(a.CallbackPort)
+			util.PrintSSHTunnelInstructions(callbackPort)
 			fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL)
 		} else if err = browser.OpenURL(authURL); err != nil {
 			log.Warnf("Failed to open browser automatically: %v", err)
-			util.PrintSSHTunnelInstructions(a.CallbackPort)
+			util.PrintSSHTunnelInstructions(callbackPort)
 			fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL)
 		}
 	} else {
-		util.PrintSSHTunnelInstructions(a.CallbackPort)
+		util.PrintSSHTunnelInstructions(callbackPort)
 		fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL)
 	}
 
diff --git a/sdk/auth/filestore.go b/sdk/auth/filestore.go
index 84092d37..db9f7148 100644
--- a/sdk/auth/filestore.go
+++ b/sdk/auth/filestore.go
@@ -5,8 +5,10 @@ import (
 	"encoding/json"
 	"fmt"
 	"io/fs"
+	"net/http"
 	"os"
 	"path/filepath"
+	"reflect"
 	"strings"
 	"sync"
 	"time"
@@ -77,15 +79,23 @@ func (s *FileTokenStore) Save(ctx context.Context, auth *cliproxyauth.Auth) (str
 		if metadataEqualIgnoringTimestamps(existing, raw) {
 			return path, nil
 		}
-	} else if errRead != nil && !os.IsNotExist(errRead) {
+		file, errOpen := os.OpenFile(path, os.O_WRONLY|os.O_TRUNC, 0o600)
+		if errOpen != nil {
+			return "", fmt.Errorf("auth filestore: open existing failed: %w", errOpen)
+		}
+		if _, errWrite := file.Write(raw); errWrite != nil {
+			_ = file.Close()
+			return "", fmt.Errorf("auth filestore: write existing failed: %w", errWrite)
+		}
+		if errClose := file.Close(); errClose != nil {
+			return "", fmt.Errorf("auth filestore: close existing failed: %w", errClose)
+		}
+		return path, nil
+	} else if !os.IsNotExist(errRead) {
 		return "", fmt.Errorf("auth filestore: read existing failed: %w", errRead)
 	}
-	tmp := path + ".tmp"
-	if errWrite := os.WriteFile(tmp, raw, 0o600); errWrite != nil {
-		return "", fmt.Errorf("auth filestore: write temp failed: %w", errWrite)
-	}
-	if errRename := os.Rename(tmp, path); errRename != nil {
-		return "", fmt.Errorf("auth filestore: rename failed: %w", errRename)
+	if errWrite := os.WriteFile(path, raw, 0o600); errWrite != nil {
+		return "", fmt.Errorf("auth filestore: write file failed: %w", errWrite)
 	}
 	default:
 		return "", fmt.Errorf("auth filestore: nothing to persist for %s", auth.ID)
 	}
@@ -178,6 +188,30 @@ func (s *FileTokenStore) readAuthFile(path, baseDir string) (*cliproxyauth.Auth,
 	if provider == "" {
 		provider = "unknown"
 	}
+	if provider == "antigravity" {
+		projectID := ""
+		if pid, ok := metadata["project_id"].(string); ok {
+			projectID = strings.TrimSpace(pid)
+		}
+		if projectID == "" {
+			accessToken := ""
+			if token, ok := metadata["access_token"].(string); ok {
+				accessToken = strings.TrimSpace(token)
+			}
+			if accessToken != "" {
+				fetchedProjectID, errFetch := FetchAntigravityProjectID(context.Background(), accessToken, http.DefaultClient)
+				if errFetch == nil && strings.TrimSpace(fetchedProjectID) != "" {
+					metadata["project_id"] = strings.TrimSpace(fetchedProjectID)
+					if raw, errMarshal := json.Marshal(metadata); errMarshal == nil {
+						if file, errOpen := os.OpenFile(path, os.O_WRONLY|os.O_TRUNC, 0o600); errOpen == nil {
+							_, _ = file.Write(raw)
+							_ = file.Close()
+						}
+					}
+				}
+			}
+		}
+	}
 	info, err := os.Stat(path)
 	if err != nil {
 		return nil, fmt.Errorf("stat file: %w", err)
@@ -266,92 +300,28 @@ func (s *FileTokenStore) baseDirSnapshot() string {
 	return s.baseDir
 }
 
-// DEPRECATED: Use metadataEqualIgnoringTimestamps for comparing auth metadata.
-// This function is kept for backward compatibility but can cause refresh loops.
-func jsonEqual(a, b []byte) bool {
-	var objA any
-	var objB any
-	if err := json.Unmarshal(a, &objA); err != nil {
-		return false
-	}
-	if err := json.Unmarshal(b, &objB); err != nil {
-		return false
-	}
-	return deepEqualJSON(objA, objB)
-}
-
-// metadataEqualIgnoringTimestamps compares two metadata JSON blobs,
-// ignoring fields that change on every refresh but don't affect functionality.
-// This prevents unnecessary file writes that would trigger watcher events and
-// create refresh loops.
+// metadataEqualIgnoringTimestamps compares two metadata JSON blobs, ignoring volatile fields that
+// change on every refresh but don't affect authentication logic.
 func metadataEqualIgnoringTimestamps(a, b []byte) bool {
-	var objA, objB map[string]any
-	if err := json.Unmarshal(a, &objA); err != nil {
+	var objA map[string]any
+	var objB map[string]any
+	if errUnmarshalA := json.Unmarshal(a, &objA); errUnmarshalA != nil {
 		return false
 	}
-	if err := json.Unmarshal(b, &objB); err != nil {
+	if errUnmarshalB := json.Unmarshal(b, &objB); errUnmarshalB != nil {
 		return false
 	}
-
-	// Fields to ignore: these change on every refresh but don't affect authentication logic.
-	// - timestamp, expired, expires_in, last_refresh: time-related fields that change on refresh
-	// - access_token: Google OAuth returns a new access_token on each refresh, this is expected
-	//   and shouldn't trigger file writes (the new token will be fetched again when needed)
-	ignoredFields := []string{"timestamp", "expired", "expires_in", "last_refresh", "access_token"}
-	for _, field := range ignoredFields {
-		delete(objA, field)
-		delete(objB, field)
-	}
-
-	return deepEqualJSON(objA, objB)
+	stripVolatileMetadataFields(objA)
+	stripVolatileMetadataFields(objB)
+	return reflect.DeepEqual(objA, objB)
 }
 
-func deepEqualJSON(a, b any) bool {
-	switch valA := a.(type) {
-	case map[string]any:
-		valB, ok := b.(map[string]any)
-		if !ok || len(valA) != len(valB) {
-			return false
-		}
-		for key, subA := range valA {
-			subB, ok1 := valB[key]
-			if !ok1 || !deepEqualJSON(subA, subB) {
-				return false
-			}
-		}
-		return true
-	case []any:
-		sliceB, ok := b.([]any)
-		if !ok || len(valA) != len(sliceB) {
-			return false
-		}
-		for i := range valA {
-			if !deepEqualJSON(valA[i], sliceB[i]) {
-				return false
-			}
-		}
-		return true
-	case float64:
-		valB, ok := b.(float64)
-		if !ok {
-			return false
-		}
-		return valA == valB
-	case string:
-		valB, ok := b.(string)
-		if !ok {
-			return false
-		}
-		return valA == valB
-	case bool:
-		valB, ok := b.(bool)
-		if !ok {
-			return false
-		}
-		return valA == valB
-	case nil:
-		return b == nil
-	default:
-		return false
+func stripVolatileMetadataFields(metadata map[string]any) {
+	if metadata == nil {
+		return
+	}
+	// These fields change on refresh and would otherwise trigger watcher reload loops.
+	for _, field := range []string{"timestamp", "expired", "expires_in", "last_refresh", "access_token"} {
+		delete(metadata, field)
 	}
 }
diff --git a/sdk/auth/gemini.go b/sdk/auth/gemini.go
index 75ef4579..2b8f9c2b 100644
--- a/sdk/auth/gemini.go
+++ b/sdk/auth/gemini.go
@@ -45,8 +45,9 @@ func (a *GeminiAuthenticator) Login(ctx context.Context, cfg *config.Config, opt
 	geminiAuth := gemini.NewGeminiAuth()
 
 	_, err := geminiAuth.GetAuthenticatedClient(ctx, &ts, cfg, &gemini.WebLoginOptions{
-		NoBrowser: opts.NoBrowser,
-		Prompt:    opts.Prompt,
+		NoBrowser:    opts.NoBrowser,
+		CallbackPort: opts.CallbackPort,
+		Prompt:       opts.Prompt,
 	})
 	if err != nil {
 		return nil, fmt.Errorf("gemini authentication failed: %w", err)
diff --git a/sdk/auth/iflow.go b/sdk/auth/iflow.go
index 3fd82f1d..6d4ff946 100644
--- a/sdk/auth/iflow.go
+++ b/sdk/auth/iflow.go
@@ -42,9 +42,14 @@ func (a *IFlowAuthenticator) Login(ctx context.Context, cfg *config.Config, opts
 		opts = &LoginOptions{}
 	}
 
+	callbackPort := iflow.CallbackPort
+	if opts.CallbackPort > 0 {
+		callbackPort = opts.CallbackPort
+	}
+
 	authSvc := iflow.NewIFlowAuth(cfg)
 
-	oauthServer := iflow.NewOAuthServer(iflow.CallbackPort)
+	oauthServer := iflow.NewOAuthServer(callbackPort)
 	if err := oauthServer.Start(); err != nil {
 		if strings.Contains(err.Error(), "already in use") {
 			return nil, fmt.Errorf("iflow authentication server port in use: %w", err)
@@ -64,21 +69,21 @@ func (a *IFlowAuthenticator) Login(ctx context.Context, cfg *config.Config, opts
 		return nil, fmt.Errorf("iflow auth: failed to generate state: %w", err)
 	}
 
-	authURL, redirectURI := authSvc.AuthorizationURL(state, iflow.CallbackPort)
+	authURL, redirectURI := authSvc.AuthorizationURL(state, callbackPort)
 
 	if !opts.NoBrowser {
 		fmt.Println("Opening browser for iFlow authentication")
 		if !browser.IsAvailable() {
 			log.Warn("No browser available; please open the URL manually")
-			util.PrintSSHTunnelInstructions(iflow.CallbackPort)
+			util.PrintSSHTunnelInstructions(callbackPort)
 			fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL)
 		} else if err = browser.OpenURL(authURL); err != nil {
 			log.Warnf("Failed to open browser automatically: %v", err)
-			util.PrintSSHTunnelInstructions(iflow.CallbackPort)
+			util.PrintSSHTunnelInstructions(callbackPort)
 			fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL)
 		}
 	} else {
-		util.PrintSSHTunnelInstructions(iflow.CallbackPort)
+		util.PrintSSHTunnelInstructions(callbackPort)
 		fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL)
 	}
 
diff --git a/sdk/auth/interfaces.go b/sdk/auth/interfaces.go
index 7a7868e1..64cf8ed0 100644
--- a/sdk/auth/interfaces.go
+++ b/sdk/auth/interfaces.go
@@ -14,10 +14,11 @@ var ErrRefreshNotSupported = errors.New("cliproxy auth: refresh not supported")
 // LoginOptions captures generic knobs shared across authenticators.
 // Provider-specific logic can inspect Metadata for extra parameters.
 type LoginOptions struct {
-	NoBrowser bool
-	ProjectID string
-	Metadata  map[string]string
-	Prompt    func(prompt string) (string, error)
+	NoBrowser    bool
+	ProjectID    string
+	CallbackPort int
+	Metadata     map[string]string
+	Prompt       func(prompt string) (string, error)
 }
 
 // Authenticator manages login and optional refresh flows for a provider.
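For reference, the refresh-loop guard in sdk/auth/filestore.go above boils down to: parse both blobs, drop the volatile fields, and compare structurally. Here is a minimal standalone sketch under the same field list; equalIgnoringVolatile is an illustrative name, not the patch's function.

package main

import (
	"encoding/json"
	"fmt"
	"reflect"
)

// equalIgnoringVolatile reports whether two metadata blobs differ in anything
// other than fields that churn on every token refresh.
func equalIgnoringVolatile(a, b []byte) bool {
	var ma, mb map[string]any
	if json.Unmarshal(a, &ma) != nil || json.Unmarshal(b, &mb) != nil {
		return false
	}
	for _, f := range []string{"timestamp", "expired", "expires_in", "last_refresh", "access_token"} {
		delete(ma, f)
		delete(mb, f)
	}
	return reflect.DeepEqual(ma, mb)
}

func main() {
	before := []byte(`{"refresh_token":"rt-1","access_token":"at-old","last_refresh":"2025-01-01T00:00:00Z"}`)
	after := []byte(`{"refresh_token":"rt-1","access_token":"at-new","last_refresh":"2025-01-02T00:00:00Z"}`)
	// true: only volatile fields changed, so Save can skip the write and the
	// file watcher never sees a spurious change event.
	fmt.Println(equalIgnoringVolatile(before, after))
}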
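The same precedence rule recurs in every authenticator touched above (Antigravity, Claude, Codex, iFlow, Gemini): a positive LoginOptions.CallbackPort wins, anything else keeps the provider-specific default. A minimal sketch of that rule follows; resolveCallbackPort and the port constant are illustrative, since the patch inlines this logic in each Login implementation.

package main

import "fmt"

// resolveCallbackPort applies the override rule: a positive override replaces
// the provider default; zero (the unset value) or a negative value keeps it.
func resolveCallbackPort(providerDefault, override int) int {
	if override > 0 {
		return override
	}
	return providerDefault
}

func main() {
	const providerDefault = 54545 // stand-in; the real defaults live in each provider package
	fmt.Println(resolveCallbackPort(providerDefault, 0))    // 54545: no override requested
	fmt.Println(resolveCallbackPort(providerDefault, 8085)) // 8085: caller-supplied port wins
}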
diff --git a/sdk/cliproxy/auth/api_key_model_alias_test.go b/sdk/cliproxy/auth/api_key_model_alias_test.go
new file mode 100644
index 00000000..70915d9e
--- /dev/null
+++ b/sdk/cliproxy/auth/api_key_model_alias_test.go
@@ -0,0 +1,180 @@
+package auth
+
+import (
+	"context"
+	"testing"
+
+	internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+)
+
+func TestLookupAPIKeyUpstreamModel(t *testing.T) {
+	cfg := &internalconfig.Config{
+		GeminiKey: []internalconfig.GeminiKey{
+			{
+				APIKey:  "k",
+				BaseURL: "https://example.com",
+				Models: []internalconfig.GeminiModel{
+					{Name: "gemini-2.5-pro-exp-03-25", Alias: "g25p"},
+					{Name: "gemini-2.5-flash(low)", Alias: "g25f"},
+				},
+			},
+		},
+	}
+
+	mgr := NewManager(nil, nil, nil)
+	mgr.SetConfig(cfg)
+
+	ctx := context.Background()
+	_, _ = mgr.Register(ctx, &Auth{ID: "a1", Provider: "gemini", Attributes: map[string]string{"api_key": "k", "base_url": "https://example.com"}})
+
+	tests := []struct {
+		name   string
+		authID string
+		input  string
+		want   string
+	}{
+		// Fast path + suffix preservation
+		{"alias with suffix", "a1", "g25p(8192)", "gemini-2.5-pro-exp-03-25(8192)"},
+		{"alias without suffix", "a1", "g25p", "gemini-2.5-pro-exp-03-25"},
+
+		// Config suffix takes priority
+		{"config suffix priority", "a1", "g25f(high)", "gemini-2.5-flash(low)"},
+		{"config suffix no user suffix", "a1", "g25f", "gemini-2.5-flash(low)"},
+
+		// Case insensitive
+		{"uppercase alias", "a1", "G25P", "gemini-2.5-pro-exp-03-25"},
+		{"mixed case with suffix", "a1", "G25p(4096)", "gemini-2.5-pro-exp-03-25(4096)"},
+
+		// Direct name lookup
+		{"upstream name direct", "a1", "gemini-2.5-pro-exp-03-25", "gemini-2.5-pro-exp-03-25"},
+		{"upstream name with suffix", "a1", "gemini-2.5-pro-exp-03-25(8192)", "gemini-2.5-pro-exp-03-25(8192)"},
+
+		// Cache miss scenarios
+		{"non-existent auth", "non-existent", "g25p", ""},
+		{"unknown alias", "a1", "unknown-alias", ""},
+		{"empty auth ID", "", "g25p", ""},
+		{"empty model", "a1", "", ""},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			resolved := mgr.lookupAPIKeyUpstreamModel(tt.authID, tt.input)
+			if resolved != tt.want {
+				t.Errorf("lookupAPIKeyUpstreamModel(%q, %q) = %q, want %q", tt.authID, tt.input, resolved, tt.want)
+			}
+		})
+	}
+}
+
+func TestAPIKeyModelAlias_ConfigHotReload(t *testing.T) {
+	cfg := &internalconfig.Config{
+		GeminiKey: []internalconfig.GeminiKey{
+			{
+				APIKey: "k",
+				Models: []internalconfig.GeminiModel{{Name: "gemini-2.5-pro-exp-03-25", Alias: "g25p"}},
+			},
+		},
+	}
+
+	mgr := NewManager(nil, nil, nil)
+	mgr.SetConfig(cfg)
+
+	ctx := context.Background()
+	_, _ = mgr.Register(ctx, &Auth{ID: "a1", Provider: "gemini", Attributes: map[string]string{"api_key": "k"}})
+
+	// Initial alias
+	if resolved := mgr.lookupAPIKeyUpstreamModel("a1", "g25p"); resolved != "gemini-2.5-pro-exp-03-25" {
+		t.Fatalf("before reload: got %q, want %q", resolved, "gemini-2.5-pro-exp-03-25")
+	}
+
+	// Hot reload with new alias
+	mgr.SetConfig(&internalconfig.Config{
+		GeminiKey: []internalconfig.GeminiKey{
+			{
+				APIKey: "k",
+				Models: []internalconfig.GeminiModel{{Name: "gemini-2.5-flash", Alias: "g25p"}},
+			},
+		},
+	})
+
+	// New alias should take effect
+	if resolved := mgr.lookupAPIKeyUpstreamModel("a1", "g25p"); resolved != "gemini-2.5-flash" {
+		t.Fatalf("after reload: got %q, want %q", resolved, "gemini-2.5-flash")
+	}
+}
+
+func TestAPIKeyModelAlias_MultipleProviders(t *testing.T) {
+	cfg := &internalconfig.Config{
+		GeminiKey: []internalconfig.GeminiKey{{APIKey: "gemini-key", Models: []internalconfig.GeminiModel{{Name: "gemini-2.5-pro", Alias: "gp"}}}},
+		ClaudeKey: []internalconfig.ClaudeKey{{APIKey: "claude-key", Models: []internalconfig.ClaudeModel{{Name: "claude-sonnet-4", Alias: "cs4"}}}},
+		CodexKey:  []internalconfig.CodexKey{{APIKey: "codex-key", Models: []internalconfig.CodexModel{{Name: "o3", Alias: "o"}}}},
+	}
+
+	mgr := NewManager(nil, nil, nil)
+	mgr.SetConfig(cfg)
+
+	ctx := context.Background()
+	_, _ = mgr.Register(ctx, &Auth{ID: "gemini-auth", Provider: "gemini", Attributes: map[string]string{"api_key": "gemini-key"}})
+	_, _ = mgr.Register(ctx, &Auth{ID: "claude-auth", Provider: "claude", Attributes: map[string]string{"api_key": "claude-key"}})
+	_, _ = mgr.Register(ctx, &Auth{ID: "codex-auth", Provider: "codex", Attributes: map[string]string{"api_key": "codex-key"}})
+
+	tests := []struct {
+		authID, input, want string
+	}{
+		{"gemini-auth", "gp", "gemini-2.5-pro"},
+		{"claude-auth", "cs4", "claude-sonnet-4"},
+		{"codex-auth", "o", "o3"},
+	}
+
+	for _, tt := range tests {
+		if resolved := mgr.lookupAPIKeyUpstreamModel(tt.authID, tt.input); resolved != tt.want {
+			t.Errorf("lookupAPIKeyUpstreamModel(%q, %q) = %q, want %q", tt.authID, tt.input, resolved, tt.want)
+		}
+	}
+}
+
+func TestApplyAPIKeyModelAlias(t *testing.T) {
+	cfg := &internalconfig.Config{
+		GeminiKey: []internalconfig.GeminiKey{
+			{APIKey: "k", Models: []internalconfig.GeminiModel{{Name: "gemini-2.5-pro-exp-03-25", Alias: "g25p"}}},
+		},
+	}
+
+	mgr := NewManager(nil, nil, nil)
+	mgr.SetConfig(cfg)
+
+	ctx := context.Background()
+	apiKeyAuth := &Auth{ID: "a1", Provider: "gemini", Attributes: map[string]string{"api_key": "k"}}
+	oauthAuth := &Auth{ID: "oauth-auth", Provider: "gemini", Attributes: map[string]string{"auth_kind": "oauth"}}
+	_, _ = mgr.Register(ctx, apiKeyAuth)
+
+	tests := []struct {
+		name       string
+		auth       *Auth
+		inputModel string
+		wantModel  string
+	}{
+		{
+			name:       "api_key auth with alias",
+			auth:       apiKeyAuth,
+			inputModel: "g25p(8192)",
+			wantModel:  "gemini-2.5-pro-exp-03-25(8192)",
+		},
+		{
+			name:       "oauth auth passthrough",
+			auth:       oauthAuth,
+			inputModel: "some-model",
+			wantModel:  "some-model",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			resolvedModel := mgr.applyAPIKeyModelAlias(tt.auth, tt.inputModel)
+
+			if resolvedModel != tt.wantModel {
+				t.Errorf("model = %q, want %q", resolvedModel, tt.wantModel)
+			}
+		})
+	}
+}
diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go
index 431e2259..43483672 100644
--- a/sdk/cliproxy/auth/conductor.go
+++ b/sdk/cliproxy/auth/conductor.go
@@ -15,8 +15,10 @@ import (
 	"time"
 
 	"github.com/google/uuid"
+	internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	log "github.com/sirupsen/logrus"
@@ -117,8 +119,16 @@ type Manager struct {
 	requestRetry     atomic.Int32
 	maxRetryInterval atomic.Int64
 
-	// modelNameMappings stores global model name alias mappings (alias -> upstream name) keyed by channel.
-	modelNameMappings atomic.Value
+	// oauthModelAlias stores global OAuth model alias mappings (alias -> upstream name) keyed by channel.
+	oauthModelAlias atomic.Value
+
+	// apiKeyModelAlias caches resolved model alias mappings for API-key auths.
+	// Keyed by auth.ID, value is alias(lower) -> upstream model (including suffix).
+	apiKeyModelAlias atomic.Value
+
+	// runtimeConfig stores the latest application config for request-time decisions.
+	// It is initialized in NewManager; never Load() before first Store().
+	runtimeConfig atomic.Value
 
 	// Optional HTTP RoundTripper provider injected by host.
 	rtProvider RoundTripperProvider
@@ -135,7 +145,7 @@ func NewManager(store Store, selector Selector, hook Hook) *Manager {
 	if hook == nil {
 		hook = NoopHook{}
 	}
-	return &Manager{
+	manager := &Manager{
 		store:           store,
 		executors:       make(map[string]ProviderExecutor),
 		selector:        selector,
@@ -143,6 +153,10 @@ func NewManager(store Store, selector Selector, hook Hook) *Manager {
 		auths:           make(map[string]*Auth),
 		providerOffsets: make(map[string]int),
 	}
+	// atomic.Value requires non-nil initial value.
+	manager.runtimeConfig.Store(&internalconfig.Config{})
+	manager.apiKeyModelAlias.Store(apiKeyModelAliasTable(nil))
+	return manager
 }
 
 func (m *Manager) SetSelector(selector Selector) {
@@ -171,6 +185,181 @@ func (m *Manager) SetRoundTripperProvider(p RoundTripperProvider) {
 	m.mu.Unlock()
 }
 
+// SetConfig updates the runtime config snapshot used by request-time helpers.
+// Callers should provide the latest config on reload so per-credential alias mapping stays in sync.
+func (m *Manager) SetConfig(cfg *internalconfig.Config) {
+	if m == nil {
+		return
+	}
+	if cfg == nil {
+		cfg = &internalconfig.Config{}
+	}
+	m.runtimeConfig.Store(cfg)
+	m.rebuildAPIKeyModelAliasFromRuntimeConfig()
+}
+
+func (m *Manager) lookupAPIKeyUpstreamModel(authID, requestedModel string) string {
+	if m == nil {
+		return ""
+	}
+	authID = strings.TrimSpace(authID)
+	if authID == "" {
+		return ""
+	}
+	requestedModel = strings.TrimSpace(requestedModel)
+	if requestedModel == "" {
+		return ""
+	}
+	table, _ := m.apiKeyModelAlias.Load().(apiKeyModelAliasTable)
+	if table == nil {
+		return ""
+	}
+	byAlias := table[authID]
+	if len(byAlias) == 0 {
+		return ""
+	}
+	key := strings.ToLower(thinking.ParseSuffix(requestedModel).ModelName)
+	if key == "" {
+		key = strings.ToLower(requestedModel)
+	}
+	resolved := strings.TrimSpace(byAlias[key])
+	if resolved == "" {
+		return ""
+	}
+	// Preserve thinking suffix from the client's requested model unless config already has one.
+	requestResult := thinking.ParseSuffix(requestedModel)
+	if thinking.ParseSuffix(resolved).HasSuffix {
+		return resolved
+	}
+	if requestResult.HasSuffix && requestResult.RawSuffix != "" {
+		return resolved + "(" + requestResult.RawSuffix + ")"
+	}
+	return resolved
+
+}
+
+func (m *Manager) rebuildAPIKeyModelAliasFromRuntimeConfig() {
+	if m == nil {
+		return
+	}
+	cfg, _ := m.runtimeConfig.Load().(*internalconfig.Config)
+	if cfg == nil {
+		cfg = &internalconfig.Config{}
+	}
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	m.rebuildAPIKeyModelAliasLocked(cfg)
+}
+
+func (m *Manager) rebuildAPIKeyModelAliasLocked(cfg *internalconfig.Config) {
+	if m == nil {
+		return
+	}
+	if cfg == nil {
+		cfg = &internalconfig.Config{}
+	}
+
+	out := make(apiKeyModelAliasTable)
+	for _, auth := range m.auths {
+		if auth == nil {
+			continue
+		}
+		if strings.TrimSpace(auth.ID) == "" {
+			continue
+		}
+		kind, _ := auth.AccountInfo()
+		if !strings.EqualFold(strings.TrimSpace(kind), "api_key") {
+			continue
+		}
+
+		byAlias := make(map[string]string)
+		provider := strings.ToLower(strings.TrimSpace(auth.Provider))
+		switch provider {
+		case "gemini":
+			if entry := resolveGeminiAPIKeyConfig(cfg, auth); entry != nil {
+				compileAPIKeyModelAliasForModels(byAlias, entry.Models)
+			}
+		case "claude":
+			if entry := resolveClaudeAPIKeyConfig(cfg, auth); entry != nil {
+				compileAPIKeyModelAliasForModels(byAlias, entry.Models)
+			}
+		case "codex":
+			if entry := resolveCodexAPIKeyConfig(cfg, auth); entry != nil {
+				compileAPIKeyModelAliasForModels(byAlias, entry.Models)
+			}
+		case "vertex":
+			if entry := resolveVertexAPIKeyConfig(cfg, auth); entry != nil {
+				compileAPIKeyModelAliasForModels(byAlias, entry.Models)
+			}
+		default:
+			// OpenAI-compat uses config selection from auth.Attributes.
+			providerKey := ""
+			compatName := ""
+			if auth.Attributes != nil {
+				providerKey = strings.TrimSpace(auth.Attributes["provider_key"])
+				compatName = strings.TrimSpace(auth.Attributes["compat_name"])
+			}
+			if compatName != "" || strings.EqualFold(strings.TrimSpace(auth.Provider), "openai-compatibility") {
+				if entry := resolveOpenAICompatConfig(cfg, providerKey, compatName, auth.Provider); entry != nil {
+					compileAPIKeyModelAliasForModels(byAlias, entry.Models)
+				}
+			}
+		}
+
+		if len(byAlias) > 0 {
+			out[auth.ID] = byAlias
+		}
+	}
+
+	m.apiKeyModelAlias.Store(out)
+}
+
+func compileAPIKeyModelAliasForModels[T interface {
+	GetName() string
+	GetAlias() string
+}](out map[string]string, models []T) {
+	if out == nil {
+		return
+	}
+	for i := range models {
+		alias := strings.TrimSpace(models[i].GetAlias())
+		name := strings.TrimSpace(models[i].GetName())
+		if alias == "" || name == "" {
+			continue
+		}
+		aliasKey := strings.ToLower(thinking.ParseSuffix(alias).ModelName)
+		if aliasKey == "" {
+			aliasKey = strings.ToLower(alias)
+		}
+		// Config priority: first alias wins.
+		if _, exists := out[aliasKey]; exists {
+			continue
+		}
+		out[aliasKey] = name
+		// Also allow direct lookup by upstream name (case-insensitive), so lookups on already-upstream
+		// models remain a cheap no-op.
+		nameKey := strings.ToLower(thinking.ParseSuffix(name).ModelName)
+		if nameKey == "" {
+			nameKey = strings.ToLower(name)
+		}
+		if nameKey != "" {
+			if _, exists := out[nameKey]; !exists {
+				out[nameKey] = name
+			}
+		}
+		// Preserve config suffix priority by seeding a base-name lookup when name already has suffix.
+		nameResult := thinking.ParseSuffix(name)
+		if nameResult.HasSuffix {
+			baseKey := strings.ToLower(strings.TrimSpace(nameResult.ModelName))
+			if baseKey != "" {
+				if _, exists := out[baseKey]; !exists {
+					out[baseKey] = name
+				}
+			}
+		}
+	}
+}
+
 // SetRetryConfig updates retry attempts and cooldown wait interval.
 func (m *Manager) SetRetryConfig(retry int, maxRetryInterval time.Duration) {
 	if m == nil {
@@ -219,6 +408,7 @@ func (m *Manager) Register(ctx context.Context, auth *Auth) (*Auth, error) {
 	m.mu.Lock()
 	m.auths[auth.ID] = auth.Clone()
 	m.mu.Unlock()
+	m.rebuildAPIKeyModelAliasFromRuntimeConfig()
 	_ = m.persist(ctx, auth)
 	m.hook.OnAuthRegistered(ctx, auth.Clone())
 	return auth.Clone(), nil
@@ -237,6 +427,7 @@ func (m *Manager) Update(ctx context.Context, auth *Auth) (*Auth, error) {
 	auth.EnsureIndex()
 	m.auths[auth.ID] = auth.Clone()
 	m.mu.Unlock()
+	m.rebuildAPIKeyModelAliasFromRuntimeConfig()
 	_ = m.persist(ctx, auth)
 	m.hook.OnAuthUpdated(ctx, auth.Clone())
 	return auth.Clone(), nil
@@ -261,6 +452,11 @@ func (m *Manager) Load(ctx context.Context) error {
 		auth.EnsureIndex()
 		m.auths[auth.ID] = auth.Clone()
 	}
+	cfg, _ := m.runtimeConfig.Load().(*internalconfig.Config)
+	if cfg == nil {
+		cfg = &internalconfig.Config{}
+	}
+	m.rebuildAPIKeyModelAliasLocked(cfg)
 	return nil
 }
 
@@ -271,7 +467,6 @@ func (m *Manager) Execute(ctx context.Context, providers []string, req cliproxye
 	if len(normalized) == 0 {
 		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
 	}
-	rotated := m.rotateProviders(req.Model, normalized)
 
 	retryTimes, maxWait := m.retrySettings()
 	attempts := retryTimes + 1
@@ -281,14 +476,12 @@
 	var lastErr error
 	for attempt := 0; attempt < attempts; attempt++ {
-		resp, errExec := m.executeProvidersOnce(ctx, rotated, func(execCtx context.Context, provider string) (cliproxyexecutor.Response, error) {
-			return m.executeWithProvider(execCtx, provider, req, opts)
-		})
+		resp, errExec := m.executeMixedOnce(ctx, normalized, req, opts)
 		if errExec == nil {
 			return resp, nil
 		}
 		lastErr = errExec
-		wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, attempts, rotated, req.Model, maxWait)
+		wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, attempts, normalized, req.Model, maxWait)
 		if !shouldRetry {
 			break
 		}
@@ -309,7 +502,6 @@ func (m *Manager) ExecuteCount(ctx context.Context, providers []string, req clip
 	if len(normalized) == 0 {
 		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
 	}
-	rotated := m.rotateProviders(req.Model, normalized)
 
 	retryTimes, maxWait := m.retrySettings()
 	attempts := retryTimes + 1
@@ -319,14 +511,12 @@ func (m *Manager) ExecuteCount(ctx context.Context, providers []string, req clip
 	var lastErr error
 	for attempt := 0; attempt < attempts; attempt++ {
-		resp, errExec := m.executeProvidersOnce(ctx, rotated, func(execCtx context.Context, provider string) (cliproxyexecutor.Response, error) {
-			return m.executeCountWithProvider(execCtx, provider, req, opts)
-		})
+		resp, errExec := m.executeCountMixedOnce(ctx, normalized, req, opts)
 		if errExec == nil {
 			return resp, nil
 		}
 		lastErr = errExec
-		wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, attempts, rotated, req.Model, maxWait)
+		wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, attempts, normalized, req.Model, maxWait)
 		if !shouldRetry {
 			break
 		}
@@ -347,7 +537,6 @@ func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cli
 	if len(normalized) == 0 {
 		return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"}
 	}
-	rotated := m.rotateProviders(req.Model, normalized)
 
 	retryTimes, maxWait := m.retrySettings()
 	attempts := retryTimes + 1
@@ -357,14 +546,12 @@ func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cli
 	var lastErr error
 	for attempt := 0; attempt < attempts; attempt++ {
-		chunks, errStream := m.executeStreamProvidersOnce(ctx, rotated, func(execCtx context.Context, provider string) (<-chan cliproxyexecutor.StreamChunk, error) {
-			return m.executeStreamWithProvider(execCtx, provider, req, opts)
-		})
+		chunks, errStream := m.executeStreamMixedOnce(ctx, normalized, req, opts)
 		if errStream == nil {
 			return chunks, nil
 		}
 		lastErr = errStream
-		wait, shouldRetry := m.shouldRetryAfterError(errStream, attempt, attempts, rotated, req.Model, maxWait)
+		wait, shouldRetry := m.shouldRetryAfterError(errStream, attempt, attempts, normalized, req.Model, maxWait)
 		if !shouldRetry {
 			break
 		}
@@ -378,6 +565,170 @@ func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cli
 	return nil, &Error{Code: "auth_not_found", Message: "no auth available"}
 }
 
+func (m *Manager) executeMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	if len(providers) == 0 {
+		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
+	}
+	routeModel := req.Model
+	tried := make(map[string]struct{})
+	var lastErr error
+	for {
+		auth, executor, provider, errPick := m.pickNextMixed(ctx, providers, routeModel, opts, tried)
+		if errPick != nil {
+			if lastErr != nil {
+				return cliproxyexecutor.Response{}, lastErr
+			}
+			return cliproxyexecutor.Response{}, errPick
+		}
+
+		entry := logEntryWithRequestID(ctx)
+		debugLogAuthSelection(entry, auth, provider, req.Model)
+
+		tried[auth.ID] = struct{}{}
+		execCtx := ctx
+		if rt := m.roundTripperFor(auth); rt != nil {
+			execCtx = context.WithValue(execCtx, roundTripperContextKey{}, rt)
+			execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt)
+		}
+		execReq := req
+		execReq.Model = rewriteModelForAuth(routeModel, auth)
+		execReq.Model = m.applyOAuthModelAlias(auth, execReq.Model)
+		execReq.Model = m.applyAPIKeyModelAlias(auth, execReq.Model)
+		resp, errExec := executor.Execute(execCtx, auth, execReq, opts)
+		result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil}
+		if errExec != nil {
+			result.Error = &Error{Message: errExec.Error()}
+			var se cliproxyexecutor.StatusError
+			if errors.As(errExec, &se) && se != nil {
+				result.Error.HTTPStatus = se.StatusCode()
+			}
+			if ra := retryAfterFromError(errExec); ra != nil {
+				result.RetryAfter = ra
+			}
+			m.MarkResult(execCtx, result)
+			lastErr = errExec
+			continue
+		}
+		m.MarkResult(execCtx, result)
+		return resp, nil
+	}
+}
+
+func (m *Manager) executeCountMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	if len(providers) == 0 {
+		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
+	}
+	routeModel := req.Model
+	tried := make(map[string]struct{})
+	var lastErr error
+	for {
+		auth, executor, provider, errPick := m.pickNextMixed(ctx, providers, routeModel, opts, tried)
+		if errPick != nil {
+			if lastErr != nil {
+				return cliproxyexecutor.Response{}, lastErr
+			}
+			return cliproxyexecutor.Response{}, errPick
+		}
+
+		entry := logEntryWithRequestID(ctx)
+		debugLogAuthSelection(entry, auth, provider, req.Model)
+
+		tried[auth.ID] = struct{}{}
+		execCtx := ctx
+		if rt := m.roundTripperFor(auth); rt != nil {
+			execCtx = context.WithValue(execCtx, roundTripperContextKey{}, rt)
+			execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt)
+		}
+		execReq := req
+		execReq.Model = rewriteModelForAuth(routeModel, auth)
+		execReq.Model = m.applyOAuthModelAlias(auth, execReq.Model)
+		execReq.Model = m.applyAPIKeyModelAlias(auth, execReq.Model)
+		resp, errExec := executor.CountTokens(execCtx, auth, execReq, opts)
+		result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil}
+		if errExec != nil {
+			result.Error = &Error{Message: errExec.Error()}
+			var se cliproxyexecutor.StatusError
+			if errors.As(errExec, &se) && se != nil {
+				result.Error.HTTPStatus = se.StatusCode()
+			}
+			if ra := retryAfterFromError(errExec); ra != nil {
+				result.RetryAfter = ra
+			}
+			m.MarkResult(execCtx, result)
+			lastErr = errExec
+			continue
+		}
+		m.MarkResult(execCtx, result)
+		return resp, nil
+	}
+}
+
+func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (<-chan cliproxyexecutor.StreamChunk, error) {
+	if len(providers) == 0 {
+		return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"}
+	}
+	routeModel := req.Model
+	tried := make(map[string]struct{})
+	var lastErr error
+	for {
+		auth, executor, provider, errPick := m.pickNextMixed(ctx, providers, routeModel, opts, tried)
+		if errPick != nil {
+			if lastErr != nil {
+				return nil, lastErr
+			}
+			return nil, errPick
+		}
+
+		entry := logEntryWithRequestID(ctx)
+		debugLogAuthSelection(entry, auth, provider, req.Model)
+
+		tried[auth.ID] = struct{}{}
+		execCtx := ctx
+		if rt := m.roundTripperFor(auth); rt != nil {
+			execCtx = context.WithValue(execCtx, roundTripperContextKey{}, rt)
+			execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt)
+		}
+		execReq := req
+		execReq.Model = rewriteModelForAuth(routeModel, auth)
+		execReq.Model = m.applyOAuthModelAlias(auth, execReq.Model)
+		execReq.Model = m.applyAPIKeyModelAlias(auth, execReq.Model)
+		chunks, errStream := executor.ExecuteStream(execCtx, auth, execReq, opts)
+		if errStream != nil {
+			rerr := &Error{Message: errStream.Error()}
+			var se cliproxyexecutor.StatusError
+			if errors.As(errStream, &se) && se != nil {
+				rerr.HTTPStatus = se.StatusCode()
+			}
+			result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: false, Error: rerr}
+			result.RetryAfter = retryAfterFromError(errStream)
+			m.MarkResult(execCtx, result)
+			lastErr = errStream
+			continue
+		}
+		out := make(chan cliproxyexecutor.StreamChunk)
+		go func(streamCtx context.Context, streamAuth *Auth, streamProvider string, streamChunks <-chan cliproxyexecutor.StreamChunk) {
+			defer close(out)
+			var failed bool
+			for chunk := range streamChunks {
+				if chunk.Err != nil && !failed {
+					failed = true
+					rerr := &Error{Message: chunk.Err.Error()}
+					var se cliproxyexecutor.StatusError
+					if errors.As(chunk.Err, &se) && se != nil {
+						rerr.HTTPStatus = se.StatusCode()
+					}
+					m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: false, Error: rerr})
+				}
+				out <- chunk
+			}
+			if !failed {
+				m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: true})
+			}
+		}(execCtx, auth.Clone(), provider, chunks)
+		return out, nil
+	}
+}
+
 func (m *Manager) executeWithProvider(ctx context.Context, provider string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
 	if provider == "" {
 		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "provider identifier is empty"}
@@ -404,8 +755,9 @@ func (m *Manager) executeWithProvider(ctx context.Context, provider string, req
 		execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt)
 	}
 	execReq := req
-	execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth)
-	execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata)
+	execReq.Model = rewriteModelForAuth(routeModel, auth)
+	execReq.Model = m.applyOAuthModelAlias(auth, execReq.Model)
+	execReq.Model = m.applyAPIKeyModelAlias(auth, execReq.Model)
 	resp, errExec := executor.Execute(execCtx, auth, execReq, opts)
 	result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil}
 	if errExec != nil {
@@ -452,8 +804,9 @@ func (m *Manager) executeCountWithProvider(ctx context.Context, provider string,
 		execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt)
 	}
 	execReq := req
-	execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth)
-	execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata)
+	execReq.Model = rewriteModelForAuth(routeModel, auth)
+	execReq.Model = m.applyOAuthModelAlias(auth, execReq.Model)
+	execReq.Model = m.applyAPIKeyModelAlias(auth, execReq.Model)
 	resp, errExec := executor.CountTokens(execCtx, auth, execReq, opts)
 	result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil}
 	if errExec != nil {
@@ -500,8 +853,9 @@ func (m *Manager) executeStreamWithProvider(ctx context.Context, provider string
 		execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt)
 	}
 	execReq := req
-	execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth)
-	execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata)
+	execReq.Model = rewriteModelForAuth(routeModel, auth)
+	execReq.Model = m.applyOAuthModelAlias(auth, execReq.Model)
+	execReq.Model = m.applyAPIKeyModelAlias(auth, execReq.Model)
 	chunks, errStream := executor.ExecuteStream(execCtx, auth, execReq, opts)
 	if errStream != nil {
 		rerr := &Error{Message: errStream.Error()}
@@ -539,51 +893,229 @@ func (m *Manager) executeStreamWithProvider(ctx context.Context, provider string
 	}
 }
 
-func rewriteModelForAuth(model string, metadata map[string]any, auth *Auth) (string, map[string]any) {
+func rewriteModelForAuth(model string, auth *Auth) string {
 	if auth == nil || model == "" {
-		return model, metadata
+		return model
 	}
 	prefix := strings.TrimSpace(auth.Prefix)
 	if prefix == "" {
-		return model, metadata
+		return model
 	}
 	needle := prefix + "/"
 	if !strings.HasPrefix(model, needle) {
-		return model, metadata
+		return model
 	}
-	rewritten := strings.TrimPrefix(model, needle)
-	return rewritten, stripPrefixFromMetadata(metadata, needle)
+	return strings.TrimPrefix(model, needle)
 }
 
-func stripPrefixFromMetadata(metadata map[string]any, needle string) map[string]any {
-	if len(metadata) == 0 || needle == "" {
-		return metadata
+func (m *Manager) applyAPIKeyModelAlias(auth *Auth, requestedModel string) string {
+	if m == nil || auth == nil {
+		return requestedModel
 	}
-	keys := []string{
-		util.ThinkingOriginalModelMetadataKey,
-		util.GeminiOriginalModelMetadataKey,
-		util.ModelMappingOriginalModelMetadataKey,
+
+	kind, _ := auth.AccountInfo()
+	if !strings.EqualFold(strings.TrimSpace(kind), "api_key") {
+		return requestedModel
 	}
-	var out map[string]any
-	for _, key := range keys {
-		raw, ok := metadata[key]
-		if !ok {
+
+	requestedModel = strings.TrimSpace(requestedModel)
+	if requestedModel == "" {
+		return requestedModel
+	}
+
+	// Fast path: lookup per-auth mapping table (keyed by auth.ID).
+	if resolved := m.lookupAPIKeyUpstreamModel(auth.ID, requestedModel); resolved != "" {
+		return resolved
+	}
+
+	// Slow path: scan config for the matching credential entry and resolve alias.
+	// This acts as a safety net if mappings are stale or auth.ID is missing.
+	cfg, _ := m.runtimeConfig.Load().(*internalconfig.Config)
+	if cfg == nil {
+		cfg = &internalconfig.Config{}
+	}
+
+	provider := strings.ToLower(strings.TrimSpace(auth.Provider))
+	upstreamModel := ""
+	switch provider {
+	case "gemini":
+		upstreamModel = resolveUpstreamModelForGeminiAPIKey(cfg, auth, requestedModel)
+	case "claude":
+		upstreamModel = resolveUpstreamModelForClaudeAPIKey(cfg, auth, requestedModel)
+	case "codex":
+		upstreamModel = resolveUpstreamModelForCodexAPIKey(cfg, auth, requestedModel)
+	case "vertex":
+		upstreamModel = resolveUpstreamModelForVertexAPIKey(cfg, auth, requestedModel)
+	default:
+		upstreamModel = resolveUpstreamModelForOpenAICompatAPIKey(cfg, auth, requestedModel)
+	}
+
+	// Return upstream model if found, otherwise return requested model.
+	if upstreamModel != "" {
+		return upstreamModel
+	}
+	return requestedModel
+}
+
+// APIKeyConfigEntry is a generic interface for API key configurations.
+type APIKeyConfigEntry interface {
+	GetAPIKey() string
+	GetBaseURL() string
+}
+
+func resolveAPIKeyConfig[T APIKeyConfigEntry](entries []T, auth *Auth) *T {
+	if auth == nil || len(entries) == 0 {
+		return nil
+	}
+	attrKey, attrBase := "", ""
+	if auth.Attributes != nil {
+		attrKey = strings.TrimSpace(auth.Attributes["api_key"])
+		attrBase = strings.TrimSpace(auth.Attributes["base_url"])
+	}
+	for i := range entries {
+		entry := &entries[i]
+		cfgKey := strings.TrimSpace((*entry).GetAPIKey())
+		cfgBase := strings.TrimSpace((*entry).GetBaseURL())
+		if attrKey != "" && attrBase != "" {
+			if strings.EqualFold(cfgKey, attrKey) && strings.EqualFold(cfgBase, attrBase) {
+				return entry
+			}
 			continue
 		}
-		value, okStr := raw.(string)
-		if !okStr || !strings.HasPrefix(value, needle) {
-			continue
-		}
-		if out == nil {
-			out = make(map[string]any, len(metadata))
-			for k, v := range metadata {
-				out[k] = v
+		if attrKey != "" && strings.EqualFold(cfgKey, attrKey) {
+			if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) {
+				return entry
 			}
 		}
-		out[key] = strings.TrimPrefix(value, needle)
+		if attrKey == "" && attrBase != "" && strings.EqualFold(cfgBase, attrBase) {
+			return entry
+		}
 	}
-	if out == nil {
-		return metadata
+	if attrKey != "" {
+		for i := range entries {
+			entry := &entries[i]
+			if strings.EqualFold(strings.TrimSpace((*entry).GetAPIKey()), attrKey) {
+				return entry
+			}
+		}
+	}
+	return nil
+}
+
+func resolveGeminiAPIKeyConfig(cfg *internalconfig.Config, auth *Auth) *internalconfig.GeminiKey {
+	if cfg == nil {
+		return nil
+	}
+	return resolveAPIKeyConfig(cfg.GeminiKey, auth)
+}
+
+func resolveClaudeAPIKeyConfig(cfg *internalconfig.Config, auth *Auth) *internalconfig.ClaudeKey {
+	if cfg == nil {
+		return nil
+	}
+	return resolveAPIKeyConfig(cfg.ClaudeKey, auth)
+}
+
+func resolveCodexAPIKeyConfig(cfg *internalconfig.Config, auth *Auth) *internalconfig.CodexKey {
+	if cfg == nil {
+		return nil
+	}
+	return resolveAPIKeyConfig(cfg.CodexKey, auth)
+}
+
+func resolveVertexAPIKeyConfig(cfg *internalconfig.Config, auth *Auth) *internalconfig.VertexCompatKey {
+	if cfg == nil {
+		return nil
+	}
+	return resolveAPIKeyConfig(cfg.VertexCompatAPIKey, auth)
+}
+
+func resolveUpstreamModelForGeminiAPIKey(cfg *internalconfig.Config, auth *Auth, requestedModel string) string {
+	entry := resolveGeminiAPIKeyConfig(cfg, auth)
+	if entry == nil {
+		return ""
+	}
+	return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models))
+}
+
+func resolveUpstreamModelForClaudeAPIKey(cfg *internalconfig.Config, auth *Auth, requestedModel string) string {
+	entry := resolveClaudeAPIKeyConfig(cfg, auth)
+	if entry == nil {
+		return ""
+	}
+	return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models))
+}
+
+func resolveUpstreamModelForCodexAPIKey(cfg *internalconfig.Config, auth *Auth, requestedModel string) string {
+	entry := resolveCodexAPIKeyConfig(cfg, auth)
+	if entry == nil {
+		return ""
+	}
+	return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models))
+}
+
+func resolveUpstreamModelForVertexAPIKey(cfg *internalconfig.Config, auth *Auth, requestedModel string) string {
+	entry := resolveVertexAPIKeyConfig(cfg, auth)
+	if entry == nil {
+		return ""
+	}
+	return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models))
+}
+
+func resolveUpstreamModelForOpenAICompatAPIKey(cfg *internalconfig.Config, auth *Auth, requestedModel string) string {
+	providerKey := ""
+	compatName := ""
+	if auth != nil && len(auth.Attributes) > 0 {
+		providerKey = strings.TrimSpace(auth.Attributes["provider_key"])
+		compatName = strings.TrimSpace(auth.Attributes["compat_name"])
+	}
+	if compatName == "" && !strings.EqualFold(strings.TrimSpace(auth.Provider), "openai-compatibility") {
+		return ""
+	}
+	entry := resolveOpenAICompatConfig(cfg, providerKey, compatName, auth.Provider)
+	if entry == nil {
+		return ""
+	}
+	return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models))
+}
+
+type apiKeyModelAliasTable map[string]map[string]string
+
+func resolveOpenAICompatConfig(cfg *internalconfig.Config, providerKey, compatName, authProvider string) *internalconfig.OpenAICompatibility {
+	if cfg == nil {
+		return nil
+	}
+	candidates := make([]string, 0, 3)
+	if v := strings.TrimSpace(compatName); v != "" {
+		candidates = append(candidates, v)
+	}
+	if v := strings.TrimSpace(providerKey); v != "" {
+		candidates = append(candidates, v)
+	}
+	if v := strings.TrimSpace(authProvider); v != "" {
+		candidates = append(candidates, v)
+	}
+	for i := range cfg.OpenAICompatibility {
+		compat := &cfg.OpenAICompatibility[i]
+		for _, candidate := range candidates {
+			if candidate != "" && strings.EqualFold(strings.TrimSpace(candidate), compat.Name) {
+				return compat
+			}
+		}
+	}
+	return nil
+}
+
+func asModelAliasEntries[T interface {
+	GetName() string
+	GetAlias() string
+}](models []T) []modelAliasEntry {
+	if len(models) == 0 {
+		return nil
+	}
+	out := make([]modelAliasEntry, 0, len(models))
+	for i := range models {
+		out = append(out, models[i])
 	}
 	return out
 }
@@ -1152,6 +1684,13 @@ func (m *Manager) pickNext(ctx context.Context, provider, model string, opts cli
 	}
 	candidates := make([]*Auth, 0, len(m.auths))
 	modelKey := strings.TrimSpace(model)
+	// Always use base model name (without thinking suffix) for auth matching.
+	if modelKey != "" {
+		parsed := thinking.ParseSuffix(modelKey)
+		if parsed.ModelName != "" {
+			modelKey = strings.TrimSpace(parsed.ModelName)
+		}
+	}
 	registryRef := registry.GetGlobalRegistry()
 	for _, candidate := range m.auths {
 		if candidate.Provider != provider || candidate.Disabled {
@@ -1191,6 +1730,84 @@ func (m *Manager) pickNext(ctx context.Context, provider, model string, opts cli
 	return authCopy, executor, nil
 }
 
+func (m *Manager) pickNextMixed(ctx context.Context, providers []string, model string, opts cliproxyexecutor.Options, tried map[string]struct{}) (*Auth, ProviderExecutor, string, error) {
+	providerSet := make(map[string]struct{}, len(providers))
+	for _, provider := range providers {
+		p := strings.TrimSpace(strings.ToLower(provider))
+		if p == "" {
+			continue
+		}
+		providerSet[p] = struct{}{}
+	}
+	if len(providerSet) == 0 {
+		return nil, nil, "", &Error{Code: "provider_not_found", Message: "no provider supplied"}
+	}
+
+	m.mu.RLock()
+	candidates := make([]*Auth, 0, len(m.auths))
+	modelKey := strings.TrimSpace(model)
+	// Always use base model name (without thinking suffix) for auth matching.
+	if modelKey != "" {
+		parsed := thinking.ParseSuffix(modelKey)
+		if parsed.ModelName != "" {
+			modelKey = strings.TrimSpace(parsed.ModelName)
+		}
+	}
+	registryRef := registry.GetGlobalRegistry()
+	for _, candidate := range m.auths {
+		if candidate == nil || candidate.Disabled {
+			continue
+		}
+		providerKey := strings.TrimSpace(strings.ToLower(candidate.Provider))
+		if providerKey == "" {
+			continue
+		}
+		if _, ok := providerSet[providerKey]; !ok {
+			continue
+		}
+		if _, used := tried[candidate.ID]; used {
+			continue
+		}
+		if _, ok := m.executors[providerKey]; !ok {
+			continue
+		}
+		if modelKey != "" && registryRef != nil && !registryRef.ClientSupportsModel(candidate.ID, modelKey) {
+			continue
+		}
+		candidates = append(candidates, candidate)
+	}
+	if len(candidates) == 0 {
+		m.mu.RUnlock()
+		return nil, nil, "", &Error{Code: "auth_not_found", Message: "no auth available"}
+	}
+	selected, errPick := m.selector.Pick(ctx, "mixed", model, opts, candidates)
+	if errPick != nil {
+		m.mu.RUnlock()
+		return nil, nil, "", errPick
+	}
+	if selected == nil {
+		m.mu.RUnlock()
+		return nil, nil, "", &Error{Code: "auth_not_found", Message: "selector returned no auth"}
+	}
+	providerKey := strings.TrimSpace(strings.ToLower(selected.Provider))
+	executor, okExecutor := m.executors[providerKey]
+	if !okExecutor {
+		m.mu.RUnlock()
+		return nil, nil, "", &Error{Code: "executor_not_found", Message: "executor not registered"}
+	}
+	authCopy := selected.Clone()
+	m.mu.RUnlock()
+	if !selected.indexAssigned {
+		m.mu.Lock()
+		if current := m.auths[authCopy.ID]; current != nil && !current.indexAssigned {
+			current.EnsureIndex()
+			authCopy = current.Clone()
+		}
+		m.mu.Unlock()
+	}
+	return authCopy, executor, providerKey, nil
+}
+
 func (m *Manager) persist(ctx context.Context, auth *Auth) error {
 	if m.store == nil || auth == nil {
 		return nil
diff --git a/sdk/cliproxy/auth/model_name_mappings.go b/sdk/cliproxy/auth/model_name_mappings.go
deleted file mode 100644
index 03380c09..00000000
--- a/sdk/cliproxy/auth/model_name_mappings.go
+++ /dev/null
@@ -1,171 +0,0 @@
-package auth
-
-import (
-	"strings"
-
-	internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
-)
-
-type modelNameMappingTable struct {
-	// reverse maps channel -> alias (lower) -> original upstream model name.
-	reverse map[string]map[string]string
-}
-
-func compileModelNameMappingTable(mappings map[string][]internalconfig.ModelNameMapping) *modelNameMappingTable {
-	if len(mappings) == 0 {
-		return &modelNameMappingTable{}
-	}
-	out := &modelNameMappingTable{
-		reverse: make(map[string]map[string]string, len(mappings)),
-	}
-	for rawChannel, entries := range mappings {
-		channel := strings.ToLower(strings.TrimSpace(rawChannel))
-		if channel == "" || len(entries) == 0 {
-			continue
-		}
-		rev := make(map[string]string, len(entries))
-		for _, entry := range entries {
-			name := strings.TrimSpace(entry.Name)
-			alias := strings.TrimSpace(entry.Alias)
-			if name == "" || alias == "" {
-				continue
-			}
-			if strings.EqualFold(name, alias) {
-				continue
-			}
-			aliasKey := strings.ToLower(alias)
-			if _, exists := rev[aliasKey]; exists {
-				continue
-			}
-			rev[aliasKey] = name
-		}
-		if len(rev) > 0 {
-			out.reverse[channel] = rev
-		}
-	}
-	if len(out.reverse) == 0 {
-		out.reverse = nil
-	}
-	return out
-}
-
-// SetOAuthModelMappings updates the OAuth model name mapping table used during execution.
-// The mapping is applied per-auth channel to resolve the upstream model name while keeping the
-// client-visible model name unchanged for translation/response formatting.
-func (m *Manager) SetOAuthModelMappings(mappings map[string][]internalconfig.ModelNameMapping) {
-	if m == nil {
-		return
-	}
-	table := compileModelNameMappingTable(mappings)
-	// atomic.Value requires non-nil store values.
-	if table == nil {
-		table = &modelNameMappingTable{}
-	}
-	m.modelNameMappings.Store(table)
-}
-
-// applyOAuthModelMapping resolves the upstream model from OAuth model mappings
-// and returns the resolved model along with updated metadata. If a mapping exists,
-// the returned model is the upstream model and metadata contains the original
-// requested model for response translation.
-func (m *Manager) applyOAuthModelMapping(auth *Auth, requestedModel string, metadata map[string]any) (string, map[string]any) {
-	upstreamModel := m.resolveOAuthUpstreamModel(auth, requestedModel)
-	if upstreamModel == "" {
-		return requestedModel, metadata
-	}
-	out := make(map[string]any, 1)
-	if len(metadata) > 0 {
-		out = make(map[string]any, len(metadata)+1)
-		for k, v := range metadata {
-			out[k] = v
-		}
-	}
-	// Store the requested alias (e.g., "gp") so downstream can use it to look up
-	// model metadata from the global registry where it was registered under this alias.
-	out[util.ModelMappingOriginalModelMetadataKey] = requestedModel
-	return upstreamModel, out
-}
-
-func (m *Manager) resolveOAuthUpstreamModel(auth *Auth, requestedModel string) string {
-	if m == nil || auth == nil {
-		return ""
-	}
-	channel := modelMappingChannel(auth)
-	if channel == "" {
-		return ""
-	}
-	key := strings.ToLower(strings.TrimSpace(requestedModel))
-	if key == "" {
-		return ""
-	}
-	raw := m.modelNameMappings.Load()
-	table, _ := raw.(*modelNameMappingTable)
-	if table == nil || table.reverse == nil {
-		return ""
-	}
-	rev := table.reverse[channel]
-	if rev == nil {
-		return ""
-	}
-	original := strings.TrimSpace(rev[key])
-	if original == "" || strings.EqualFold(original, requestedModel) {
-		return ""
-	}
-	return original
-}
-
-// modelMappingChannel extracts the OAuth model mapping channel from an Auth object.
-// It determines the provider and auth kind from the Auth's attributes and delegates
-// to OAuthModelMappingChannel for the actual channel resolution.
-func modelMappingChannel(auth *Auth) string {
-	if auth == nil {
-		return ""
-	}
-	provider := strings.ToLower(strings.TrimSpace(auth.Provider))
-	authKind := ""
-	if auth.Attributes != nil {
-		authKind = strings.ToLower(strings.TrimSpace(auth.Attributes["auth_kind"]))
-	}
-	if authKind == "" {
-		if kind, _ := auth.AccountInfo(); strings.EqualFold(kind, "api_key") {
-			authKind = "apikey"
-		}
-	}
-	return OAuthModelMappingChannel(provider, authKind)
-}
-
-// OAuthModelMappingChannel returns the OAuth model mapping channel name for a given provider
-// and auth kind. Returns empty string if the provider/authKind combination doesn't support
-// OAuth model mappings (e.g., API key authentication).
-//
-// Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow.
-func OAuthModelMappingChannel(provider, authKind string) string {
-	provider = strings.ToLower(strings.TrimSpace(provider))
-	authKind = strings.ToLower(strings.TrimSpace(authKind))
-	switch provider {
-	case "gemini":
-		// gemini provider uses gemini-api-key config, not oauth-model-mappings.
-		// OAuth-based gemini auth is converted to "gemini-cli" by the synthesizer.
-		return ""
-	case "vertex":
-		if authKind == "apikey" {
-			return ""
-		}
-		return "vertex"
-	case "claude":
-		if authKind == "apikey" {
-			return ""
-		}
-		return "claude"
-	case "codex":
-		if authKind == "apikey" {
-			return ""
-		}
-		return "codex"
-	case "gemini-cli", "aistudio", "antigravity", "qwen", "iflow":
-		return provider
-	default:
-		return ""
-	}
-}
diff --git a/sdk/cliproxy/auth/oauth_model_alias.go b/sdk/cliproxy/auth/oauth_model_alias.go
new file mode 100644
index 00000000..4111663e
--- /dev/null
+++ b/sdk/cliproxy/auth/oauth_model_alias.go
@@ -0,0 +1,253 @@
+package auth
+
+import (
+	"strings"
+
+	internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+)
+
+type modelAliasEntry interface {
+	GetName() string
+	GetAlias() string
+}
+
+type oauthModelAliasTable struct {
+	// reverse maps channel -> alias (lower) -> original upstream model name.
+	reverse map[string]map[string]string
+}
+
+func compileOAuthModelAliasTable(aliases map[string][]internalconfig.OAuthModelAlias) *oauthModelAliasTable {
+	if len(aliases) == 0 {
+		return &oauthModelAliasTable{}
+	}
+	out := &oauthModelAliasTable{
+		reverse: make(map[string]map[string]string, len(aliases)),
+	}
+	for rawChannel, entries := range aliases {
+		channel := strings.ToLower(strings.TrimSpace(rawChannel))
+		if channel == "" || len(entries) == 0 {
+			continue
+		}
+		rev := make(map[string]string, len(entries))
+		for _, entry := range entries {
+			name := strings.TrimSpace(entry.Name)
+			alias := strings.TrimSpace(entry.Alias)
+			if name == "" || alias == "" {
+				continue
+			}
+			if strings.EqualFold(name, alias) {
+				continue
+			}
+			aliasKey := strings.ToLower(alias)
+			if _, exists := rev[aliasKey]; exists {
+				continue
+			}
+			rev[aliasKey] = name
+		}
+		if len(rev) > 0 {
+			out.reverse[channel] = rev
+		}
+	}
+	if len(out.reverse) == 0 {
+		out.reverse = nil
+	}
+	return out
+}
+
+// SetOAuthModelAlias updates the OAuth model name alias table used during execution.
+// The alias is applied per-auth channel to resolve the upstream model name while keeping the
+// client-visible model name unchanged for translation/response formatting.
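+// The compiled table is stored via atomic.Value, so this is safe to call while requests are in
+// flight; the SDK builder installs it at startup and the service re-applies it on config reload.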
+func (m *Manager) SetOAuthModelAlias(aliases map[string][]internalconfig.OAuthModelAlias) {
+	if m == nil {
+		return
+	}
+	table := compileOAuthModelAliasTable(aliases)
+	// atomic.Value requires non-nil store values.
+	if table == nil {
+		table = &oauthModelAliasTable{}
+	}
+	m.oauthModelAlias.Store(table)
+}
+
+// applyOAuthModelAlias resolves the upstream model from OAuth model alias.
+// If an alias exists, the returned model is the upstream model.
+func (m *Manager) applyOAuthModelAlias(auth *Auth, requestedModel string) string {
+	upstreamModel := m.resolveOAuthUpstreamModel(auth, requestedModel)
+	if upstreamModel == "" {
+		return requestedModel
+	}
+	return upstreamModel
+}
+
+func resolveModelAliasFromConfigModels(requestedModel string, models []modelAliasEntry) string {
+	requestedModel = strings.TrimSpace(requestedModel)
+	if requestedModel == "" {
+		return ""
+	}
+	if len(models) == 0 {
+		return ""
+	}
+
+	requestResult := thinking.ParseSuffix(requestedModel)
+	base := requestResult.ModelName
+	candidates := []string{base}
+	if base != requestedModel {
+		candidates = append(candidates, requestedModel)
+	}
+
+	preserveSuffix := func(resolved string) string {
+		resolved = strings.TrimSpace(resolved)
+		if resolved == "" {
+			return ""
+		}
+		if thinking.ParseSuffix(resolved).HasSuffix {
+			return resolved
+		}
+		if requestResult.HasSuffix && requestResult.RawSuffix != "" {
+			return resolved + "(" + requestResult.RawSuffix + ")"
+		}
+		return resolved
+	}
+
+	for i := range models {
+		name := strings.TrimSpace(models[i].GetName())
+		alias := strings.TrimSpace(models[i].GetAlias())
+		for _, candidate := range candidates {
+			if candidate == "" {
+				continue
+			}
+			if alias != "" && strings.EqualFold(alias, candidate) {
+				if name != "" {
+					return preserveSuffix(name)
+				}
+				return preserveSuffix(candidate)
+			}
+			if name != "" && strings.EqualFold(name, candidate) {
+				return preserveSuffix(name)
+			}
+		}
+	}
+	return ""
+}
+
+// resolveOAuthUpstreamModel resolves the upstream model name from OAuth model alias.
+// If an alias exists, returns the original (upstream) model name that corresponds
+// to the requested alias.
+//
+// If the requested model contains a thinking suffix (e.g., "gemini-2.5-pro(8192)"),
+// the suffix is preserved in the returned model name. However, if the alias's
+// original name already contains a suffix, the config suffix takes priority.
+func (m *Manager) resolveOAuthUpstreamModel(auth *Auth, requestedModel string) string {
+	return resolveUpstreamModelFromAliasTable(m, auth, requestedModel, modelAliasChannel(auth))
+}
+
+func resolveUpstreamModelFromAliasTable(m *Manager, auth *Auth, requestedModel, channel string) string {
+	if m == nil || auth == nil {
+		return ""
+	}
+	if channel == "" {
+		return ""
+	}
+
+	// Extract thinking suffix from requested model using ParseSuffix
+	requestResult := thinking.ParseSuffix(requestedModel)
+	baseModel := requestResult.ModelName
+
+	// Candidate keys to match: base model and raw input (handles suffix-parsing edge cases).
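+	// e.g. for "gemini-2.5-pro(8192)" the base "gemini-2.5-pro" is tried first, then the raw string.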
+	candidates := []string{baseModel}
+	if baseModel != requestedModel {
+		candidates = append(candidates, requestedModel)
+	}
+
+	raw := m.oauthModelAlias.Load()
+	table, _ := raw.(*oauthModelAliasTable)
+	if table == nil || table.reverse == nil {
+		return ""
+	}
+	rev := table.reverse[channel]
+	if rev == nil {
+		return ""
+	}
+
+	for _, candidate := range candidates {
+		key := strings.ToLower(strings.TrimSpace(candidate))
+		if key == "" {
+			continue
+		}
+		original := strings.TrimSpace(rev[key])
+		if original == "" {
+			continue
+		}
+		if strings.EqualFold(original, baseModel) {
+			return ""
+		}
+
+		// If config already has suffix, it takes priority.
+		if thinking.ParseSuffix(original).HasSuffix {
+			return original
+		}
+		// Preserve user's thinking suffix on the resolved model.
+		if requestResult.HasSuffix && requestResult.RawSuffix != "" {
+			return original + "(" + requestResult.RawSuffix + ")"
+		}
+		return original
+	}
+
+	return ""
+}
+
+// modelAliasChannel extracts the OAuth model alias channel from an Auth object.
+// It determines the provider and auth kind from the Auth's attributes and delegates
+// to OAuthModelAliasChannel for the actual channel resolution.
+func modelAliasChannel(auth *Auth) string {
+	if auth == nil {
+		return ""
+	}
+	provider := strings.ToLower(strings.TrimSpace(auth.Provider))
+	authKind := ""
+	if auth.Attributes != nil {
+		authKind = strings.ToLower(strings.TrimSpace(auth.Attributes["auth_kind"]))
+	}
+	if authKind == "" {
+		if kind, _ := auth.AccountInfo(); strings.EqualFold(kind, "api_key") {
+			authKind = "apikey"
+		}
+	}
+	return OAuthModelAliasChannel(provider, authKind)
+}
+
+// OAuthModelAliasChannel returns the OAuth model alias channel name for a given provider
+// and auth kind. Returns empty string if the provider/authKind combination doesn't support
+// OAuth model alias (e.g., API key authentication).
+//
+// Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow.
+func OAuthModelAliasChannel(provider, authKind string) string {
+	provider = strings.ToLower(strings.TrimSpace(provider))
+	authKind = strings.ToLower(strings.TrimSpace(authKind))
+	switch provider {
+	case "gemini":
+		// gemini provider uses gemini-api-key config, not oauth-model-alias.
+		// OAuth-based gemini auth is converted to "gemini-cli" by the synthesizer.
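+		// Aliases for OAuth-based Gemini accounts are therefore configured under "gemini-cli" below.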
+ return "" + case "vertex": + if authKind == "apikey" { + return "" + } + return "vertex" + case "claude": + if authKind == "apikey" { + return "" + } + return "claude" + case "codex": + if authKind == "apikey" { + return "" + } + return "codex" + case "gemini-cli", "aistudio", "antigravity", "qwen", "iflow": + return provider + default: + return "" + } +} diff --git a/sdk/cliproxy/auth/oauth_model_alias_test.go b/sdk/cliproxy/auth/oauth_model_alias_test.go new file mode 100644 index 00000000..6956411c --- /dev/null +++ b/sdk/cliproxy/auth/oauth_model_alias_test.go @@ -0,0 +1,177 @@ +package auth + +import ( + "testing" + + internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config" +) + +func TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + aliases map[string][]internalconfig.OAuthModelAlias + channel string + input string + want string + }{ + { + name: "numeric suffix preserved", + aliases: map[string][]internalconfig.OAuthModelAlias{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro(8192)", + want: "gemini-2.5-pro-exp-03-25(8192)", + }, + { + name: "level suffix preserved", + aliases: map[string][]internalconfig.OAuthModelAlias{ + "claude": {{Name: "claude-sonnet-4-5-20250514", Alias: "claude-sonnet-4-5"}}, + }, + channel: "claude", + input: "claude-sonnet-4-5(high)", + want: "claude-sonnet-4-5-20250514(high)", + }, + { + name: "no suffix unchanged", + aliases: map[string][]internalconfig.OAuthModelAlias{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro", + want: "gemini-2.5-pro-exp-03-25", + }, + { + name: "config suffix takes priority", + aliases: map[string][]internalconfig.OAuthModelAlias{ + "claude": {{Name: "claude-sonnet-4-5-20250514(low)", Alias: "claude-sonnet-4-5"}}, + }, + channel: "claude", + input: "claude-sonnet-4-5(high)", + want: "claude-sonnet-4-5-20250514(low)", + }, + { + name: "auto suffix preserved", + aliases: map[string][]internalconfig.OAuthModelAlias{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro(auto)", + want: "gemini-2.5-pro-exp-03-25(auto)", + }, + { + name: "none suffix preserved", + aliases: map[string][]internalconfig.OAuthModelAlias{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro(none)", + want: "gemini-2.5-pro-exp-03-25(none)", + }, + { + name: "case insensitive alias lookup with suffix", + aliases: map[string][]internalconfig.OAuthModelAlias{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "Gemini-2.5-Pro"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro(high)", + want: "gemini-2.5-pro-exp-03-25(high)", + }, + { + name: "no alias returns empty", + aliases: map[string][]internalconfig.OAuthModelAlias{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "gemini-cli", + input: "unknown-model(high)", + want: "", + }, + { + name: "wrong channel returns empty", + aliases: map[string][]internalconfig.OAuthModelAlias{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "claude", + input: "gemini-2.5-pro(high)", + want: "", + }, + { + name: "empty suffix filtered out", + aliases: map[string][]internalconfig.OAuthModelAlias{ + 
"gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro()", + want: "gemini-2.5-pro-exp-03-25", + }, + { + name: "incomplete suffix treated as no suffix", + aliases: map[string][]internalconfig.OAuthModelAlias{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro(high"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro(high", + want: "gemini-2.5-pro-exp-03-25", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + mgr := NewManager(nil, nil, nil) + mgr.SetConfig(&internalconfig.Config{}) + mgr.SetOAuthModelAlias(tt.aliases) + + auth := createAuthForChannel(tt.channel) + got := mgr.resolveOAuthUpstreamModel(auth, tt.input) + if got != tt.want { + t.Errorf("resolveOAuthUpstreamModel(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} + +func createAuthForChannel(channel string) *Auth { + switch channel { + case "gemini-cli": + return &Auth{Provider: "gemini-cli"} + case "claude": + return &Auth{Provider: "claude", Attributes: map[string]string{"auth_kind": "oauth"}} + case "vertex": + return &Auth{Provider: "vertex", Attributes: map[string]string{"auth_kind": "oauth"}} + case "codex": + return &Auth{Provider: "codex", Attributes: map[string]string{"auth_kind": "oauth"}} + case "aistudio": + return &Auth{Provider: "aistudio"} + case "antigravity": + return &Auth{Provider: "antigravity"} + case "qwen": + return &Auth{Provider: "qwen"} + case "iflow": + return &Auth{Provider: "iflow"} + default: + return &Auth{Provider: channel} + } +} + +func TestApplyOAuthModelAlias_SuffixPreservation(t *testing.T) { + t.Parallel() + + aliases := map[string][]internalconfig.OAuthModelAlias{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + } + + mgr := NewManager(nil, nil, nil) + mgr.SetConfig(&internalconfig.Config{}) + mgr.SetOAuthModelAlias(aliases) + + auth := &Auth{ID: "test-auth-id", Provider: "gemini-cli"} + + resolvedModel := mgr.applyOAuthModelAlias(auth, "gemini-2.5-pro(8192)") + if resolvedModel != "gemini-2.5-pro-exp-03-25(8192)" { + t.Errorf("applyOAuthModelAlias() model = %q, want %q", resolvedModel, "gemini-2.5-pro-exp-03-25(8192)") + } +} diff --git a/sdk/cliproxy/auth/selector.go b/sdk/cliproxy/auth/selector.go index d7e120c5..7febf219 100644 --- a/sdk/cliproxy/auth/selector.go +++ b/sdk/cliproxy/auth/selector.go @@ -8,6 +8,7 @@ import ( "net/http" "sort" "strconv" + "strings" "sync" "time" @@ -103,13 +104,29 @@ func (e *modelCooldownError) Headers() http.Header { return headers } -func collectAvailable(auths []*Auth, model string, now time.Time) (available []*Auth, cooldownCount int, earliest time.Time) { - available = make([]*Auth, 0, len(auths)) +func authPriority(auth *Auth) int { + if auth == nil || auth.Attributes == nil { + return 0 + } + raw := strings.TrimSpace(auth.Attributes["priority"]) + if raw == "" { + return 0 + } + parsed, err := strconv.Atoi(raw) + if err != nil { + return 0 + } + return parsed +} + +func collectAvailableByPriority(auths []*Auth, model string, now time.Time) (available map[int][]*Auth, cooldownCount int, earliest time.Time) { + available = make(map[int][]*Auth) for i := 0; i < len(auths); i++ { candidate := auths[i] blocked, reason, next := isAuthBlockedForModel(candidate, model, now) if !blocked { - available = append(available, candidate) + priority := authPriority(candidate) + available[priority] = append(available[priority], candidate) continue } if reason == 
blockReasonCooldown { @@ -119,9 +136,6 @@ func collectAvailable(auths []*Auth, model string, now time.Time) (available []* } } } - if len(available) > 1 { - sort.Slice(available, func(i, j int) bool { return available[i].ID < available[j].ID }) - } return available, cooldownCount, earliest } @@ -130,18 +144,35 @@ func getAvailableAuths(auths []*Auth, provider, model string, now time.Time) ([] return nil, &Error{Code: "auth_not_found", Message: "no auth candidates"} } - available, cooldownCount, earliest := collectAvailable(auths, model, now) - if len(available) == 0 { + availableByPriority, cooldownCount, earliest := collectAvailableByPriority(auths, model, now) + if len(availableByPriority) == 0 { if cooldownCount == len(auths) && !earliest.IsZero() { + providerForError := provider + if providerForError == "mixed" { + providerForError = "" + } resetIn := earliest.Sub(now) if resetIn < 0 { resetIn = 0 } - return nil, newModelCooldownError(model, provider, resetIn) + return nil, newModelCooldownError(model, providerForError, resetIn) } return nil, &Error{Code: "auth_unavailable", Message: "no auth available"} } + bestPriority := 0 + found := false + for priority := range availableByPriority { + if !found || priority > bestPriority { + bestPriority = priority + found = true + } + } + + available := availableByPriority[bestPriority] + if len(available) > 1 { + sort.Slice(available, func(i, j int) bool { return available[i].ID < available[j].ID }) + } return available, nil } diff --git a/sdk/cliproxy/auth/selector_test.go b/sdk/cliproxy/auth/selector_test.go index f4beed03..91a7ed14 100644 --- a/sdk/cliproxy/auth/selector_test.go +++ b/sdk/cliproxy/auth/selector_test.go @@ -5,6 +5,7 @@ import ( "errors" "sync" "testing" + "time" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" ) @@ -56,6 +57,69 @@ func TestRoundRobinSelectorPick_CyclesDeterministic(t *testing.T) { } } +func TestRoundRobinSelectorPick_PriorityBuckets(t *testing.T) { + t.Parallel() + + selector := &RoundRobinSelector{} + auths := []*Auth{ + {ID: "c", Attributes: map[string]string{"priority": "0"}}, + {ID: "a", Attributes: map[string]string{"priority": "10"}}, + {ID: "b", Attributes: map[string]string{"priority": "10"}}, + } + + want := []string{"a", "b", "a", "b"} + for i, id := range want { + got, err := selector.Pick(context.Background(), "mixed", "", cliproxyexecutor.Options{}, auths) + if err != nil { + t.Fatalf("Pick() #%d error = %v", i, err) + } + if got == nil { + t.Fatalf("Pick() #%d auth = nil", i) + } + if got.ID != id { + t.Fatalf("Pick() #%d auth.ID = %q, want %q", i, got.ID, id) + } + if got.ID == "c" { + t.Fatalf("Pick() #%d unexpectedly selected lower priority auth", i) + } + } +} + +func TestFillFirstSelectorPick_PriorityFallbackCooldown(t *testing.T) { + t.Parallel() + + selector := &FillFirstSelector{} + now := time.Now() + model := "test-model" + + high := &Auth{ + ID: "high", + Attributes: map[string]string{"priority": "10"}, + ModelStates: map[string]*ModelState{ + model: { + Status: StatusActive, + Unavailable: true, + NextRetryAfter: now.Add(30 * time.Minute), + Quota: QuotaState{ + Exceeded: true, + }, + }, + }, + } + low := &Auth{ID: "low", Attributes: map[string]string{"priority": "0"}} + + got, err := selector.Pick(context.Background(), "mixed", model, cliproxyexecutor.Options{}, []*Auth{high, low}) + if err != nil { + t.Fatalf("Pick() error = %v", err) + } + if got == nil { + t.Fatalf("Pick() auth = nil") + } + if got.ID != "low" { + t.Fatalf("Pick() auth.ID = %q, want 
%q", got.ID, "low") + } +} + func TestRoundRobinSelectorPick_Concurrent(t *testing.T) { selector := &RoundRobinSelector{} auths := []*Auth{ diff --git a/sdk/cliproxy/builder.go b/sdk/cliproxy/builder.go index 51d5dbac..5eba18a0 100644 --- a/sdk/cliproxy/builder.go +++ b/sdk/cliproxy/builder.go @@ -215,7 +215,8 @@ func (b *Builder) Build() (*Service, error) { } // Attach a default RoundTripper provider so providers can opt-in per-auth transports. coreManager.SetRoundTripperProvider(newDefaultRoundTripperProvider()) - coreManager.SetOAuthModelMappings(b.cfg.OAuthModelMappings) + coreManager.SetConfig(b.cfg) + coreManager.SetOAuthModelAlias(b.cfg.OAuthModelAlias) service := &Service{ cfg: b.cfg, diff --git a/sdk/cliproxy/service.go b/sdk/cliproxy/service.go index 695a77c8..5b343e49 100644 --- a/sdk/cliproxy/service.go +++ b/sdk/cliproxy/service.go @@ -553,7 +553,8 @@ func (s *Service) Run(ctx context.Context) error { s.cfg = newCfg s.cfgMu.Unlock() if s.coreManager != nil { - s.coreManager.SetOAuthModelMappings(newCfg.OAuthModelMappings) + s.coreManager.SetConfig(newCfg) + s.coreManager.SetOAuthModelAlias(newCfg.OAuthModelAlias) } s.rebindExecutors() } @@ -825,6 +826,7 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) { OwnedBy: compat.Name, Type: "openai-compatibility", DisplayName: modelID, + UserDefined: true, }) } // Register and return @@ -847,7 +849,7 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) { } } } - models = applyOAuthModelMappings(s.cfg, provider, authKind, models) + models = applyOAuthModelAlias(s.cfg, provider, authKind, models) if len(models) > 0 { key := provider if key == "" { @@ -1157,6 +1159,7 @@ func buildConfigModels[T modelEntry](models []T, ownedBy, modelType string) []*M OwnedBy: ownedBy, Type: modelType, DisplayName: display, + UserDefined: true, } if name != "" { if upstream := registry.LookupStaticModelInfo(name); upstream != nil && upstream.Thinking != nil { @@ -1209,6 +1212,9 @@ func rewriteModelInfoName(name, oldID, newID string) string { if strings.EqualFold(oldID, newID) { return name } + if strings.EqualFold(trimmed, oldID) { + return newID + } if strings.HasSuffix(trimmed, "/"+oldID) { prefix := strings.TrimSuffix(trimmed, oldID) return prefix + newID @@ -1219,28 +1225,28 @@ func rewriteModelInfoName(name, oldID, newID string) string { return name } -func applyOAuthModelMappings(cfg *config.Config, provider, authKind string, models []*ModelInfo) []*ModelInfo { +func applyOAuthModelAlias(cfg *config.Config, provider, authKind string, models []*ModelInfo) []*ModelInfo { if cfg == nil || len(models) == 0 { return models } - channel := coreauth.OAuthModelMappingChannel(provider, authKind) - if channel == "" || len(cfg.OAuthModelMappings) == 0 { + channel := coreauth.OAuthModelAliasChannel(provider, authKind) + if channel == "" || len(cfg.OAuthModelAlias) == 0 { return models } - mappings := cfg.OAuthModelMappings[channel] - if len(mappings) == 0 { + aliases := cfg.OAuthModelAlias[channel] + if len(aliases) == 0 { return models } - type mappingEntry struct { + type aliasEntry struct { alias string fork bool } - forward := make(map[string][]mappingEntry, len(mappings)) - for i := range mappings { - name := strings.TrimSpace(mappings[i].Name) - alias := strings.TrimSpace(mappings[i].Alias) + forward := make(map[string][]aliasEntry, len(aliases)) + for i := range aliases { + name := strings.TrimSpace(aliases[i].Name) + alias := strings.TrimSpace(aliases[i].Alias) if name == "" || alias == "" { continue } @@ -1248,7 +1254,7 @@ func 
applyOAuthModelMappings(cfg *config.Config, provider, authKind string, mode continue } key := strings.ToLower(name) - forward[key] = append(forward[key], mappingEntry{alias: alias, fork: mappings[i].Fork}) + forward[key] = append(forward[key], aliasEntry{alias: alias, fork: aliases[i].Fork}) } if len(forward) == 0 { return models diff --git a/sdk/cliproxy/service_oauth_model_mappings_test.go b/sdk/cliproxy/service_oauth_model_alias_test.go similarity index 77% rename from sdk/cliproxy/service_oauth_model_mappings_test.go rename to sdk/cliproxy/service_oauth_model_alias_test.go index ca9ff35a..2caf7a17 100644 --- a/sdk/cliproxy/service_oauth_model_mappings_test.go +++ b/sdk/cliproxy/service_oauth_model_alias_test.go @@ -6,9 +6,9 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/sdk/config" ) -func TestApplyOAuthModelMappings_Rename(t *testing.T) { +func TestApplyOAuthModelAlias_Rename(t *testing.T) { cfg := &config.Config{ - OAuthModelMappings: map[string][]config.ModelNameMapping{ + OAuthModelAlias: map[string][]config.OAuthModelAlias{ "codex": { {Name: "gpt-5", Alias: "g5"}, }, @@ -18,7 +18,7 @@ func TestApplyOAuthModelMappings_Rename(t *testing.T) { {ID: "gpt-5", Name: "models/gpt-5"}, } - out := applyOAuthModelMappings(cfg, "codex", "oauth", models) + out := applyOAuthModelAlias(cfg, "codex", "oauth", models) if len(out) != 1 { t.Fatalf("expected 1 model, got %d", len(out)) } @@ -30,9 +30,9 @@ func TestApplyOAuthModelMappings_Rename(t *testing.T) { } } -func TestApplyOAuthModelMappings_ForkAddsAlias(t *testing.T) { +func TestApplyOAuthModelAlias_ForkAddsAlias(t *testing.T) { cfg := &config.Config{ - OAuthModelMappings: map[string][]config.ModelNameMapping{ + OAuthModelAlias: map[string][]config.OAuthModelAlias{ "codex": { {Name: "gpt-5", Alias: "g5", Fork: true}, }, @@ -42,7 +42,7 @@ func TestApplyOAuthModelMappings_ForkAddsAlias(t *testing.T) { {ID: "gpt-5", Name: "models/gpt-5"}, } - out := applyOAuthModelMappings(cfg, "codex", "oauth", models) + out := applyOAuthModelAlias(cfg, "codex", "oauth", models) if len(out) != 2 { t.Fatalf("expected 2 models, got %d", len(out)) } @@ -57,9 +57,9 @@ func TestApplyOAuthModelMappings_ForkAddsAlias(t *testing.T) { } } -func TestApplyOAuthModelMappings_ForkAddsMultipleAliases(t *testing.T) { +func TestApplyOAuthModelAlias_ForkAddsMultipleAliases(t *testing.T) { cfg := &config.Config{ - OAuthModelMappings: map[string][]config.ModelNameMapping{ + OAuthModelAlias: map[string][]config.OAuthModelAlias{ "codex": { {Name: "gpt-5", Alias: "g5", Fork: true}, {Name: "gpt-5", Alias: "g5-2", Fork: true}, @@ -70,7 +70,7 @@ func TestApplyOAuthModelMappings_ForkAddsMultipleAliases(t *testing.T) { {ID: "gpt-5", Name: "models/gpt-5"}, } - out := applyOAuthModelMappings(cfg, "codex", "oauth", models) + out := applyOAuthModelAlias(cfg, "codex", "oauth", models) if len(out) != 3 { t.Fatalf("expected 3 models, got %d", len(out)) } diff --git a/sdk/config/config.go b/sdk/config/config.go index 1ae7ba20..304ccdd8 100644 --- a/sdk/config/config.go +++ b/sdk/config/config.go @@ -16,7 +16,7 @@ type StreamingConfig = internalconfig.StreamingConfig type TLSConfig = internalconfig.TLSConfig type RemoteManagement = internalconfig.RemoteManagement type AmpCode = internalconfig.AmpCode -type ModelNameMapping = internalconfig.ModelNameMapping +type OAuthModelAlias = internalconfig.OAuthModelAlias type PayloadConfig = internalconfig.PayloadConfig type PayloadRule = internalconfig.PayloadRule type PayloadModelRule = internalconfig.PayloadModelRule diff --git 
a/test/gemini3_thinking_level_test.go b/test/gemini3_thinking_level_test.go deleted file mode 100644 index b26bcff3..00000000 --- a/test/gemini3_thinking_level_test.go +++ /dev/null @@ -1,423 +0,0 @@ -package test - -import ( - "fmt" - "testing" - "time" - - "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" - "github.com/tidwall/gjson" -) - -// registerGemini3Models loads Gemini 3 models into the registry for testing. -func registerGemini3Models(t *testing.T) func() { - t.Helper() - reg := registry.GetGlobalRegistry() - uid := fmt.Sprintf("gemini3-test-%d", time.Now().UnixNano()) - reg.RegisterClient(uid+"-gemini", "gemini", registry.GetGeminiModels()) - reg.RegisterClient(uid+"-aistudio", "aistudio", registry.GetAIStudioModels()) - return func() { - reg.UnregisterClient(uid + "-gemini") - reg.UnregisterClient(uid + "-aistudio") - } -} - -func TestIsGemini3Model(t *testing.T) { - cases := []struct { - model string - expected bool - }{ - {"gemini-3-pro-preview", true}, - {"gemini-3-flash-preview", true}, - {"gemini_3_pro_preview", true}, - {"gemini-3-pro", true}, - {"gemini-3-flash", true}, - {"GEMINI-3-PRO-PREVIEW", true}, - {"gemini-2.5-pro", false}, - {"gemini-2.5-flash", false}, - {"gpt-5", false}, - {"claude-sonnet-4-5", false}, - {"", false}, - } - - for _, cs := range cases { - t.Run(cs.model, func(t *testing.T) { - got := util.IsGemini3Model(cs.model) - if got != cs.expected { - t.Fatalf("IsGemini3Model(%q) = %v, want %v", cs.model, got, cs.expected) - } - }) - } -} - -func TestIsGemini3ProModel(t *testing.T) { - cases := []struct { - model string - expected bool - }{ - {"gemini-3-pro-preview", true}, - {"gemini_3_pro_preview", true}, - {"gemini-3-pro", true}, - {"GEMINI-3-PRO-PREVIEW", true}, - {"gemini-3-flash-preview", false}, - {"gemini-3-flash", false}, - {"gemini-2.5-pro", false}, - {"", false}, - } - - for _, cs := range cases { - t.Run(cs.model, func(t *testing.T) { - got := util.IsGemini3ProModel(cs.model) - if got != cs.expected { - t.Fatalf("IsGemini3ProModel(%q) = %v, want %v", cs.model, got, cs.expected) - } - }) - } -} - -func TestIsGemini3FlashModel(t *testing.T) { - cases := []struct { - model string - expected bool - }{ - {"gemini-3-flash-preview", true}, - {"gemini_3_flash_preview", true}, - {"gemini-3-flash", true}, - {"GEMINI-3-FLASH-PREVIEW", true}, - {"gemini-3-pro-preview", false}, - {"gemini-3-pro", false}, - {"gemini-2.5-flash", false}, - {"", false}, - } - - for _, cs := range cases { - t.Run(cs.model, func(t *testing.T) { - got := util.IsGemini3FlashModel(cs.model) - if got != cs.expected { - t.Fatalf("IsGemini3FlashModel(%q) = %v, want %v", cs.model, got, cs.expected) - } - }) - } -} - -func TestValidateGemini3ThinkingLevel(t *testing.T) { - cases := []struct { - name string - model string - level string - wantOK bool - wantVal string - }{ - // Gemini 3 Pro: supports "low", "high" - {"pro-low", "gemini-3-pro-preview", "low", true, "low"}, - {"pro-high", "gemini-3-pro-preview", "high", true, "high"}, - {"pro-minimal-invalid", "gemini-3-pro-preview", "minimal", false, ""}, - {"pro-medium-invalid", "gemini-3-pro-preview", "medium", false, ""}, - - // Gemini 3 Flash: supports "minimal", "low", "medium", "high" - {"flash-minimal", "gemini-3-flash-preview", "minimal", true, "minimal"}, - {"flash-low", "gemini-3-flash-preview", "low", true, "low"}, - {"flash-medium", "gemini-3-flash-preview", "medium", true, "medium"}, - {"flash-high", "gemini-3-flash-preview", "high", true, "high"}, - - // Case 
insensitivity - {"flash-LOW-case", "gemini-3-flash-preview", "LOW", true, "low"}, - {"flash-High-case", "gemini-3-flash-preview", "High", true, "high"}, - {"pro-HIGH-case", "gemini-3-pro-preview", "HIGH", true, "high"}, - - // Invalid levels - {"flash-invalid", "gemini-3-flash-preview", "xhigh", false, ""}, - {"flash-invalid-auto", "gemini-3-flash-preview", "auto", false, ""}, - {"flash-empty", "gemini-3-flash-preview", "", false, ""}, - - // Non-Gemini 3 models - {"non-gemini3", "gemini-2.5-pro", "high", false, ""}, - {"gpt5", "gpt-5", "high", false, ""}, - } - - for _, cs := range cases { - t.Run(cs.name, func(t *testing.T) { - got, ok := util.ValidateGemini3ThinkingLevel(cs.model, cs.level) - if ok != cs.wantOK { - t.Fatalf("ValidateGemini3ThinkingLevel(%q, %q) ok = %v, want %v", cs.model, cs.level, ok, cs.wantOK) - } - if got != cs.wantVal { - t.Fatalf("ValidateGemini3ThinkingLevel(%q, %q) = %q, want %q", cs.model, cs.level, got, cs.wantVal) - } - }) - } -} - -func TestThinkingBudgetToGemini3Level(t *testing.T) { - cases := []struct { - name string - model string - budget int - wantOK bool - wantVal string - }{ - // Gemini 3 Pro: maps to "low" or "high" - {"pro-dynamic", "gemini-3-pro-preview", -1, true, "high"}, - {"pro-zero", "gemini-3-pro-preview", 0, true, "low"}, - {"pro-small", "gemini-3-pro-preview", 1000, true, "low"}, - {"pro-medium", "gemini-3-pro-preview", 8000, true, "low"}, - {"pro-large", "gemini-3-pro-preview", 20000, true, "high"}, - {"pro-huge", "gemini-3-pro-preview", 50000, true, "high"}, - - // Gemini 3 Flash: maps to "minimal", "low", "medium", "high" - {"flash-dynamic", "gemini-3-flash-preview", -1, true, "high"}, - {"flash-zero", "gemini-3-flash-preview", 0, true, "minimal"}, - {"flash-tiny", "gemini-3-flash-preview", 500, true, "minimal"}, - {"flash-small", "gemini-3-flash-preview", 1000, true, "low"}, - {"flash-medium-val", "gemini-3-flash-preview", 8000, true, "medium"}, - {"flash-large", "gemini-3-flash-preview", 20000, true, "high"}, - {"flash-huge", "gemini-3-flash-preview", 50000, true, "high"}, - - // Non-Gemini 3 models should return false - {"gemini25-budget", "gemini-2.5-pro", 8000, false, ""}, - {"gpt5-budget", "gpt-5", 8000, false, ""}, - } - - for _, cs := range cases { - t.Run(cs.name, func(t *testing.T) { - got, ok := util.ThinkingBudgetToGemini3Level(cs.model, cs.budget) - if ok != cs.wantOK { - t.Fatalf("ThinkingBudgetToGemini3Level(%q, %d) ok = %v, want %v", cs.model, cs.budget, ok, cs.wantOK) - } - if got != cs.wantVal { - t.Fatalf("ThinkingBudgetToGemini3Level(%q, %d) = %q, want %q", cs.model, cs.budget, got, cs.wantVal) - } - }) - } -} - -func TestApplyGemini3ThinkingLevelFromMetadata(t *testing.T) { - cleanup := registerGemini3Models(t) - defer cleanup() - - cases := []struct { - name string - model string - metadata map[string]any - inputBody string - wantLevel string - wantInclude bool - wantNoChange bool - }{ - { - name: "flash-minimal-from-suffix", - model: "gemini-3-flash-preview", - metadata: map[string]any{"reasoning_effort": "minimal"}, - inputBody: `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`, - wantLevel: "minimal", - wantInclude: true, - }, - { - name: "flash-medium-from-suffix", - model: "gemini-3-flash-preview", - metadata: map[string]any{"reasoning_effort": "medium"}, - inputBody: `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`, - wantLevel: "medium", - wantInclude: true, - }, - { - name: "pro-high-from-suffix", - model: "gemini-3-pro-preview", - metadata: 
map[string]any{"reasoning_effort": "high"}, - inputBody: `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`, - wantLevel: "high", - wantInclude: true, - }, - { - name: "no-metadata-no-change", - model: "gemini-3-flash-preview", - metadata: nil, - inputBody: `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`, - wantNoChange: true, - }, - { - name: "non-gemini3-no-change", - model: "gemini-2.5-pro", - metadata: map[string]any{"reasoning_effort": "high"}, - inputBody: `{"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}`, - wantNoChange: true, - }, - { - name: "invalid-level-no-change", - model: "gemini-3-flash-preview", - metadata: map[string]any{"reasoning_effort": "xhigh"}, - inputBody: `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`, - wantNoChange: true, - }, - } - - for _, cs := range cases { - t.Run(cs.name, func(t *testing.T) { - input := []byte(cs.inputBody) - result := util.ApplyGemini3ThinkingLevelFromMetadata(cs.model, cs.metadata, input) - - if cs.wantNoChange { - if string(result) != cs.inputBody { - t.Fatalf("expected no change, but got: %s", string(result)) - } - return - } - - level := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingLevel") - if !level.Exists() { - t.Fatalf("thinkingLevel not set in result: %s", string(result)) - } - if level.String() != cs.wantLevel { - t.Fatalf("thinkingLevel = %q, want %q", level.String(), cs.wantLevel) - } - - include := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts") - if cs.wantInclude && (!include.Exists() || !include.Bool()) { - t.Fatalf("includeThoughts should be true, got: %s", string(result)) - } - }) - } -} - -func TestApplyGemini3ThinkingLevelFromMetadataCLI(t *testing.T) { - cleanup := registerGemini3Models(t) - defer cleanup() - - cases := []struct { - name string - model string - metadata map[string]any - inputBody string - wantLevel string - wantInclude bool - wantNoChange bool - }{ - { - name: "flash-minimal-from-suffix-cli", - model: "gemini-3-flash-preview", - metadata: map[string]any{"reasoning_effort": "minimal"}, - inputBody: `{"request":{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}}`, - wantLevel: "minimal", - wantInclude: true, - }, - { - name: "flash-low-from-suffix-cli", - model: "gemini-3-flash-preview", - metadata: map[string]any{"reasoning_effort": "low"}, - inputBody: `{"request":{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}}`, - wantLevel: "low", - wantInclude: true, - }, - { - name: "pro-low-from-suffix-cli", - model: "gemini-3-pro-preview", - metadata: map[string]any{"reasoning_effort": "low"}, - inputBody: `{"request":{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}}`, - wantLevel: "low", - wantInclude: true, - }, - { - name: "no-metadata-no-change-cli", - model: "gemini-3-flash-preview", - metadata: nil, - inputBody: `{"request":{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}}`, - wantNoChange: true, - }, - { - name: "non-gemini3-no-change-cli", - model: "gemini-2.5-pro", - metadata: map[string]any{"reasoning_effort": "high"}, - inputBody: `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}}`, - wantNoChange: true, - }, - } - - for _, cs := range cases { - t.Run(cs.name, func(t *testing.T) { - input := []byte(cs.inputBody) - result := util.ApplyGemini3ThinkingLevelFromMetadataCLI(cs.model, cs.metadata, input) - - if cs.wantNoChange { - if string(result) != cs.inputBody { - t.Fatalf("expected no change, but got: 
%s", string(result)) - } - return - } - - level := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel") - if !level.Exists() { - t.Fatalf("thinkingLevel not set in result: %s", string(result)) - } - if level.String() != cs.wantLevel { - t.Fatalf("thinkingLevel = %q, want %q", level.String(), cs.wantLevel) - } - - include := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts") - if cs.wantInclude && (!include.Exists() || !include.Bool()) { - t.Fatalf("includeThoughts should be true, got: %s", string(result)) - } - }) - } -} - -func TestNormalizeGeminiThinkingBudget_Gemini3Conversion(t *testing.T) { - cleanup := registerGemini3Models(t) - defer cleanup() - - cases := []struct { - name string - model string - inputBody string - wantLevel string - wantBudget bool // if true, expect thinkingBudget instead of thinkingLevel - }{ - { - name: "gemini3-flash-budget-to-level", - model: "gemini-3-flash-preview", - inputBody: `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8000}}}`, - wantLevel: "medium", - }, - { - name: "gemini3-pro-budget-to-level", - model: "gemini-3-pro-preview", - inputBody: `{"generationConfig":{"thinkingConfig":{"thinkingBudget":20000}}}`, - wantLevel: "high", - }, - { - name: "gemini25-keeps-budget", - model: "gemini-2.5-pro", - inputBody: `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8000}}}`, - wantBudget: true, - }, - } - - for _, cs := range cases { - t.Run(cs.name, func(t *testing.T) { - result := util.NormalizeGeminiThinkingBudget(cs.model, []byte(cs.inputBody)) - - if cs.wantBudget { - budget := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget") - if !budget.Exists() { - t.Fatalf("thinkingBudget should exist for non-Gemini3 model: %s", string(result)) - } - level := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingLevel") - if level.Exists() { - t.Fatalf("thinkingLevel should not exist for non-Gemini3 model: %s", string(result)) - } - } else { - level := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingLevel") - if !level.Exists() { - t.Fatalf("thinkingLevel should exist for Gemini3 model: %s", string(result)) - } - if level.String() != cs.wantLevel { - t.Fatalf("thinkingLevel = %q, want %q", level.String(), cs.wantLevel) - } - budget := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget") - if budget.Exists() { - t.Fatalf("thinkingBudget should be removed for Gemini3 model: %s", string(result)) - } - } - }) - } -} diff --git a/test/model_alias_thinking_suffix_test.go b/test/model_alias_thinking_suffix_test.go deleted file mode 100644 index 270e0cc7..00000000 --- a/test/model_alias_thinking_suffix_test.go +++ /dev/null @@ -1,211 +0,0 @@ -package test - -import ( - "testing" - - "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" - "github.com/tidwall/gjson" -) - -// TestModelAliasThinkingSuffix tests the 32 test cases defined in docs/thinking_suffix_test_cases.md -// These tests verify the thinking suffix parsing and application logic across different providers. -func TestModelAliasThinkingSuffix(t *testing.T) { - tests := []struct { - id int - name string - provider string - requestModel string - suffixType string - expectedField string // "thinkingBudget", "thinkingLevel", "budget_tokens", "reasoning_effort", "enable_thinking" - expectedValue any - upstreamModel string // The upstream model after alias resolution - isAlias bool - }{ - // === 1. 
Antigravity Provider === - // 1.1 Budget-only models (Gemini 2.5) - {1, "antigravity_original_numeric", "antigravity", "gemini-2.5-computer-use-preview-10-2025(1000)", "numeric", "thinkingBudget", 1000, "gemini-2.5-computer-use-preview-10-2025", false}, - {2, "antigravity_alias_numeric", "antigravity", "gp(1000)", "numeric", "thinkingBudget", 1000, "gemini-2.5-computer-use-preview-10-2025", true}, - // 1.2 Budget+Levels models (Gemini 3) - {3, "antigravity_original_numeric_to_level", "antigravity", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false}, - {4, "antigravity_original_level", "antigravity", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false}, - {5, "antigravity_alias_numeric_to_level", "antigravity", "gf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true}, - {6, "antigravity_alias_level", "antigravity", "gf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true}, - - // === 2. Gemini CLI Provider === - // 2.1 Budget-only models - {7, "gemini_cli_original_numeric", "gemini-cli", "gemini-2.5-pro(8192)", "numeric", "thinkingBudget", 8192, "gemini-2.5-pro", false}, - {8, "gemini_cli_alias_numeric", "gemini-cli", "g25p(8192)", "numeric", "thinkingBudget", 8192, "gemini-2.5-pro", true}, - // 2.2 Budget+Levels models - {9, "gemini_cli_original_numeric_to_level", "gemini-cli", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false}, - {10, "gemini_cli_original_level", "gemini-cli", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false}, - {11, "gemini_cli_alias_numeric_to_level", "gemini-cli", "gf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true}, - {12, "gemini_cli_alias_level", "gemini-cli", "gf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true}, - - // === 3. Vertex Provider === - // 3.1 Budget-only models - {13, "vertex_original_numeric", "vertex", "gemini-2.5-pro(16384)", "numeric", "thinkingBudget", 16384, "gemini-2.5-pro", false}, - {14, "vertex_alias_numeric", "vertex", "vg25p(16384)", "numeric", "thinkingBudget", 16384, "gemini-2.5-pro", true}, - // 3.2 Budget+Levels models - {15, "vertex_original_numeric_to_level", "vertex", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false}, - {16, "vertex_original_level", "vertex", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false}, - {17, "vertex_alias_numeric_to_level", "vertex", "vgf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true}, - {18, "vertex_alias_level", "vertex", "vgf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true}, - - // === 4. 
AI Studio Provider === - // 4.1 Budget-only models - {19, "aistudio_original_numeric", "aistudio", "gemini-2.5-pro(12000)", "numeric", "thinkingBudget", 12000, "gemini-2.5-pro", false}, - {20, "aistudio_alias_numeric", "aistudio", "ag25p(12000)", "numeric", "thinkingBudget", 12000, "gemini-2.5-pro", true}, - // 4.2 Budget+Levels models - {21, "aistudio_original_numeric_to_level", "aistudio", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false}, - {22, "aistudio_original_level", "aistudio", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false}, - {23, "aistudio_alias_numeric_to_level", "aistudio", "agf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true}, - {24, "aistudio_alias_level", "aistudio", "agf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true}, - - // === 5. Claude Provider === - {25, "claude_original_numeric", "claude", "claude-sonnet-4-5-20250929(16384)", "numeric", "budget_tokens", 16384, "claude-sonnet-4-5-20250929", false}, - {26, "claude_alias_numeric", "claude", "cs45(16384)", "numeric", "budget_tokens", 16384, "claude-sonnet-4-5-20250929", true}, - - // === 6. Codex Provider === - {27, "codex_original_level", "codex", "gpt-5(high)", "level", "reasoning_effort", "high", "gpt-5", false}, - {28, "codex_alias_level", "codex", "g5(high)", "level", "reasoning_effort", "high", "gpt-5", true}, - - // === 7. Qwen Provider === - {29, "qwen_original_level", "qwen", "qwen3-coder-plus(high)", "level", "enable_thinking", true, "qwen3-coder-plus", false}, - {30, "qwen_alias_level", "qwen", "qcp(high)", "level", "enable_thinking", true, "qwen3-coder-plus", true}, - - // === 8. iFlow Provider === - {31, "iflow_original_level", "iflow", "glm-4.7(high)", "level", "reasoning_effort", "high", "glm-4.7", false}, - {32, "iflow_alias_level", "iflow", "glm(high)", "level", "reasoning_effort", "high", "glm-4.7", true}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - // Step 1: Parse model suffix (simulates SDK layer normalization) - // For "gp(1000)" -> requestedModel="gp", metadata={thinking_budget: 1000} - requestedModel, metadata := util.NormalizeThinkingModel(tt.requestModel) - - // Verify suffix was parsed - if metadata == nil && (tt.suffixType == "numeric" || tt.suffixType == "level") { - t.Errorf("Case #%d: NormalizeThinkingModel(%q) metadata is nil", tt.id, tt.requestModel) - return - } - - // Step 2: Simulate OAuth model mapping - // Real flow: applyOAuthModelMapping stores requestedModel (the alias) in metadata - if tt.isAlias { - if metadata == nil { - metadata = make(map[string]any) - } - metadata[util.ModelMappingOriginalModelMetadataKey] = requestedModel - } - - // Step 3: Verify metadata extraction - switch tt.suffixType { - case "numeric": - budget, _, _, matched := util.ThinkingFromMetadata(metadata) - if !matched { - t.Errorf("Case #%d: ThinkingFromMetadata did not match", tt.id) - return - } - if budget == nil { - t.Errorf("Case #%d: expected budget in metadata", tt.id) - return - } - // For thinkingBudget/budget_tokens, verify the parsed budget value - if tt.expectedField == "thinkingBudget" || tt.expectedField == "budget_tokens" { - expectedBudget := tt.expectedValue.(int) - if *budget != expectedBudget { - t.Errorf("Case #%d: budget = %d, want %d", tt.id, *budget, expectedBudget) - } - } - // For thinkingLevel (Gemini 3), verify conversion from budget to level - if tt.expectedField == "thinkingLevel" { - level, ok := 
util.ThinkingBudgetToGemini3Level(tt.upstreamModel, *budget) - if !ok { - t.Errorf("Case #%d: ThinkingBudgetToGemini3Level failed", tt.id) - return - } - expectedLevel := tt.expectedValue.(string) - if level != expectedLevel { - t.Errorf("Case #%d: converted level = %q, want %q", tt.id, level, expectedLevel) - } - } - - case "level": - _, _, effort, matched := util.ThinkingFromMetadata(metadata) - if !matched { - t.Errorf("Case #%d: ThinkingFromMetadata did not match", tt.id) - return - } - if effort == nil { - t.Errorf("Case #%d: expected effort in metadata", tt.id) - return - } - if tt.expectedField == "thinkingLevel" || tt.expectedField == "reasoning_effort" { - expectedEffort := tt.expectedValue.(string) - if *effort != expectedEffort { - t.Errorf("Case #%d: effort = %q, want %q", tt.id, *effort, expectedEffort) - } - } - } - - // Step 4: Test Gemini-specific thinkingLevel conversion for Gemini 3 models - if tt.expectedField == "thinkingLevel" && util.IsGemini3Model(tt.upstreamModel) { - body := []byte(`{"request":{"contents":[]}}`) - - // Build metadata simulating real OAuth flow: - // - requestedModel (alias like "gf") is stored in model_mapping_original_model - // - upstreamModel is passed as the model parameter - testMetadata := make(map[string]any) - if tt.isAlias { - // Real flow: applyOAuthModelMapping stores requestedModel (the alias) - testMetadata[util.ModelMappingOriginalModelMetadataKey] = requestedModel - } - // Copy parsed metadata (thinking_budget, reasoning_effort, etc.) - for k, v := range metadata { - testMetadata[k] = v - } - - result := util.ApplyGemini3ThinkingLevelFromMetadataCLI(tt.upstreamModel, testMetadata, body) - levelVal := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel") - - expectedLevel := tt.expectedValue.(string) - if !levelVal.Exists() { - t.Errorf("Case #%d: expected thinkingLevel in result", tt.id) - } else if levelVal.String() != expectedLevel { - t.Errorf("Case #%d: thinkingLevel = %q, want %q", tt.id, levelVal.String(), expectedLevel) - } - } - - // Step 5: Test Gemini 2.5 thinkingBudget application using real ApplyThinkingMetadataCLI flow - if tt.expectedField == "thinkingBudget" && util.IsGemini25Model(tt.upstreamModel) { - body := []byte(`{"request":{"contents":[]}}`) - - // Build metadata simulating real OAuth flow: - // - requestedModel (alias like "gp") is stored in model_mapping_original_model - // - upstreamModel is passed as the model parameter - testMetadata := make(map[string]any) - if tt.isAlias { - // Real flow: applyOAuthModelMapping stores requestedModel (the alias) - testMetadata[util.ModelMappingOriginalModelMetadataKey] = requestedModel - } - // Copy parsed metadata (thinking_budget, reasoning_effort, etc.) 
-				for k, v := range metadata {
-					testMetadata[k] = v
-				}
-
-				// Use the exported ApplyThinkingMetadataCLI which includes the fallback logic
-				result := executor.ApplyThinkingMetadataCLI(body, testMetadata, tt.upstreamModel)
-				budgetVal := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget")
-
-				expectedBudget := tt.expectedValue.(int)
-				if !budgetVal.Exists() {
-					t.Errorf("Case #%d: expected thinkingBudget in result", tt.id)
-				} else if int(budgetVal.Int()) != expectedBudget {
-					t.Errorf("Case #%d: thinkingBudget = %d, want %d", tt.id, int(budgetVal.Int()), expectedBudget)
-				}
-			}
-		})
-	}
-}
diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go
index 74a1bd8a..4a7df29a 100644
--- a/test/thinking_conversion_test.go
+++ b/test/thinking_conversion_test.go
@@ -2,797 +2,2790 @@ package test
 
 import (
 	"fmt"
-	"strings"
 	"testing"
 	"time"
 
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator"
+	// Import provider packages to trigger init() registration of ProviderAppliers
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/antigravity"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/claude"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/codex"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai"
+
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
 
-// isOpenAICompatModel returns true if the model is configured as an OpenAI-compatible
-// model that should have reasoning effort passed through even if not in registry.
-// This simulates the allowCompat behavior from OpenAICompatExecutor.
-func isOpenAICompatModel(model string) bool {
-	return model == "openai-compat"
+// thinkingTestCase represents a common test case structure for both suffix and body tests.
+type thinkingTestCase struct {
+	name            string
+	from            string
+	to              string
+	model           string
+	inputJSON       string
+	expectField     string
+	expectValue     string
+	includeThoughts string
+	expectErr       bool
 }
 
-// registerCoreModels loads representative models across providers into the registry
-// so NormalizeThinkingBudget and level validation use real ranges.
-func registerCoreModels(t *testing.T) func() {
-	t.Helper()
+// TestThinkingE2EMatrix_Suffix tests the thinking configuration transformation using model name suffix.
+// Data flow: Input JSON → TranslateRequest → ApplyThinking → Validate Output
+// No helper functions are used; all test data is inline.
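+//
+// The getTestModels helper (defined alongside these tests, not shown in this
+// hunk) registers the synthetic models whose capability annotations appear in
+// the case comments below. A rough, illustrative sketch of the shape it is
+// assumed to return — the ThinkingSupport type and its field names are
+// assumptions for illustration; only the capability values (Levels, Min/Max,
+// ZeroAllowed, DynamicAllowed) are taken from the case comments:
+//
+//	func getTestModels() []*registry.ModelInfo {
+//		return []*registry.ModelInfo{
+//			// Level-based reasoning only; no zero level, no dynamic mode.
+//			{ID: "level-model", Type: "test", Thinking: &registry.ThinkingSupport{
+//				Levels: []string{"minimal", "low", "medium", "high"},
+//			}},
+//			// Token-budget thinking with a dynamic (-1) mode.
+//			{ID: "gemini-budget-model", Type: "test", Thinking: &registry.ThinkingSupport{
+//				Min: 128, Max: 20000, ZeroAllowed: false, DynamicAllowed: true,
+//			}},
+//			// No Thinking metadata: thinking suffixes are stripped and ignored.
+//			{ID: "no-thinking-model", Type: "test"},
+//		}
+//	}
+//
+// The real helper also covers level-subset-model, gemini-mixed-model,
+// claude-budget-model, antigravity-budget-model, user-defined-model, glm-test,
+// and minimax-test, whose capabilities are documented inline with the cases.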
+func TestThinkingE2EMatrix_Suffix(t *testing.T) { reg := registry.GetGlobalRegistry() - uid := fmt.Sprintf("thinking-core-%d", time.Now().UnixNano()) - reg.RegisterClient(uid+"-gemini", "gemini", registry.GetGeminiModels()) - reg.RegisterClient(uid+"-claude", "claude", registry.GetClaudeModels()) - reg.RegisterClient(uid+"-openai", "codex", registry.GetOpenAIModels()) - reg.RegisterClient(uid+"-qwen", "qwen", registry.GetQwenModels()) - // Custom openai-compatible model with forced thinking suffix passthrough. - // No Thinking field - simulates an external model added via openai-compat - // where the registry has no knowledge of its thinking capabilities. - // The allowCompat flag should preserve reasoning effort for such models. - customOpenAIModels := []*registry.ModelInfo{ + uid := fmt.Sprintf("thinking-e2e-suffix-%d", time.Now().UnixNano()) + + reg.RegisterClient(uid, "test", getTestModels()) + defer reg.UnregisterClient(uid) + + cases := []thinkingTestCase{ + // level-model (Levels=minimal/low/medium/high, ZeroAllowed=false, DynamicAllowed=false) + + // Case 1: No suffix → injected default → medium { - ID: "openai-compat", - Object: "model", - Created: 1700000000, - OwnedBy: "custom-provider", - Type: "openai", - DisplayName: "OpenAI Compatible Model", - Description: "OpenAI-compatible model with forced thinking suffix support", + name: "1", + from: "openai", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 2: Specified medium → medium + { + name: "2", + from: "openai", + to: "codex", + model: "level-model(medium)", + inputJSON: `{"model":"level-model(medium)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 3: Specified xhigh → out of range error + { + name: "3", + from: "openai", + to: "codex", + model: "level-model(xhigh)", + inputJSON: `{"model":"level-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: true, + }, + // Case 4: Level none → clamped to minimal (ZeroAllowed=false) + { + name: "4", + from: "openai", + to: "codex", + model: "level-model(none)", + inputJSON: `{"model":"level-model(none)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "minimal", + expectErr: false, + }, + // Case 5: Level auto → DynamicAllowed=false → medium (mid-range) + { + name: "5", + from: "openai", + to: "codex", + model: "level-model(auto)", + inputJSON: `{"model":"level-model(auto)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 6: No suffix from gemini → injected default → medium + { + name: "6", + from: "gemini", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 7: Budget 8192 → medium + { + name: "7", + from: "gemini", + to: "codex", + model: "level-model(8192)", + inputJSON: `{"model":"level-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 8: Budget 64000 → clamped to high + { + name: "8", + from: "gemini", + to: "codex", + model: "level-model(64000)", + inputJSON: 
`{"model":"level-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning.effort", + expectValue: "high", + expectErr: false, + }, + // Case 9: Budget 0 → clamped to minimal (ZeroAllowed=false) + { + name: "9", + from: "gemini", + to: "codex", + model: "level-model(0)", + inputJSON: `{"model":"level-model(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning.effort", + expectValue: "minimal", + expectErr: false, + }, + // Case 10: Budget -1 → auto → DynamicAllowed=false → medium (mid-range) + { + name: "10", + from: "gemini", + to: "codex", + model: "level-model(-1)", + inputJSON: `{"model":"level-model(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 11: Claude source no suffix → passthrough (no thinking) + { + name: "11", + from: "claude", + to: "openai", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 12: Budget 8192 → medium + { + name: "12", + from: "claude", + to: "openai", + model: "level-model(8192)", + inputJSON: `{"model":"level-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_effort", + expectValue: "medium", + expectErr: false, + }, + // Case 13: Budget 64000 → clamped to high + { + name: "13", + from: "claude", + to: "openai", + model: "level-model(64000)", + inputJSON: `{"model":"level-model(64000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_effort", + expectValue: "high", + expectErr: false, + }, + // Case 14: Budget 0 → clamped to minimal (ZeroAllowed=false) + { + name: "14", + from: "claude", + to: "openai", + model: "level-model(0)", + inputJSON: `{"model":"level-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_effort", + expectValue: "minimal", + expectErr: false, + }, + // Case 15: Budget -1 → auto → DynamicAllowed=false → medium (mid-range) + { + name: "15", + from: "claude", + to: "openai", + model: "level-model(-1)", + inputJSON: `{"model":"level-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_effort", + expectValue: "medium", + expectErr: false, + }, + + // level-subset-model (Levels=low/high, ZeroAllowed=false, DynamicAllowed=false) + + // Case 16: Budget 8192 → medium → rounded down to low + { + name: "16", + from: "gemini", + to: "openai", + model: "level-subset-model(8192)", + inputJSON: `{"model":"level-subset-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_effort", + expectValue: "low", + expectErr: false, + }, + // Case 17: Budget 1 → minimal → clamped to low (min supported) + { + name: "17", + from: "claude", + to: "gemini", + model: "level-subset-model(1)", + inputJSON: `{"model":"level-subset-model(1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingLevel", + expectValue: "low", + includeThoughts: "true", + expectErr: false, + }, + + // gemini-budget-model (Min=128, Max=20000, ZeroAllowed=false, DynamicAllowed=true) + + // Case 18: No suffix → passthrough + { + name: "18", + from: "openai", + to: "gemini", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 19: Effort medium → 8192 + { + name: "19", + from: 
"openai", + to: "gemini", + model: "gemini-budget-model(medium)", + inputJSON: `{"model":"gemini-budget-model(medium)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 20: Effort xhigh → clamped to 20000 (max) + { + name: "20", + from: "openai", + to: "gemini", + model: "gemini-budget-model(xhigh)", + inputJSON: `{"model":"gemini-budget-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + // Case 21: Effort none → clamped to 128 (min) → includeThoughts=false + { + name: "21", + from: "openai", + to: "gemini", + model: "gemini-budget-model(none)", + inputJSON: `{"model":"gemini-budget-model(none)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "128", + includeThoughts: "false", + expectErr: false, + }, + // Case 22: Effort auto → DynamicAllowed=true → -1 + { + name: "22", + from: "openai", + to: "gemini", + model: "gemini-budget-model(auto)", + inputJSON: `{"model":"gemini-budget-model(auto)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + // Case 23: Claude source no suffix → passthrough + { + name: "23", + from: "claude", + to: "gemini", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 24: Budget 8192 → 8192 + { + name: "24", + from: "claude", + to: "gemini", + model: "gemini-budget-model(8192)", + inputJSON: `{"model":"gemini-budget-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 25: Budget 64000 → clamped to 20000 (max) + { + name: "25", + from: "claude", + to: "gemini", + model: "gemini-budget-model(64000)", + inputJSON: `{"model":"gemini-budget-model(64000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + // Case 26: Budget 0 → clamped to 128 (min) → includeThoughts=false + { + name: "26", + from: "claude", + to: "gemini", + model: "gemini-budget-model(0)", + inputJSON: `{"model":"gemini-budget-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "128", + includeThoughts: "false", + expectErr: false, + }, + // Case 27: Budget -1 → DynamicAllowed=true → -1 + { + name: "27", + from: "claude", + to: "gemini", + model: "gemini-budget-model(-1)", + inputJSON: `{"model":"gemini-budget-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + + // gemini-mixed-model (Min=128, Max=32768, Levels=low/high, ZeroAllowed=false, DynamicAllowed=true) + + // Case 28: OpenAI source no suffix → passthrough + { + name: "28", + from: "openai", + to: "gemini", + model: "gemini-mixed-model", + inputJSON: 
`{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 29: Effort high → low/high supported → high + { + name: "29", + from: "openai", + to: "gemini", + model: "gemini-mixed-model(high)", + inputJSON: `{"model":"gemini-mixed-model(high)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingLevel", + expectValue: "high", + includeThoughts: "true", + expectErr: false, + }, + // Case 30: Effort xhigh → not in low/high → error + { + name: "30", + from: "openai", + to: "gemini", + model: "gemini-mixed-model(xhigh)", + inputJSON: `{"model":"gemini-mixed-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: true, + }, + // Case 31: Effort none → clamped to low (min supported) → includeThoughts=false + { + name: "31", + from: "openai", + to: "gemini", + model: "gemini-mixed-model(none)", + inputJSON: `{"model":"gemini-mixed-model(none)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingLevel", + expectValue: "low", + includeThoughts: "false", + expectErr: false, + }, + // Case 32: Effort auto → DynamicAllowed=true → -1 (budget) + { + name: "32", + from: "openai", + to: "gemini", + model: "gemini-mixed-model(auto)", + inputJSON: `{"model":"gemini-mixed-model(auto)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + // Case 33: Claude source no suffix → passthrough + { + name: "33", + from: "claude", + to: "gemini", + model: "gemini-mixed-model", + inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 34: Budget 8192 → 8192 (keep budget) + { + name: "34", + from: "claude", + to: "gemini", + model: "gemini-mixed-model(8192)", + inputJSON: `{"model":"gemini-mixed-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 35: Budget 64000 → clamped to 32768 (max) + { + name: "35", + from: "claude", + to: "gemini", + model: "gemini-mixed-model(64000)", + inputJSON: `{"model":"gemini-mixed-model(64000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "32768", + includeThoughts: "true", + expectErr: false, + }, + // Case 36: Budget 0 → minimal → clamped to low (min level) → includeThoughts=false + { + name: "36", + from: "claude", + to: "gemini", + model: "gemini-mixed-model(0)", + inputJSON: `{"model":"gemini-mixed-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingLevel", + expectValue: "low", + includeThoughts: "false", + expectErr: false, + }, + // Case 37: Budget -1 → DynamicAllowed=true → -1 (budget) + { + name: "37", + from: "claude", + to: "gemini", + model: "gemini-mixed-model(-1)", + inputJSON: `{"model":"gemini-mixed-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + + // claude-budget-model (Min=1024, Max=128000, ZeroAllowed=true, DynamicAllowed=false) + + // Case 38: OpenAI source no suffix → passthrough + { + name: "38", + 
from: "openai", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 39: Effort medium → 8192 + { + name: "39", + from: "openai", + to: "claude", + model: "claude-budget-model(medium)", + inputJSON: `{"model":"claude-budget-model(medium)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, + // Case 40: Effort xhigh → clamped to 32768 (matrix value) + { + name: "40", + from: "openai", + to: "claude", + model: "claude-budget-model(xhigh)", + inputJSON: `{"model":"claude-budget-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + expectValue: "32768", + expectErr: false, + }, + // Case 41: Effort none → ZeroAllowed=true → disabled + { + name: "41", + from: "openai", + to: "claude", + model: "claude-budget-model(none)", + inputJSON: `{"model":"claude-budget-model(none)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.type", + expectValue: "disabled", + expectErr: false, + }, + // Case 42: Effort auto → DynamicAllowed=false → 64512 (mid-range) + { + name: "42", + from: "openai", + to: "claude", + model: "claude-budget-model(auto)", + inputJSON: `{"model":"claude-budget-model(auto)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + expectValue: "64512", + expectErr: false, + }, + // Case 43: Gemini source no suffix → passthrough + { + name: "43", + from: "gemini", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 44: Budget 8192 → 8192 + { + name: "44", + from: "gemini", + to: "claude", + model: "claude-budget-model(8192)", + inputJSON: `{"model":"claude-budget-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, + // Case 45: Budget 200000 → clamped to 128000 (max) + { + name: "45", + from: "gemini", + to: "claude", + model: "claude-budget-model(200000)", + inputJSON: `{"model":"claude-budget-model(200000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "thinking.budget_tokens", + expectValue: "128000", + expectErr: false, + }, + // Case 46: Budget 0 → ZeroAllowed=true → disabled + { + name: "46", + from: "gemini", + to: "claude", + model: "claude-budget-model(0)", + inputJSON: `{"model":"claude-budget-model(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "thinking.type", + expectValue: "disabled", + expectErr: false, + }, + // Case 47: Budget -1 → auto → DynamicAllowed=false → 64512 (mid-range) + { + name: "47", + from: "gemini", + to: "claude", + model: "claude-budget-model(-1)", + inputJSON: `{"model":"claude-budget-model(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "thinking.budget_tokens", + expectValue: "64512", + expectErr: false, + }, + + // antigravity-budget-model (Min=128, Max=20000, ZeroAllowed=true, DynamicAllowed=true) + + // Case 48: Gemini to Antigravity no suffix → passthrough + { + name: "48", + from: "gemini", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: 
false, + }, + // Case 49: Effort medium → 8192 + { + name: "49", + from: "gemini", + to: "antigravity", + model: "antigravity-budget-model(medium)", + inputJSON: `{"model":"antigravity-budget-model(medium)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 50: Effort xhigh → clamped to 20000 (max) + { + name: "50", + from: "gemini", + to: "antigravity", + model: "antigravity-budget-model(xhigh)", + inputJSON: `{"model":"antigravity-budget-model(xhigh)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + // Case 51: Effort none → ZeroAllowed=true → 0 → includeThoughts=false + { + name: "51", + from: "gemini", + to: "antigravity", + model: "antigravity-budget-model(none)", + inputJSON: `{"model":"antigravity-budget-model(none)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "0", + includeThoughts: "false", + expectErr: false, + }, + // Case 52: Effort auto → DynamicAllowed=true → -1 + { + name: "52", + from: "gemini", + to: "antigravity", + model: "antigravity-budget-model(auto)", + inputJSON: `{"model":"antigravity-budget-model(auto)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + // Case 53: Claude to Antigravity no suffix → passthrough + { + name: "53", + from: "claude", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 54: Budget 8192 → 8192 + { + name: "54", + from: "claude", + to: "antigravity", + model: "antigravity-budget-model(8192)", + inputJSON: `{"model":"antigravity-budget-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 55: Budget 64000 → clamped to 20000 (max) + { + name: "55", + from: "claude", + to: "antigravity", + model: "antigravity-budget-model(64000)", + inputJSON: `{"model":"antigravity-budget-model(64000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + // Case 56: Budget 0 → ZeroAllowed=true → 0 → includeThoughts=false + { + name: "56", + from: "claude", + to: "antigravity", + model: "antigravity-budget-model(0)", + inputJSON: `{"model":"antigravity-budget-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "0", + includeThoughts: "false", + expectErr: false, + }, + // Case 57: Budget -1 → DynamicAllowed=true → -1 + { + name: "57", + from: "claude", + to: "antigravity", + model: "antigravity-budget-model(-1)", + inputJSON: `{"model":"antigravity-budget-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: 
false, + }, + + // no-thinking-model (Thinking=nil) + + // Case 58: No thinking support → no configuration + { + name: "58", + from: "gemini", + to: "openai", + model: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 59: Budget 8192 → no thinking support → suffix stripped → no configuration + { + name: "59", + from: "gemini", + to: "openai", + model: "no-thinking-model(8192)", + inputJSON: `{"model":"no-thinking-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 60: Budget 0 → suffix stripped → no configuration + { + name: "60", + from: "gemini", + to: "openai", + model: "no-thinking-model(0)", + inputJSON: `{"model":"no-thinking-model(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 61: Budget -1 → suffix stripped → no configuration + { + name: "61", + from: "gemini", + to: "openai", + model: "no-thinking-model(-1)", + inputJSON: `{"model":"no-thinking-model(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 62: Claude source no suffix → no configuration + { + name: "62", + from: "claude", + to: "openai", + model: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 63: Budget 8192 → suffix stripped → no configuration + { + name: "63", + from: "claude", + to: "openai", + model: "no-thinking-model(8192)", + inputJSON: `{"model":"no-thinking-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 64: Budget 0 → suffix stripped → no configuration + { + name: "64", + from: "claude", + to: "openai", + model: "no-thinking-model(0)", + inputJSON: `{"model":"no-thinking-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 65: Budget -1 → suffix stripped → no configuration + { + name: "65", + from: "claude", + to: "openai", + model: "no-thinking-model(-1)", + inputJSON: `{"model":"no-thinking-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + + // user-defined-model (UserDefined=true, Thinking=nil) + + // Case 66: User defined model no suffix → passthrough + { + name: "66", + from: "gemini", + to: "openai", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 67: Budget 8192 → passthrough logic → medium + { + name: "67", + from: "gemini", + to: "openai", + model: "user-defined-model(8192)", + inputJSON: `{"model":"user-defined-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_effort", + expectValue: "medium", + expectErr: false, + }, + // Case 68: Budget 64000 → passthrough logic → xhigh + { + name: "68", + from: "gemini", + to: "openai", + model: "user-defined-model(64000)", + inputJSON: `{"model":"user-defined-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_effort", + expectValue: "xhigh", + expectErr: false, + }, + // Case 69: Budget 0 → passthrough logic → none + { + name: "69", + from: "gemini", + to: "openai", + model: "user-defined-model(0)", + inputJSON: 
`{"model":"user-defined-model(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_effort", + expectValue: "none", + expectErr: false, + }, + // Case 70: Budget -1 → passthrough logic → auto + { + name: "70", + from: "gemini", + to: "openai", + model: "user-defined-model(-1)", + inputJSON: `{"model":"user-defined-model(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_effort", + expectValue: "auto", + expectErr: false, + }, + // Case 71: Claude to Codex no suffix → injected default → medium + { + name: "71", + from: "claude", + to: "codex", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 72: Budget 8192 → passthrough logic → medium + { + name: "72", + from: "claude", + to: "codex", + model: "user-defined-model(8192)", + inputJSON: `{"model":"user-defined-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 73: Budget 64000 → passthrough logic → xhigh + { + name: "73", + from: "claude", + to: "codex", + model: "user-defined-model(64000)", + inputJSON: `{"model":"user-defined-model(64000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "xhigh", + expectErr: false, + }, + // Case 74: Budget 0 → passthrough logic → none + { + name: "74", + from: "claude", + to: "codex", + model: "user-defined-model(0)", + inputJSON: `{"model":"user-defined-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "none", + expectErr: false, + }, + // Case 75: Budget -1 → passthrough logic → auto + { + name: "75", + from: "claude", + to: "codex", + model: "user-defined-model(-1)", + inputJSON: `{"model":"user-defined-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "auto", + expectErr: false, + }, + // Case 76: OpenAI to Gemini budget 8192 → passthrough → 8192 + { + name: "76", + from: "openai", + to: "gemini", + model: "user-defined-model(8192)", + inputJSON: `{"model":"user-defined-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 77: OpenAI to Claude budget 8192 → passthrough → 8192 + { + name: "77", + from: "openai", + to: "claude", + model: "user-defined-model(8192)", + inputJSON: `{"model":"user-defined-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, + // Case 78: OpenAI-Response to Gemini budget 8192 → passthrough → 8192 + { + name: "78", + from: "openai-response", + to: "gemini", + model: "user-defined-model(8192)", + inputJSON: `{"model":"user-defined-model(8192)","input":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 79: OpenAI-Response to Claude budget 8192 → passthrough → 8192 + { + name: "79", + from: "openai-response", + to: "claude", + model: "user-defined-model(8192)", + inputJSON: `{"model":"user-defined-model(8192)","input":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + 
expectValue: "8192", + expectErr: false, + }, + + // Same-protocol passthrough tests (80-89) + + // Case 80: OpenAI to OpenAI, level high → passthrough reasoning_effort + { + name: "80", + from: "openai", + to: "openai", + model: "level-model(high)", + inputJSON: `{"model":"level-model(high)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_effort", + expectValue: "high", + expectErr: false, + }, + // Case 81: OpenAI to OpenAI, level xhigh → out of range error + { + name: "81", + from: "openai", + to: "openai", + model: "level-model(xhigh)", + inputJSON: `{"model":"level-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: true, + }, + // Case 82: OpenAI-Response to Codex, level high → passthrough reasoning.effort + { + name: "82", + from: "openai-response", + to: "codex", + model: "level-model(high)", + inputJSON: `{"model":"level-model(high)","input":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "high", + expectErr: false, + }, + // Case 83: OpenAI-Response to Codex, level xhigh → out of range error + { + name: "83", + from: "openai-response", + to: "codex", + model: "level-model(xhigh)", + inputJSON: `{"model":"level-model(xhigh)","input":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: true, + }, + // Case 84: Gemini to Gemini, budget 8192 → passthrough thinkingBudget + { + name: "84", + from: "gemini", + to: "gemini", + model: "gemini-budget-model(8192)", + inputJSON: `{"model":"gemini-budget-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 85: Gemini to Gemini, budget 64000 → clamped to Max + { + name: "85", + from: "gemini", + to: "gemini", + model: "gemini-budget-model(64000)", + inputJSON: `{"model":"gemini-budget-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + // Case 86: Claude to Claude, budget 8192 → passthrough thinking.budget_tokens + { + name: "86", + from: "claude", + to: "claude", + model: "claude-budget-model(8192)", + inputJSON: `{"model":"claude-budget-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, + // Case 87: Claude to Claude, budget 200000 → clamped to Max + { + name: "87", + from: "claude", + to: "claude", + model: "claude-budget-model(200000)", + inputJSON: `{"model":"claude-budget-model(200000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + expectValue: "128000", + expectErr: false, + }, + // Case 88: Gemini-CLI to Antigravity, budget 8192 → passthrough thinkingBudget + { + name: "88", + from: "gemini-cli", + to: "antigravity", + model: "antigravity-budget-model(8192)", + inputJSON: `{"model":"antigravity-budget-model(8192)","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 89: Gemini-CLI to Antigravity, budget 64000 → clamped to Max + { + name: "89", + from: "gemini-cli", + to: "antigravity", + model: "antigravity-budget-model(64000)", + inputJSON: 
`{"model":"antigravity-budget-model(64000)","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + + // iflow tests: glm-test and minimax-test (Cases 90-105) + + // glm-test (from: openai, claude) + // Case 90: OpenAI to iflow, no suffix → passthrough + { + name: "90", + from: "openai", + to: "iflow", + model: "glm-test", + inputJSON: `{"model":"glm-test","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 91: OpenAI to iflow, (medium) → enable_thinking=true + { + name: "91", + from: "openai", + to: "iflow", + model: "glm-test(medium)", + inputJSON: `{"model":"glm-test(medium)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "true", + expectErr: false, + }, + // Case 92: OpenAI to iflow, (auto) → enable_thinking=true + { + name: "92", + from: "openai", + to: "iflow", + model: "glm-test(auto)", + inputJSON: `{"model":"glm-test(auto)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "true", + expectErr: false, + }, + // Case 93: OpenAI to iflow, (none) → enable_thinking=false + { + name: "93", + from: "openai", + to: "iflow", + model: "glm-test(none)", + inputJSON: `{"model":"glm-test(none)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "false", + expectErr: false, + }, + // Case 94: Claude to iflow, no suffix → passthrough + { + name: "94", + from: "claude", + to: "iflow", + model: "glm-test", + inputJSON: `{"model":"glm-test","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 95: Claude to iflow, (8192) → enable_thinking=true + { + name: "95", + from: "claude", + to: "iflow", + model: "glm-test(8192)", + inputJSON: `{"model":"glm-test(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "true", + expectErr: false, + }, + // Case 96: Claude to iflow, (-1) → enable_thinking=true + { + name: "96", + from: "claude", + to: "iflow", + model: "glm-test(-1)", + inputJSON: `{"model":"glm-test(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "true", + expectErr: false, + }, + // Case 97: Claude to iflow, (0) → enable_thinking=false + { + name: "97", + from: "claude", + to: "iflow", + model: "glm-test(0)", + inputJSON: `{"model":"glm-test(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "false", + expectErr: false, + }, + + // minimax-test (from: openai, gemini) + // Case 98: OpenAI to iflow, no suffix → passthrough + { + name: "98", + from: "openai", + to: "iflow", + model: "minimax-test", + inputJSON: `{"model":"minimax-test","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 99: OpenAI to iflow, (medium) → reasoning_split=true + { + name: "99", + from: "openai", + to: "iflow", + model: "minimax-test(medium)", + inputJSON: `{"model":"minimax-test(medium)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_split", + expectValue: "true", + expectErr: false, + }, + // Case 100: OpenAI to iflow, (auto) → reasoning_split=true + { + name: "100", + from: 
"openai", + to: "iflow", + model: "minimax-test(auto)", + inputJSON: `{"model":"minimax-test(auto)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_split", + expectValue: "true", + expectErr: false, + }, + // Case 101: OpenAI to iflow, (none) → reasoning_split=false + { + name: "101", + from: "openai", + to: "iflow", + model: "minimax-test(none)", + inputJSON: `{"model":"minimax-test(none)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_split", + expectValue: "false", + expectErr: false, + }, + // Case 102: Gemini to iflow, no suffix → passthrough + { + name: "102", + from: "gemini", + to: "iflow", + model: "minimax-test", + inputJSON: `{"model":"minimax-test","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 103: Gemini to iflow, (8192) → reasoning_split=true + { + name: "103", + from: "gemini", + to: "iflow", + model: "minimax-test(8192)", + inputJSON: `{"model":"minimax-test(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_split", + expectValue: "true", + expectErr: false, + }, + // Case 104: Gemini to iflow, (-1) → reasoning_split=true + { + name: "104", + from: "gemini", + to: "iflow", + model: "minimax-test(-1)", + inputJSON: `{"model":"minimax-test(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_split", + expectValue: "true", + expectErr: false, + }, + // Case 105: Gemini to iflow, (0) → reasoning_split=false + { + name: "105", + from: "gemini", + to: "iflow", + model: "minimax-test(0)", + inputJSON: `{"model":"minimax-test(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_split", + expectValue: "false", + expectErr: false, + }, + + // Gemini Family Cross-Channel Consistency (Cases 106-114) + // Tests that gemini/gemini-cli/antigravity as same API family should have consistent validation behavior + + // Case 106: Gemini to Antigravity, budget 64000 (suffix) → clamped to Max + { + name: "106", + from: "gemini", + to: "antigravity", + model: "gemini-budget-model(64000)", + inputJSON: `{"model":"gemini-budget-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + // Case 107: Gemini to Gemini-CLI, budget 64000 (suffix) → clamped to Max + { + name: "107", + from: "gemini", + to: "gemini-cli", + model: "gemini-budget-model(64000)", + inputJSON: `{"model":"gemini-budget-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + // Case 108: Gemini-CLI to Antigravity, budget 64000 (suffix) → clamped to Max + { + name: "108", + from: "gemini-cli", + to: "antigravity", + model: "gemini-budget-model(64000)", + inputJSON: `{"model":"gemini-budget-model(64000)","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + // Case 109: Gemini-CLI to Gemini, budget 64000 (suffix) → clamped to Max + { + name: "109", + from: "gemini-cli", + to: "gemini", + model: "gemini-budget-model(64000)", + inputJSON: 
`{"model":"gemini-budget-model(64000)","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + // Case 110: Gemini to Antigravity, budget 8192 → passthrough (normal value) + { + name: "110", + from: "gemini", + to: "antigravity", + model: "gemini-budget-model(8192)", + inputJSON: `{"model":"gemini-budget-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 111: Gemini-CLI to Antigravity, budget 8192 → passthrough (normal value) + { + name: "111", + from: "gemini-cli", + to: "antigravity", + model: "gemini-budget-model(8192)", + inputJSON: `{"model":"gemini-budget-model(8192)","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, }, } - reg.RegisterClient(uid+"-custom-openai", "codex", customOpenAIModels) - return func() { - reg.UnregisterClient(uid + "-gemini") - reg.UnregisterClient(uid + "-claude") - reg.UnregisterClient(uid + "-openai") - reg.UnregisterClient(uid + "-qwen") - reg.UnregisterClient(uid + "-custom-openai") + + runThinkingTests(t, cases) +} + +// TestThinkingE2EMatrix_Body tests the thinking configuration transformation using request body parameters. +// Data flow: Input JSON with thinking params → TranslateRequest → ApplyThinking → Validate Output +func TestThinkingE2EMatrix_Body(t *testing.T) { + reg := registry.GetGlobalRegistry() + uid := fmt.Sprintf("thinking-e2e-body-%d", time.Now().UnixNano()) + + reg.RegisterClient(uid, "test", getTestModels()) + defer reg.UnregisterClient(uid) + + cases := []thinkingTestCase{ + // level-model (Levels=minimal/low/medium/high, ZeroAllowed=false, DynamicAllowed=false) + + // Case 1: No param → injected default → medium + { + name: "1", + from: "openai", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 2: reasoning_effort=medium → medium + { + name: "2", + from: "openai", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"medium"}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 3: reasoning_effort=xhigh → out of range error + { + name: "3", + from: "openai", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"xhigh"}`, + expectField: "", + expectErr: true, + }, + // Case 4: reasoning_effort=none → clamped to minimal + { + name: "4", + from: "openai", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"none"}`, + expectField: "reasoning.effort", + expectValue: "minimal", + expectErr: false, + }, + // Case 5: reasoning_effort=auto → medium (DynamicAllowed=false) + { + name: "5", + from: "openai", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"auto"}`, + expectField: "reasoning.effort", + 
expectValue: "medium", + expectErr: false, + }, + // Case 6: No param from gemini → injected default → medium + { + name: "6", + from: "gemini", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 7: thinkingBudget=8192 → medium + { + name: "7", + from: "gemini", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 8: thinkingBudget=64000 → clamped to high + { + name: "8", + from: "gemini", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":64000}}}`, + expectField: "reasoning.effort", + expectValue: "high", + expectErr: false, + }, + // Case 9: thinkingBudget=0 → clamped to minimal + { + name: "9", + from: "gemini", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":0}}}`, + expectField: "reasoning.effort", + expectValue: "minimal", + expectErr: false, + }, + // Case 10: thinkingBudget=-1 → medium (DynamicAllowed=false) + { + name: "10", + from: "gemini", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 11: Claude no param → passthrough (no thinking) + { + name: "11", + from: "claude", + to: "openai", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 12: thinking.budget_tokens=8192 → medium + { + name: "12", + from: "claude", + to: "openai", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":8192}}`, + expectField: "reasoning_effort", + expectValue: "medium", + expectErr: false, + }, + // Case 13: thinking.budget_tokens=64000 → clamped to high + { + name: "13", + from: "claude", + to: "openai", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":64000}}`, + expectField: "reasoning_effort", + expectValue: "high", + expectErr: false, + }, + // Case 14: thinking.budget_tokens=0 → clamped to minimal + { + name: "14", + from: "claude", + to: "openai", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":0}}`, + expectField: "reasoning_effort", + expectValue: "minimal", + expectErr: false, + }, + // Case 15: thinking.budget_tokens=-1 → medium (DynamicAllowed=false) + { + name: "15", + from: "claude", + to: "openai", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":-1}}`, + expectField: "reasoning_effort", + expectValue: "medium", + expectErr: false, + }, + + // level-subset-model (Levels=low/high, 
+
+		// level-subset-model (Levels=low/high, ZeroAllowed=false, DynamicAllowed=false)
+
+		// Case 16: thinkingBudget=8192 → medium → rounded down to low
+		{name: "16", from: "gemini", to: "openai", model: "level-subset-model", inputJSON: `{"model":"level-subset-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, expectField: "reasoning_effort", expectValue: "low", expectErr: false},
+		// Case 17: thinking.budget_tokens=1 → minimal → clamped to low
+		{name: "17", from: "claude", to: "gemini", model: "level-subset-model", inputJSON: `{"model":"level-subset-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":1}}`, expectField: "generationConfig.thinkingConfig.thinkingLevel", expectValue: "low", includeThoughts: "true", expectErr: false},
+
+		// gemini-budget-model (Min=128, Max=20000, ZeroAllowed=false, DynamicAllowed=true)
+
+		// Case 18: No param → passthrough
+		{name: "18", from: "openai", to: "gemini", model: "gemini-budget-model", inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}]}`, expectField: "", expectErr: false},
+		// Case 19: reasoning_effort=medium → 8192
+		{name: "19", from: "openai", to: "gemini", model: "gemini-budget-model", inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"medium"}`, expectField: "generationConfig.thinkingConfig.thinkingBudget", expectValue: "8192", includeThoughts: "true", expectErr: false},
+		// Case 20: reasoning_effort=xhigh → clamped to 20000
+		{name: "20", from: "openai", to: "gemini", model: "gemini-budget-model", inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"xhigh"}`, expectField: "generationConfig.thinkingConfig.thinkingBudget", expectValue: "20000", includeThoughts: "true", expectErr: false},
+		// Case 21: reasoning_effort=none → clamped to 128 → includeThoughts=false
+		{name: "21", from: "openai", to: "gemini", model: "gemini-budget-model", inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"none"}`, expectField: "generationConfig.thinkingConfig.thinkingBudget", expectValue: "128", includeThoughts: "false", expectErr: false},
+		// Case 22: reasoning_effort=auto → -1 (DynamicAllowed=true)
+		{name: "22", from: "openai", to: "gemini", model: "gemini-budget-model", inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"auto"}`, expectField: "generationConfig.thinkingConfig.thinkingBudget", expectValue: "-1", includeThoughts: "true", expectErr: false},
+		// Case 23: Claude no param → passthrough
+		{name: "23", from: "claude", to: "gemini", model: "gemini-budget-model", inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}]}`, expectField: "", expectErr: false},
+		// Case 24: thinking.budget_tokens=8192 → 8192
+		{name: "24", from: "claude", to: "gemini", model: "gemini-budget-model", inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":8192}}`, expectField: "generationConfig.thinkingConfig.thinkingBudget", expectValue: "8192", includeThoughts: "true", expectErr: false},
+		// Case 25: thinking.budget_tokens=64000 → clamped to 20000
+		{name: "25", from: "claude", to: "gemini", model: "gemini-budget-model", inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":64000}}`, expectField: "generationConfig.thinkingConfig.thinkingBudget", expectValue: "20000", includeThoughts: "true", expectErr: false},
+		// Case 26: thinking.budget_tokens=0 → clamped to 128 → includeThoughts=false
+		{name: "26", from: "claude", to: "gemini", model: "gemini-budget-model", inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":0}}`, expectField: "generationConfig.thinkingConfig.thinkingBudget", expectValue: "128", includeThoughts: "false", expectErr: false},
+		// Case 27: thinking.budget_tokens=-1 → -1 (DynamicAllowed=true)
+		{name: "27", from: "claude", to: "gemini", model: "gemini-budget-model", inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":-1}}`, expectField: "generationConfig.thinkingConfig.thinkingBudget", expectValue: "-1", includeThoughts: "true", expectErr: false},
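The numeric-budget cases (18-27) follow the analogous rule for budget models: clamp into [Min, Max], pass 0 through only when ZeroAllowed, and pass -1 (dynamic) through only when DynamicAllowed, otherwise fall back to the default budget (8192 in these cases). A minimal sketch of that rule, again an illustration rather than the project's code:

```go
package main

import "fmt"

// clampBudget mirrors the expectations encoded in cases 18-27.
func clampBudget(b, min, max int, zeroAllowed, dynamicAllowed bool) int {
	switch {
	case b == -1 && dynamicAllowed:
		return -1
	case b == -1: // dynamic unsupported: default budget (case 10's "medium" equivalent)
		return 8192
	case b == 0 && zeroAllowed:
		return 0
	}
	if b < min {
		return min // case 26: 0 with ZeroAllowed=false clamps up to Min
	}
	if b > max {
		return max // case 25: 64000 clamps down to 20000
	}
	return b
}

func main() {
	for _, b := range []int{-1, 0, 8192, 64000} {
		fmt.Println(b, "->", clampBudget(b, 128, 20000, false, true))
	}
}
```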
+
+		// gemini-mixed-model (Min=128, Max=32768, Levels=low/high, ZeroAllowed=false, DynamicAllowed=true)
+
+		// Case 28: No param → passthrough
+		{name: "28", from: "openai", to: "gemini", model: "gemini-mixed-model", inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}]}`, expectField: "", expectErr: false},
+		// Case 29: reasoning_effort=high → high
+		{name: "29", from: "openai", to: "gemini", model: "gemini-mixed-model", inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"high"}`, expectField: "generationConfig.thinkingConfig.thinkingLevel", expectValue: "high", includeThoughts: "true", expectErr: false},
+		// Case 30: reasoning_effort=xhigh → error (not in low/high)
+		{name: "30", from: "openai", to: "gemini", model: "gemini-mixed-model", inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"xhigh"}`, expectField: "", expectErr: true},
+		// Case 31: reasoning_effort=none → clamped to low → includeThoughts=false
+		{name: "31", from: "openai", to: "gemini", model: "gemini-mixed-model", inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"none"}`, expectField: "generationConfig.thinkingConfig.thinkingLevel", expectValue: "low", includeThoughts: "false", expectErr: false},
+		// Case 32: reasoning_effort=auto → -1 (DynamicAllowed=true)
+		{name: "32", from: "openai", to: "gemini", model: "gemini-mixed-model", inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"auto"}`, expectField: "generationConfig.thinkingConfig.thinkingBudget", expectValue: "-1", includeThoughts: "true", expectErr: false},
+		// Case 33: Claude no param → passthrough
+		{name: "33", from: "claude", to: "gemini", model: "gemini-mixed-model", inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}]}`, expectField: "", expectErr: false},
+		// Case 34: thinking.budget_tokens=8192 → 8192 (keeps budget)
+		{name: "34", from: "claude", to: "gemini", model: "gemini-mixed-model", inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":8192}}`, expectField: "generationConfig.thinkingConfig.thinkingBudget", expectValue: "8192", includeThoughts: "true", expectErr: false},
+		// Case 35: thinking.budget_tokens=64000 → clamped to 32768 (keeps budget)
+		{name: "35", from: "claude", to: "gemini", model: "gemini-mixed-model", inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":64000}}`, expectField: "generationConfig.thinkingConfig.thinkingBudget", expectValue: "32768", includeThoughts: "true", expectErr: false},
+		// Case 36: thinking.budget_tokens=0 → clamped to low → includeThoughts=false
+		{name: "36", from: "claude", to: "gemini", model: "gemini-mixed-model", inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":0}}`, expectField: "generationConfig.thinkingConfig.thinkingLevel", expectValue: "low", includeThoughts: "false", expectErr: false},
+		// Case 37: thinking.budget_tokens=-1 → -1 (DynamicAllowed=true)
+		{name: "37", from: "claude", to: "gemini", model: "gemini-mixed-model", inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":-1}}`, expectField: "generationConfig.thinkingConfig.thinkingBudget", expectValue: "-1", includeThoughts: "true", expectErr: false},
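Note the mixed-model asymmetry these cases encode: an incoming numeric budget stays a `thinkingBudget`, an incoming level stays a `thinkingLevel`, and disabling thinking keeps the lowest setting while flipping `includeThoughts` off rather than deleting the block. A small sketch of the two output shapes, using hypothetical local types that only mirror the JSON asserted above:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// ThinkingConfig mirrors the generationConfig.thinkingConfig shapes the
// gemini-mixed-model cases assert; field names come from the test paths.
type ThinkingConfig struct {
	ThinkingBudget  *int   `json:"thinkingBudget,omitempty"`
	ThinkingLevel   string `json:"thinkingLevel,omitempty"`
	IncludeThoughts bool   `json:"includeThoughts"`
}

func main() {
	budget := 8192
	fromBudget := ThinkingConfig{ThinkingBudget: &budget, IncludeThoughts: true} // case 34
	fromLevel := ThinkingConfig{ThinkingLevel: "high", IncludeThoughts: true}    // case 29
	for _, c := range []ThinkingConfig{fromBudget, fromLevel} {
		b, _ := json.Marshal(c)
		fmt.Println(string(b))
	}
}
```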
`{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":8192}}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 35: thinking.budget_tokens=64000 → clamped to 32768 (keeps budget) + { + name: "35", + from: "claude", + to: "gemini", + model: "gemini-mixed-model", + inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":64000}}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "32768", + includeThoughts: "true", + expectErr: false, + }, + // Case 36: thinking.budget_tokens=0 → clamped to low → includeThoughts=false + { + name: "36", + from: "claude", + to: "gemini", + model: "gemini-mixed-model", + inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":0}}`, + expectField: "generationConfig.thinkingConfig.thinkingLevel", + expectValue: "low", + includeThoughts: "false", + expectErr: false, + }, + // Case 37: thinking.budget_tokens=-1 → -1 (DynamicAllowed=true) + { + name: "37", + from: "claude", + to: "gemini", + model: "gemini-mixed-model", + inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":-1}}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + + // claude-budget-model (Min=1024, Max=128000, ZeroAllowed=true, DynamicAllowed=false) + + // Case 38: No param → passthrough + { + name: "38", + from: "openai", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 39: reasoning_effort=medium → 8192 + { + name: "39", + from: "openai", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"medium"}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, + // Case 40: reasoning_effort=xhigh → clamped to 32768 + { + name: "40", + from: "openai", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"xhigh"}`, + expectField: "thinking.budget_tokens", + expectValue: "32768", + expectErr: false, + }, + // Case 41: reasoning_effort=none → disabled + { + name: "41", + from: "openai", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"none"}`, + expectField: "thinking.type", + expectValue: "disabled", + expectErr: false, + }, + // Case 42: reasoning_effort=auto → 64512 (mid-range) + { + name: "42", + from: "openai", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"auto"}`, + expectField: "thinking.budget_tokens", + expectValue: "64512", + expectErr: false, + }, + // Case 43: Gemini no param → passthrough + { + name: "43", + from: "gemini", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + 
+
+		// antigravity-budget-model (Min=128, Max=20000, ZeroAllowed=true, DynamicAllowed=true)
+
+		// Case 48: Gemini no param → passthrough
+		{name: "48", from: "gemini", to: "antigravity", model: "antigravity-budget-model", inputJSON: `{"model":"antigravity-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, expectField: "", expectErr: false},
+		// Case 49: thinkingLevel=medium → 8192
+		{name: "49", from: "gemini", to: "antigravity", model: "antigravity-budget-model", inputJSON: `{"model":"antigravity-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingLevel":"medium"}}}`, expectField: "request.generationConfig.thinkingConfig.thinkingBudget", expectValue: "8192", includeThoughts: "true", expectErr: false},
+		// Case 50: thinkingLevel=xhigh → clamped to 20000
+		{name: "50", from: "gemini", to: "antigravity", model: "antigravity-budget-model", inputJSON: `{"model":"antigravity-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingLevel":"xhigh"}}}`, expectField: "request.generationConfig.thinkingConfig.thinkingBudget", expectValue: "20000", includeThoughts: "true", expectErr: false},
+		// Case 51: thinkingLevel=none → 0 (ZeroAllowed=true)
+		{name: "51", from: "gemini", to: "antigravity", model: "antigravity-budget-model", inputJSON: `{"model":"antigravity-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingLevel":"none"}}}`, expectField: "request.generationConfig.thinkingConfig.thinkingBudget", expectValue: "0", includeThoughts: "false", expectErr: false},
+		// Case 52: thinkingBudget=-1 → -1 (DynamicAllowed=true)
+		{name: "52", from: "gemini", to: "antigravity", model: "antigravity-budget-model", inputJSON: `{"model":"antigravity-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}`, expectField: "request.generationConfig.thinkingConfig.thinkingBudget", expectValue: "-1", includeThoughts: "true", expectErr: false},
`{"model":"antigravity-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + // Case 53: Claude no param → passthrough + { + name: "53", + from: "claude", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 54: thinking.budget_tokens=8192 → 8192 + { + name: "54", + from: "claude", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":8192}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 55: thinking.budget_tokens=64000 → clamped to 20000 + { + name: "55", + from: "claude", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":64000}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + // Case 56: thinking.budget_tokens=0 → 0 (ZeroAllowed=true) + { + name: "56", + from: "claude", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":0}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "0", + includeThoughts: "false", + expectErr: false, + }, + // Case 57: thinking.budget_tokens=-1 → -1 (DynamicAllowed=true) + { + name: "57", + from: "claude", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":-1}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + + // no-thinking-model (Thinking=nil) + + // Case 58: Gemini no param → passthrough + { + name: "58", + from: "gemini", + to: "openai", + model: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 59: thinkingBudget=8192 → stripped + { + name: "59", + from: "gemini", + to: "openai", + model: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, + expectField: "", + expectErr: false, + }, + // Case 60: thinkingBudget=0 → stripped + { + name: "60", + from: "gemini", + to: "openai", + model: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":0}}}`, + expectField: "", + expectErr: false, + }, + // Case 61: thinkingBudget=-1 → stripped + { + name: "61", + from: "gemini", + to: "openai", + model: "no-thinking-model", + inputJSON: 
`{"model":"no-thinking-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}`, + expectField: "", + expectErr: false, + }, + // Case 62: Claude no param → passthrough + { + name: "62", + from: "claude", + to: "openai", + model: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 63: thinking.budget_tokens=8192 → stripped + { + name: "63", + from: "claude", + to: "openai", + model: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":8192}}`, + expectField: "", + expectErr: false, + }, + // Case 64: thinking.budget_tokens=0 → stripped + { + name: "64", + from: "claude", + to: "openai", + model: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":0}}`, + expectField: "", + expectErr: false, + }, + // Case 65: thinking.budget_tokens=-1 → stripped + { + name: "65", + from: "claude", + to: "openai", + model: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":-1}}`, + expectField: "", + expectErr: false, + }, + + // user-defined-model (UserDefined=true, Thinking=nil) + + // Case 66: Gemini no param → passthrough + { + name: "66", + from: "gemini", + to: "openai", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 67: thinkingBudget=8192 → medium + { + name: "67", + from: "gemini", + to: "openai", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, + expectField: "reasoning_effort", + expectValue: "medium", + expectErr: false, + }, + // Case 68: thinkingBudget=64000 → xhigh (passthrough) + { + name: "68", + from: "gemini", + to: "openai", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":64000}}}`, + expectField: "reasoning_effort", + expectValue: "xhigh", + expectErr: false, + }, + // Case 69: thinkingBudget=0 → none + { + name: "69", + from: "gemini", + to: "openai", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":0}}}`, + expectField: "reasoning_effort", + expectValue: "none", + expectErr: false, + }, + // Case 70: thinkingBudget=-1 → auto + { + name: "70", + from: "gemini", + to: "openai", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}`, + expectField: "reasoning_effort", + expectValue: "auto", + expectErr: false, + }, + // Case 71: Claude no param → injected default → medium + { + name: "71", + from: "claude", + to: "codex", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 72: 
+
+		// Same-protocol passthrough tests (80-89)
+
+		// Case 80: OpenAI to OpenAI, reasoning_effort=high → passthrough
+		{name: "80", from: "openai", to: "openai", model: "level-model", inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"high"}`, expectField: "reasoning_effort", expectValue: "high", expectErr: false},
+		// Case 81: OpenAI to OpenAI, reasoning_effort=xhigh → out of range error
+		{name: "81", from: "openai", to: "openai", model: "level-model", inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"xhigh"}`, expectField: "", expectErr: true},
+		// Case 82: OpenAI-Response to Codex, reasoning.effort=high → passthrough
+		{name: "82", from: "openai-response", to: "codex", model: "level-model", inputJSON: `{"model":"level-model","input":[{"role":"user","content":"hi"}],"reasoning":{"effort":"high"}}`, expectField: "reasoning.effort", expectValue: "high", expectErr: false},
+		// Case 83: OpenAI-Response to Codex, reasoning.effort=xhigh → out of range error
+		{name: "83", from: "openai-response", to: "codex", model: "level-model", inputJSON: `{"model":"level-model","input":[{"role":"user","content":"hi"}],"reasoning":{"effort":"xhigh"}}`, expectField: "", expectErr: true},
+		// Case 84: Gemini to Gemini, thinkingBudget=8192 → passthrough
+		{name: "84", from: "gemini", to: "gemini", model: "gemini-budget-model", inputJSON: `{"model":"gemini-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, expectField: "generationConfig.thinkingConfig.thinkingBudget", expectValue: "8192", includeThoughts: "true", expectErr: false},
+		// Case 85: Gemini to Gemini, thinkingBudget=64000 → exceeds Max error
+		{name: "85", from: "gemini", to: "gemini", model: "gemini-budget-model", inputJSON: `{"model":"gemini-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":64000}}}`, expectField: "", expectErr: true},
+		// Case 86: Claude to Claude, thinking.budget_tokens=8192 → passthrough
+		{name: "86", from: "claude", to: "claude", model: "claude-budget-model", inputJSON: `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":8192}}`, expectField: "thinking.budget_tokens", expectValue: "8192", expectErr: false},
+		// Case 87: Claude to Claude, thinking.budget_tokens=200000 → exceeds Max error
+		{name: "87", from: "claude", to: "claude", model: "claude-budget-model", inputJSON: `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":200000}}`, expectField: "", expectErr: true},
+		// Case 88: Gemini-CLI to Antigravity, thinkingBudget=8192 → passthrough
+		{name: "88", from: "gemini-cli", to: "antigravity", model: "antigravity-budget-model", inputJSON: `{"model":"antigravity-budget-model","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`, expectField: "request.generationConfig.thinkingConfig.thinkingBudget", expectValue: "8192", includeThoughts: "true", expectErr: false},
+		// Case 89: Gemini-CLI to Antigravity, thinkingBudget=64000 → exceeds Max error
+		{name: "89", from: "gemini-cli", to: "antigravity", model: "antigravity-budget-model", inputJSON: `{"model":"antigravity-budget-model","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":64000}}}}`, expectField: "", expectErr: true},
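Cases 80-89 (and the family cases 106-109 below) encode an asymmetry worth calling out: when the client already speaks the upstream's protocol or API family, an out-of-range value is rejected rather than silently clamped, whereas cross-protocol translation clamps. A hedged sketch of that decision, inferred from the case comments only:

```go
package main

import "fmt"

// validateOrClamp: same family -> strict validation; cross family -> clamp.
func validateOrClamp(sameFamily bool, b, min, max int) (int, error) {
	if b >= min && b <= max {
		return b, nil
	}
	if sameFamily {
		return 0, fmt.Errorf("thinking budget %d outside [%d, %d]", b, min, max)
	}
	if b < min {
		return min, nil
	}
	return max, nil
}

func main() {
	_, err := validateOrClamp(true, 64000, 128, 20000) // case 85: error
	fmt.Println(err)
	v, _ := validateOrClamp(false, 64000, 128, 20000) // case 55: clamped
	fmt.Println(v)
}
```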
`{"model":"level-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"xhigh"}`, + expectField: "", + expectErr: true, + }, + // Case 82: OpenAI-Response to Codex, reasoning.effort=high → passthrough + { + name: "82", + from: "openai-response", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","input":[{"role":"user","content":"hi"}],"reasoning":{"effort":"high"}}`, + expectField: "reasoning.effort", + expectValue: "high", + expectErr: false, + }, + // Case 83: OpenAI-Response to Codex, reasoning.effort=xhigh → out of range error + { + name: "83", + from: "openai-response", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","input":[{"role":"user","content":"hi"}],"reasoning":{"effort":"xhigh"}}`, + expectField: "", + expectErr: true, + }, + // Case 84: Gemini to Gemini, thinkingBudget=8192 → passthrough + { + name: "84", + from: "gemini", + to: "gemini", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 85: Gemini to Gemini, thinkingBudget=64000 → exceeds Max error + { + name: "85", + from: "gemini", + to: "gemini", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":64000}}}`, + expectField: "", + expectErr: true, + }, + // Case 86: Claude to Claude, thinking.budget_tokens=8192 → passthrough + { + name: "86", + from: "claude", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":8192}}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, + // Case 87: Claude to Claude, thinking.budget_tokens=200000 → exceeds Max error + { + name: "87", + from: "claude", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":200000}}`, + expectField: "", + expectErr: true, + }, + // Case 88: Gemini-CLI to Antigravity, thinkingBudget=8192 → passthrough + { + name: "88", + from: "gemini-cli", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 89: Gemini-CLI to Antigravity, thinkingBudget=64000 → exceeds Max error + { + name: "89", + from: "gemini-cli", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":64000}}}}`, + expectField: "", + expectErr: true, + }, + + // iflow tests: glm-test and minimax-test (Cases 90-105) + + // glm-test (from: openai, claude) + // Case 90: OpenAI to iflow, no param → passthrough + { + name: "90", + from: "openai", + to: "iflow", + model: "glm-test", + inputJSON: 
`{"model":"glm-test","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 91: OpenAI to iflow, reasoning_effort=medium → enable_thinking=true + { + name: "91", + from: "openai", + to: "iflow", + model: "glm-test", + inputJSON: `{"model":"glm-test","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"medium"}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "true", + expectErr: false, + }, + // Case 92: OpenAI to iflow, reasoning_effort=auto → enable_thinking=true + { + name: "92", + from: "openai", + to: "iflow", + model: "glm-test", + inputJSON: `{"model":"glm-test","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"auto"}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "true", + expectErr: false, + }, + // Case 93: OpenAI to iflow, reasoning_effort=none → enable_thinking=false + { + name: "93", + from: "openai", + to: "iflow", + model: "glm-test", + inputJSON: `{"model":"glm-test","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"none"}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "false", + expectErr: false, + }, + // Case 94: Claude to iflow, no param → passthrough + { + name: "94", + from: "claude", + to: "iflow", + model: "glm-test", + inputJSON: `{"model":"glm-test","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 95: Claude to iflow, thinking.budget_tokens=8192 → enable_thinking=true + { + name: "95", + from: "claude", + to: "iflow", + model: "glm-test", + inputJSON: `{"model":"glm-test","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":8192}}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "true", + expectErr: false, + }, + // Case 96: Claude to iflow, thinking.budget_tokens=-1 → enable_thinking=true + { + name: "96", + from: "claude", + to: "iflow", + model: "glm-test", + inputJSON: `{"model":"glm-test","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":-1}}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "true", + expectErr: false, + }, + // Case 97: Claude to iflow, thinking.budget_tokens=0 → enable_thinking=false + { + name: "97", + from: "claude", + to: "iflow", + model: "glm-test", + inputJSON: `{"model":"glm-test","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":0}}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "false", + expectErr: false, + }, + + // minimax-test (from: openai, gemini) + // Case 98: OpenAI to iflow, no param → passthrough + { + name: "98", + from: "openai", + to: "iflow", + model: "minimax-test", + inputJSON: `{"model":"minimax-test","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 99: OpenAI to iflow, reasoning_effort=medium → reasoning_split=true + { + name: "99", + from: "openai", + to: "iflow", + model: "minimax-test", + inputJSON: `{"model":"minimax-test","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"medium"}`, + expectField: "reasoning_split", + expectValue: "true", + expectErr: false, + }, + // Case 100: OpenAI to iflow, reasoning_effort=auto → reasoning_split=true + { + name: "100", + from: "openai", + to: "iflow", + model: "minimax-test", + inputJSON: `{"model":"minimax-test","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"auto"}`, + expectField: "reasoning_split", + 
expectValue: "true", + expectErr: false, + }, + // Case 101: OpenAI to iflow, reasoning_effort=none → reasoning_split=false + { + name: "101", + from: "openai", + to: "iflow", + model: "minimax-test", + inputJSON: `{"model":"minimax-test","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"none"}`, + expectField: "reasoning_split", + expectValue: "false", + expectErr: false, + }, + // Case 102: Gemini to iflow, no param → passthrough + { + name: "102", + from: "gemini", + to: "iflow", + model: "minimax-test", + inputJSON: `{"model":"minimax-test","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 103: Gemini to iflow, thinkingBudget=8192 → reasoning_split=true + { + name: "103", + from: "gemini", + to: "iflow", + model: "minimax-test", + inputJSON: `{"model":"minimax-test","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, + expectField: "reasoning_split", + expectValue: "true", + expectErr: false, + }, + // Case 104: Gemini to iflow, thinkingBudget=-1 → reasoning_split=true + { + name: "104", + from: "gemini", + to: "iflow", + model: "minimax-test", + inputJSON: `{"model":"minimax-test","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}`, + expectField: "reasoning_split", + expectValue: "true", + expectErr: false, + }, + // Case 105: Gemini to iflow, thinkingBudget=0 → reasoning_split=false + { + name: "105", + from: "gemini", + to: "iflow", + model: "minimax-test", + inputJSON: `{"model":"minimax-test","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":0}}}`, + expectField: "reasoning_split", + expectValue: "false", + expectErr: false, + }, + + // Gemini Family Cross-Channel Consistency (Cases 106-114) + // Tests that gemini/gemini-cli/antigravity as same API family should have consistent validation behavior + + // Case 106: Gemini to Antigravity, thinkingBudget=64000 → exceeds Max error (same family strict validation) + { + name: "106", + from: "gemini", + to: "antigravity", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":64000}}}`, + expectField: "", + expectErr: true, + }, + // Case 107: Gemini to Gemini-CLI, thinkingBudget=64000 → exceeds Max error (same family strict validation) + { + name: "107", + from: "gemini", + to: "gemini-cli", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":64000}}}`, + expectField: "", + expectErr: true, + }, + // Case 108: Gemini-CLI to Antigravity, thinkingBudget=64000 → exceeds Max error (same family strict validation) + { + name: "108", + from: "gemini-cli", + to: "antigravity", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":64000}}}}`, + expectField: "", + expectErr: true, + }, + // Case 109: Gemini-CLI to Gemini, thinkingBudget=64000 → exceeds Max error (same family strict validation) + { + name: "109", + from: "gemini-cli", + to: "gemini", + model: "gemini-budget-model", + inputJSON: 
`{"model":"gemini-budget-model","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":64000}}}}`, + expectField: "", + expectErr: true, + }, + // Case 110: Gemini to Antigravity, thinkingBudget=8192 → passthrough (normal value) + { + name: "110", + from: "gemini", + to: "antigravity", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 111: Gemini-CLI to Antigravity, thinkingBudget=8192 → passthrough (normal value) + { + name: "111", + from: "gemini-cli", + to: "antigravity", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + } + + runThinkingTests(t, cases) +} + +// getTestModels returns the shared model definitions for E2E tests. +func getTestModels() []*registry.ModelInfo { + return []*registry.ModelInfo{ + { + ID: "level-model", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "openai", + DisplayName: "Level Model", + Thinking: ®istry.ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}, ZeroAllowed: false, DynamicAllowed: false}, + }, + { + ID: "level-subset-model", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "gemini", + DisplayName: "Level Subset Model", + Thinking: ®istry.ThinkingSupport{Levels: []string{"low", "high"}, ZeroAllowed: false, DynamicAllowed: false}, + }, + { + ID: "gemini-budget-model", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "gemini", + DisplayName: "Gemini Budget Model", + Thinking: ®istry.ThinkingSupport{Min: 128, Max: 20000, ZeroAllowed: false, DynamicAllowed: true}, + }, + { + ID: "gemini-mixed-model", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "gemini", + DisplayName: "Gemini Mixed Model", + Thinking: ®istry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}, ZeroAllowed: false, DynamicAllowed: true}, + }, + { + ID: "claude-budget-model", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "claude", + DisplayName: "Claude Budget Model", + Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, + }, + { + ID: "antigravity-budget-model", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "gemini-cli", + DisplayName: "Antigravity Budget Model", + Thinking: ®istry.ThinkingSupport{Min: 128, Max: 20000, ZeroAllowed: true, DynamicAllowed: true}, + }, + { + ID: "no-thinking-model", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "openai", + DisplayName: "No Thinking Model", + Thinking: nil, + }, + { + ID: "user-defined-model", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "openai", + DisplayName: "User Defined Model", + UserDefined: true, + Thinking: nil, + }, + { + ID: "glm-test", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "iflow", + DisplayName: "GLM Test Model", + Thinking: ®istry.ThinkingSupport{Levels: []string{"none", "auto", 
"minimal", "low", "medium", "high", "xhigh"}}, + }, + { + ID: "minimax-test", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "iflow", + DisplayName: "MiniMax Test Model", + Thinking: ®istry.ThinkingSupport{Levels: []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"}}, + }, } } -var ( - thinkingTestModels = []string{ - "gpt-5", // level-based thinking model - "gemini-2.5-pro", // numeric-budget thinking model - "qwen3-code-plus", // no thinking support - "openai-compat", // allowCompat=true (OpenAI-compatible channel) - } - thinkingTestFromProtocols = []string{"openai", "claude", "gemini", "openai-response"} - thinkingTestToProtocols = []string{"gemini", "claude", "openai", "codex"} +// runThinkingTests runs thinking test cases using the real data flow path. +func runThinkingTests(t *testing.T, cases []thinkingTestCase) { + for _, tc := range cases { + tc := tc + testName := fmt.Sprintf("Case%s_%s->%s_%s", tc.name, tc.from, tc.to, tc.model) + t.Run(testName, func(t *testing.T) { + suffixResult := thinking.ParseSuffix(tc.model) + baseModel := suffixResult.ModelName - // Numeric budgets and their level equivalents: - // -1 -> auto - // 0 -> none - // 1..1024 -> low - // 1025..8192 -> medium - // 8193..24576 -> high - // >24576 -> model highest level (right-most in Levels) - thinkingNumericSamples = []int{-1, 0, 1023, 1025, 8193, 64000} - - // Levels and their numeric equivalents: - // auto -> -1 - // none -> 0 - // minimal -> 512 - // low -> 1024 - // medium -> 8192 - // high -> 24576 - // xhigh -> 32768 - // invalid -> invalid (no mapping) - thinkingLevelSamples = []string{"auto", "none", "minimal", "low", "medium", "high", "xhigh", "invalid"} -) - -func buildRawPayload(fromProtocol, modelWithSuffix string) []byte { - switch fromProtocol { - case "gemini": - return []byte(fmt.Sprintf(`{"model":"%s","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, modelWithSuffix)) - case "openai-response": - return []byte(fmt.Sprintf(`{"model":"%s","input":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`, modelWithSuffix)) - default: // openai / claude and other chat-style payloads - return []byte(fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, modelWithSuffix)) - } -} - -// normalizeCodexPayload mirrors codex_executor's reasoning + streaming tweaks. -func normalizeCodexPayload(body []byte, upstreamModel string, allowCompat bool) ([]byte, error) { - body = executor.NormalizeThinkingConfig(body, upstreamModel, allowCompat) - if err := executor.ValidateThinkingConfig(body, upstreamModel); err != nil { - return body, err - } - body, _ = sjson.SetBytes(body, "model", upstreamModel) - body, _ = sjson.SetBytes(body, "stream", true) - body, _ = sjson.DeleteBytes(body, "previous_response_id") - return body, nil -} - -// buildBodyForProtocol runs a minimal request through the same translation and -// thinking pipeline used in executors for the given target protocol. 
-func buildBodyForProtocol(t *testing.T, fromProtocol, toProtocol, modelWithSuffix string) ([]byte, error) {
-	t.Helper()
-	normalizedModel, metadata := util.NormalizeThinkingModel(modelWithSuffix)
-	upstreamModel := util.ResolveOriginalModel(normalizedModel, metadata)
-	raw := buildRawPayload(fromProtocol, modelWithSuffix)
-	stream := fromProtocol != toProtocol
-
-	body := sdktranslator.TranslateRequest(
-		sdktranslator.FromString(fromProtocol),
-		sdktranslator.FromString(toProtocol),
-		normalizedModel,
-		raw,
-		stream,
-	)
-
-	var err error
-	allowCompat := isOpenAICompatModel(normalizedModel)
-	switch toProtocol {
-	case "gemini":
-		body = executor.ApplyThinkingMetadata(body, metadata, normalizedModel)
-		body = util.ApplyDefaultThinkingIfNeeded(normalizedModel, body)
-		body = util.NormalizeGeminiThinkingBudget(normalizedModel, body)
-		body = util.StripThinkingConfigIfUnsupported(normalizedModel, body)
-	case "claude":
-		if budget, ok := util.ResolveClaudeThinkingConfig(normalizedModel, metadata); ok {
-			body = util.ApplyClaudeThinkingConfig(body, budget)
-		}
-	case "openai":
-		body = executor.ApplyReasoningEffortMetadata(body, metadata, normalizedModel, "reasoning_effort", allowCompat)
-		body = executor.NormalizeThinkingConfig(body, upstreamModel, allowCompat)
-		err = executor.ValidateThinkingConfig(body, upstreamModel)
-	case "codex": // OpenAI responses / codex
-		// Codex does not support allowCompat; always use false.
-		body = executor.ApplyReasoningEffortMetadata(body, metadata, normalizedModel, "reasoning.effort", false)
-		// Mirror CodexExecutor final normalization and model override so tests log the final body.
-		body, err = normalizeCodexPayload(body, upstreamModel, false)
-	default:
-	}
-
-	// Mirror executor behavior: final payload uses the upstream (base) model name.
-	if upstreamModel != "" {
-		body, _ = sjson.SetBytes(body, "model", upstreamModel)
-	}
-
-	// For tests we only keep model + thinking-related fields to avoid noise.
-	body = filterThinkingBody(toProtocol, body, upstreamModel, normalizedModel)
-	return body, err
-}
-
-// filterThinkingBody projects the translated payload down to only model and
-// thinking-related fields for the given target protocol.
-func filterThinkingBody(toProtocol string, body []byte, upstreamModel, normalizedModel string) []byte {
-	if len(body) == 0 {
-		return body
-	}
-	out := []byte(`{}`)
-
-	// Preserve model if present, otherwise fall back to upstream/normalized model.
-	if m := gjson.GetBytes(body, "model"); m.Exists() {
-		out, _ = sjson.SetBytes(out, "model", m.Value())
-	} else if upstreamModel != "" {
-		out, _ = sjson.SetBytes(out, "model", upstreamModel)
-	} else if normalizedModel != "" {
-		out, _ = sjson.SetBytes(out, "model", normalizedModel)
-	}
-
-	switch toProtocol {
-	case "gemini":
-		if tc := gjson.GetBytes(body, "generationConfig.thinkingConfig"); tc.Exists() {
-			out, _ = sjson.SetRawBytes(out, "generationConfig.thinkingConfig", []byte(tc.Raw))
-		}
-	case "claude":
-		if tcfg := gjson.GetBytes(body, "thinking"); tcfg.Exists() {
-			out, _ = sjson.SetRawBytes(out, "thinking", []byte(tcfg.Raw))
-		}
-	case "openai":
-		if re := gjson.GetBytes(body, "reasoning_effort"); re.Exists() {
-			out, _ = sjson.SetBytes(out, "reasoning_effort", re.Value())
-		}
-	case "codex":
-		if re := gjson.GetBytes(body, "reasoning.effort"); re.Exists() {
-			out, _ = sjson.SetBytes(out, "reasoning.effort", re.Value())
-		}
-	}
-	return out
-}
-
-func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) {
-	cleanup := registerCoreModels(t)
-	defer cleanup()
-
-	type scenario struct {
-		name        string
-		modelSuffix string
-	}
-
-	numericName := func(budget int) string {
-		if budget < 0 {
-			return "numeric-neg1"
-		}
-		return fmt.Sprintf("numeric-%d", budget)
-	}
-
-	for _, model := range thinkingTestModels {
-		_ = registry.GetGlobalRegistry().GetModelInfo(model)
-
-		for _, from := range thinkingTestFromProtocols {
-			// Scenario selection follows protocol semantics:
-			//  - OpenAI-style protocols (openai/openai-response) express thinking as levels.
-			//  - Claude/Gemini-style protocols express thinking as numeric budgets.
-			cases := []scenario{
-				{name: "no-suffix", modelSuffix: model},
+			translateTo := tc.to
+			applyTo := tc.to
+			if tc.to == "iflow" {
+				translateTo = "openai"
+				applyTo = "iflow"
			}
-			if from == "openai" || from == "openai-response" {
-				for _, lvl := range thinkingLevelSamples {
-					cases = append(cases, scenario{
-						name:        "level-" + lvl,
-						modelSuffix: fmt.Sprintf("%s(%s)", model, lvl),
-					})
+
+			body := sdktranslator.TranslateRequest(
+				sdktranslator.FromString(tc.from),
+				sdktranslator.FromString(translateTo),
+				baseModel,
+				[]byte(tc.inputJSON),
+				true,
+			)
+			if applyTo == "claude" {
+				body, _ = sjson.SetBytes(body, "max_tokens", 200000)
+			}
+
+			body, err := thinking.ApplyThinking(body, tc.model, tc.from, applyTo)
+
+			if tc.expectErr {
+				if err == nil {
+					t.Fatalf("expected error but got none, body=%s", string(body))
				}
-			} else { // claude or gemini
-				for _, budget := range thinkingNumericSamples {
-					budget := budget
-					cases = append(cases, scenario{
-						name:        numericName(budget),
-						modelSuffix: fmt.Sprintf("%s(%d)", model, budget),
-					})
+				return
+			}
+			if err != nil {
+				t.Fatalf("unexpected error: %v, body=%s", err, string(body))
+			}
+
+			if tc.expectField == "" {
+				var hasThinking bool
+				switch tc.to {
+				case "gemini":
+					hasThinking = gjson.GetBytes(body, "generationConfig.thinkingConfig").Exists()
+				case "gemini-cli":
+					hasThinking = gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists()
+				case "antigravity":
+					hasThinking = gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists()
+				case "claude":
+					hasThinking = gjson.GetBytes(body, "thinking").Exists()
+				case "openai":
+					hasThinking = gjson.GetBytes(body, "reasoning_effort").Exists()
+				case "codex":
+					hasThinking = gjson.GetBytes(body, "reasoning.effort").Exists() || gjson.GetBytes(body, "reasoning").Exists()
+				case "iflow":
+					hasThinking = gjson.GetBytes(body, "chat_template_kwargs.enable_thinking").Exists() || gjson.GetBytes(body, "reasoning_split").Exists()
"chat_template_kwargs.enable_thinking").Exists() || gjson.GetBytes(body, "reasoning_split").Exists() + } + if hasThinking { + t.Fatalf("expected no thinking field but found one, body=%s", string(body)) + } + return + } + + val := gjson.GetBytes(body, tc.expectField) + if !val.Exists() { + t.Fatalf("expected field %s not found, body=%s", tc.expectField, string(body)) + } + + actualValue := val.String() + if val.Type == gjson.Number { + actualValue = fmt.Sprintf("%d", val.Int()) + } + if actualValue != tc.expectValue { + t.Fatalf("field %s: expected %q, got %q, body=%s", tc.expectField, tc.expectValue, actualValue, string(body)) + } + + if tc.includeThoughts != "" && (tc.to == "gemini" || tc.to == "gemini-cli" || tc.to == "antigravity") { + path := "generationConfig.thinkingConfig.includeThoughts" + if tc.to == "gemini-cli" || tc.to == "antigravity" { + path = "request.generationConfig.thinkingConfig.includeThoughts" + } + itVal := gjson.GetBytes(body, path) + if !itVal.Exists() { + t.Fatalf("expected includeThoughts field not found, body=%s", string(body)) + } + actual := fmt.Sprintf("%v", itVal.Bool()) + if actual != tc.includeThoughts { + t.Fatalf("includeThoughts: expected %s, got %s, body=%s", tc.includeThoughts, actual, string(body)) } } - for _, to := range thinkingTestToProtocols { - if from == to { - continue + // Verify clear_thinking for iFlow GLM models when enable_thinking=true + if tc.to == "iflow" && tc.expectField == "chat_template_kwargs.enable_thinking" && tc.expectValue == "true" { + ctVal := gjson.GetBytes(body, "chat_template_kwargs.clear_thinking") + if !ctVal.Exists() { + t.Fatalf("expected clear_thinking field not found for GLM model, body=%s", string(body)) } - t.Logf("─────────────────────────────────────────────────────────────────────────────────") - t.Logf(" %s -> %s | model: %s", from, to, model) - t.Logf("─────────────────────────────────────────────────────────────────────────────────") - for _, cs := range cases { - from := from - to := to - cs := cs - testName := fmt.Sprintf("%s->%s/%s/%s", from, to, model, cs.name) - t.Run(testName, func(t *testing.T) { - normalizedModel, metadata := util.NormalizeThinkingModel(cs.modelSuffix) - expectPresent, expectValue, expectErr := func() (bool, string, bool) { - switch to { - case "gemini": - budget, include, ok := util.ResolveThinkingConfigFromMetadata(normalizedModel, metadata) - if !ok || !util.ModelSupportsThinking(normalizedModel) { - return false, "", false - } - if include != nil && !*include { - return false, "", false - } - if budget == nil { - return false, "", false - } - norm := util.NormalizeThinkingBudget(normalizedModel, *budget) - return true, fmt.Sprintf("%d", norm), false - case "claude": - if !util.ModelSupportsThinking(normalizedModel) { - return false, "", false - } - budget, ok := util.ResolveClaudeThinkingConfig(normalizedModel, metadata) - if !ok || budget == nil { - return false, "", false - } - return true, fmt.Sprintf("%d", *budget), false - case "openai": - allowCompat := isOpenAICompatModel(normalizedModel) - if !util.ModelSupportsThinking(normalizedModel) && !allowCompat { - return false, "", false - } - // For allowCompat models, pass through effort directly without validation - if allowCompat { - effort, ok := util.ReasoningEffortFromMetadata(metadata) - if ok && strings.TrimSpace(effort) != "" { - return true, strings.ToLower(strings.TrimSpace(effort)), false - } - // Check numeric budget fallback for allowCompat - if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched 
-	for _, to := range thinkingTestToProtocols {
-		if from == to {
-			continue
+	}
+
+	// Verify clear_thinking for iFlow GLM models when enable_thinking=true
+	if tc.to == "iflow" && tc.expectField == "chat_template_kwargs.enable_thinking" && tc.expectValue == "true" {
+		ctVal := gjson.GetBytes(body, "chat_template_kwargs.clear_thinking")
+		if !ctVal.Exists() {
+			t.Fatalf("expected clear_thinking field not found for GLM model, body=%s", string(body))
 		}
-		t.Logf("─────────────────────────────────────────────────────────────────────────────────")
-		t.Logf(" %s -> %s | model: %s", from, to, model)
-		t.Logf("─────────────────────────────────────────────────────────────────────────────────")
-		for _, cs := range cases {
-			from := from
-			to := to
-			cs := cs
-			testName := fmt.Sprintf("%s->%s/%s/%s", from, to, model, cs.name)
-			t.Run(testName, func(t *testing.T) {
-				normalizedModel, metadata := util.NormalizeThinkingModel(cs.modelSuffix)
-				expectPresent, expectValue, expectErr := func() (bool, string, bool) {
-					switch to {
-					case "gemini":
-						budget, include, ok := util.ResolveThinkingConfigFromMetadata(normalizedModel, metadata)
-						if !ok || !util.ModelSupportsThinking(normalizedModel) {
-							return false, "", false
-						}
-						if include != nil && !*include {
-							return false, "", false
-						}
-						if budget == nil {
-							return false, "", false
-						}
-						norm := util.NormalizeThinkingBudget(normalizedModel, *budget)
-						return true, fmt.Sprintf("%d", norm), false
-					case "claude":
-						if !util.ModelSupportsThinking(normalizedModel) {
-							return false, "", false
-						}
-						budget, ok := util.ResolveClaudeThinkingConfig(normalizedModel, metadata)
-						if !ok || budget == nil {
-							return false, "", false
-						}
-						return true, fmt.Sprintf("%d", *budget), false
-					case "openai":
-						allowCompat := isOpenAICompatModel(normalizedModel)
-						if !util.ModelSupportsThinking(normalizedModel) && !allowCompat {
-							return false, "", false
-						}
-						// For allowCompat models, pass through effort directly without validation
-						if allowCompat {
-							effort, ok := util.ReasoningEffortFromMetadata(metadata)
-							if ok && strings.TrimSpace(effort) != "" {
-								return true, strings.ToLower(strings.TrimSpace(effort)), false
-							}
-							// Check numeric budget fallback for allowCompat
-							if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
-								if mapped, okMap := util.ThinkingBudgetToEffort(normalizedModel, *budget); okMap && mapped != "" {
-									return true, mapped, false
-								}
-							}
-							return false, "", false
-						}
-						if !util.ModelUsesThinkingLevels(normalizedModel) {
-							// Non-levels models don't support effort strings in openai
-							return false, "", false
-						}
-						effort, ok := util.ReasoningEffortFromMetadata(metadata)
-						if !ok || strings.TrimSpace(effort) == "" {
-							if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
-								if mapped, okMap := util.ThinkingBudgetToEffort(normalizedModel, *budget); okMap {
-									effort = mapped
-									ok = true
-								}
-							}
-						}
-						if !ok || strings.TrimSpace(effort) == "" {
-							return false, "", false
-						}
-						effort = strings.ToLower(strings.TrimSpace(effort))
-						if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, effort); okLevel {
-							return true, normalized, false
-						}
-						return false, "", true // validation would fail
-					case "codex":
-						// Codex does not support allowCompat; require thinking-capable level models.
-						if !util.ModelSupportsThinking(normalizedModel) || !util.ModelUsesThinkingLevels(normalizedModel) {
-							return false, "", false
-						}
-						effort, ok := util.ReasoningEffortFromMetadata(metadata)
-						if ok && strings.TrimSpace(effort) != "" {
-							effort = strings.ToLower(strings.TrimSpace(effort))
-							if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, effort); okLevel {
-								return true, normalized, false
-							}
-							return false, "", true
-						}
-						if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
-							if mapped, okMap := util.ThinkingBudgetToEffort(normalizedModel, *budget); okMap && mapped != "" {
-								mapped = strings.ToLower(strings.TrimSpace(mapped))
-								if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, mapped); okLevel {
-									return true, normalized, false
-								}
-								return false, "", true
-							}
-						}
-						if from != "openai-response" {
-							// Codex translators default reasoning.effort to "medium" when
-							// no explicit thinking suffix/metadata is provided.
-							return true, "medium", false
-						}
-						return false, "", false
-					default:
-						return false, "", false
-					}
-				}()
-
-				body, err := buildBodyForProtocol(t, from, to, cs.modelSuffix)
-				actualPresent, actualValue := func() (bool, string) {
-					path := ""
-					switch to {
-					case "gemini":
-						path = "generationConfig.thinkingConfig.thinkingBudget"
-					case "claude":
-						path = "thinking.budget_tokens"
-					case "openai":
-						path = "reasoning_effort"
-					case "codex":
-						path = "reasoning.effort"
-					}
-					if path == "" {
-						return false, ""
-					}
-					val := gjson.GetBytes(body, path)
-					if to == "codex" && !val.Exists() {
-						reasoning := gjson.GetBytes(body, "reasoning")
-						if reasoning.Exists() {
-							val = reasoning.Get("effort")
-						}
-					}
-					if !val.Exists() {
-						return false, ""
-					}
-					if val.Type == gjson.Number {
-						return true, fmt.Sprintf("%d", val.Int())
-					}
-					return true, val.String()
-				}()
-
-				t.Logf("from=%s to=%s model=%s suffix=%s present(expect=%v got=%v) value(expect=%s got=%s) err(expect=%v got=%v) body=%s",
-					from, to, model, cs.modelSuffix, expectPresent, actualPresent, expectValue, actualValue, expectErr, err != nil, string(body))
-
-				if expectErr {
-					if err == nil {
-						t.Fatalf("expected validation error but got none, body=%s", string(body))
-					}
-					return
-				}
-				if err != nil {
-					t.Fatalf("unexpected error: %v body=%s", err, string(body))
-				}
-
-				if expectPresent != actualPresent {
-					t.Fatalf("presence mismatch: expect %v got %v body=%s", expectPresent, actualPresent, string(body))
-				}
-				if expectPresent && expectValue != actualValue {
-					t.Fatalf("value mismatch: expect %s got %s body=%s", expectValue, actualValue, string(body))
-				}
-			})
+		if ctVal.Bool() != false {
+			t.Fatalf("clear_thinking: expected false, got %v, body=%s", ctVal.Bool(), string(body))
 		}
 	}
-		}
-	}
-}
-
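The expectation oracle deleted above encodes, per target protocol, when a thinking value must survive translation. Its codex branch is the subtlest: an explicit effort wins, a numeric budget falls back to a mapped level, and otherwise the translators default to "medium" unless the source protocol is openai-response. A condensed sketch of that ladder, with a hypothetical `budgetToEffort` parameter standing in for `util.ThinkingBudgetToEffort`:

```go
package oracle

import "strings"

// expectedCodexEffort condenses the codex branch of the deleted closure.
// budgetToEffort is a hypothetical stand-in for util.ThinkingBudgetToEffort;
// the real test additionally validated levels via
// util.NormalizeReasoningEffortLevel before accepting them.
func expectedCodexEffort(effort string, budget *int, from string, budgetToEffort func(int) (string, bool)) (string, bool) {
	if e := strings.ToLower(strings.TrimSpace(effort)); e != "" {
		return e, true // explicit effort from suffix/metadata wins
	}
	if budget != nil {
		if mapped, ok := budgetToEffort(*budget); ok && mapped != "" {
			return mapped, true // numeric budget mapped onto a level
		}
	}
	if from != "openai-response" {
		return "medium", true // translator default
	}
	return "", false
}
```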
- return true, "medium", false - } - return false, "", false - default: - return false, "", false - } - }() - - body, err := buildBodyForProtocol(t, from, to, cs.modelSuffix) - actualPresent, actualValue := func() (bool, string) { - path := "" - switch to { - case "gemini": - path = "generationConfig.thinkingConfig.thinkingBudget" - case "claude": - path = "thinking.budget_tokens" - case "openai": - path = "reasoning_effort" - case "codex": - path = "reasoning.effort" - } - if path == "" { - return false, "" - } - val := gjson.GetBytes(body, path) - if to == "codex" && !val.Exists() { - reasoning := gjson.GetBytes(body, "reasoning") - if reasoning.Exists() { - val = reasoning.Get("effort") - } - } - if !val.Exists() { - return false, "" - } - if val.Type == gjson.Number { - return true, fmt.Sprintf("%d", val.Int()) - } - return true, val.String() - }() - - t.Logf("from=%s to=%s model=%s suffix=%s present(expect=%v got=%v) value(expect=%s got=%s) err(expect=%v got=%v) body=%s", - from, to, model, cs.modelSuffix, expectPresent, actualPresent, expectValue, actualValue, expectErr, err != nil, string(body)) - - if expectErr { - if err == nil { - t.Fatalf("expected validation error but got none, body=%s", string(body)) - } - return - } - if err != nil { - t.Fatalf("unexpected error: %v body=%s", err, string(body)) - } - - if expectPresent != actualPresent { - t.Fatalf("presence mismatch: expect %v got %v body=%s", expectPresent, actualPresent, string(body)) - } - if expectPresent && expectValue != actualValue { - t.Fatalf("value mismatch: expect %s got %s body=%s", expectValue, actualValue, string(body)) - } - }) + if ctVal.Bool() != false { + t.Fatalf("clear_thinking: expected false, got %v, body=%s", ctVal.Bool(), string(body)) } } - } - } -} - -// buildRawPayloadWithThinking creates a payload with thinking parameters already in the body. -// This tests the path where thinking comes from the raw payload, not model suffix. -func buildRawPayloadWithThinking(fromProtocol, model string, thinkingParam any) []byte { - switch fromProtocol { - case "gemini": - base := fmt.Sprintf(`{"model":"%s","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, model) - if budget, ok := thinkingParam.(int); ok { - base, _ = sjson.Set(base, "generationConfig.thinkingConfig.thinkingBudget", budget) - } - return []byte(base) - case "openai-response": - base := fmt.Sprintf(`{"model":"%s","input":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`, model) - if effort, ok := thinkingParam.(string); ok && effort != "" { - base, _ = sjson.Set(base, "reasoning.effort", effort) - } - return []byte(base) - case "openai": - base := fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model) - if effort, ok := thinkingParam.(string); ok && effort != "" { - base, _ = sjson.Set(base, "reasoning_effort", effort) - } - return []byte(base) - case "claude": - base := fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model) - if budget, ok := thinkingParam.(int); ok { - base, _ = sjson.Set(base, "thinking.type", "enabled") - base, _ = sjson.Set(base, "thinking.budget_tokens", budget) - } - return []byte(base) - default: - return []byte(fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model)) - } -} - -// buildBodyForProtocolWithRawThinking translates payload with raw thinking params. 
-// buildBodyForProtocolWithRawThinking translates payload with raw thinking params.
-func buildBodyForProtocolWithRawThinking(t *testing.T, fromProtocol, toProtocol, model string, thinkingParam any) ([]byte, error) {
-	t.Helper()
-	raw := buildRawPayloadWithThinking(fromProtocol, model, thinkingParam)
-	stream := fromProtocol != toProtocol
-
-	body := sdktranslator.TranslateRequest(
-		sdktranslator.FromString(fromProtocol),
-		sdktranslator.FromString(toProtocol),
-		model,
-		raw,
-		stream,
-	)
-
-	var err error
-	allowCompat := isOpenAICompatModel(model)
-	switch toProtocol {
-	case "gemini":
-		body = util.ApplyDefaultThinkingIfNeeded(model, body)
-		body = util.NormalizeGeminiThinkingBudget(model, body)
-		body = util.StripThinkingConfigIfUnsupported(model, body)
-	case "claude":
-		// For raw payload, Claude thinking is passed through by translator
-		// No additional processing needed as thinking is already in body
-	case "openai":
-		body = executor.NormalizeThinkingConfig(body, model, allowCompat)
-		err = executor.ValidateThinkingConfig(body, model)
-	case "codex":
-		// Codex does not support allowCompat; always use false.
-		body, err = normalizeCodexPayload(body, model, false)
-	}
-
-	body, _ = sjson.SetBytes(body, "model", model)
-	body = filterThinkingBody(toProtocol, body, model, model)
-	return body, err
-}
-
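The helper deleted above fixed the order of operations the assertions depend on: translate the raw payload first, then run the target protocol's normalization pass, then assert. Reduced to its shape as a sketch (the stage functions are hypothetical stand-ins for the `util`/`executor` calls the helper chained, and in the real code only the openai and codex passes return errors):

```go
package pipeline

// stage is one post-translation normalization step, e.g. the gemini chain
// ApplyDefaultThinkingIfNeeded -> NormalizeGeminiThinkingBudget ->
// StripThinkingConfigIfUnsupported in the deleted helper.
type stage func(model string, body []byte) ([]byte, error)

// normalizeFor applies the target protocol's stages in order, stopping at
// the first error, which the tests then treated as a validation failure.
func normalizeFor(target, model string, body []byte, stages map[string][]stage) ([]byte, error) {
	var err error
	for _, s := range stages[target] {
		if body, err = s(model, body); err != nil {
			return body, err
		}
	}
	return body, nil
}
```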
fmt.Sprintf("%d", norm), false - } - // Convert effort level to budget for non-level models only - if effort, ok := cs.thinkingParam.(string); ok && effort != "" { - // "none" disables thinking - no thinkingBudget in output - if strings.ToLower(effort) == "none" { - return false, "", false - } - if budget, okB := util.ThinkingEffortToBudget(model, effort); okB { - // ThinkingEffortToBudget already returns normalized budget - return true, fmt.Sprintf("%d", budget), false - } - // Invalid effort does not map to a budget - return false, "", false - } - return false, "", false - case "claude": - if !supportsThinking || usesLevels { - return false, "", false - } - // Claude expects numeric budget (only for non-level models) - if budget, ok := cs.thinkingParam.(int); ok && budget > 0 { - norm := util.NormalizeThinkingBudget(model, budget) - return true, fmt.Sprintf("%d", norm), false - } - // Convert effort level to budget for non-level models only - if effort, ok := cs.thinkingParam.(string); ok && effort != "" { - // "none" and "auto" don't produce budget_tokens - lower := strings.ToLower(effort) - if lower == "none" || lower == "auto" { - return false, "", false - } - if budget, okB := util.ThinkingEffortToBudget(model, effort); okB { - // ThinkingEffortToBudget already returns normalized budget - return true, fmt.Sprintf("%d", budget), false - } - // Invalid effort - claude sets thinking.type:enabled but no budget_tokens - return false, "", false - } - return false, "", false - case "openai": - if allowCompat { - if effort, ok := cs.thinkingParam.(string); ok && strings.TrimSpace(effort) != "" { - normalized := strings.ToLower(strings.TrimSpace(effort)) - return true, normalized, false - } - if budget, ok := cs.thinkingParam.(int); ok { - if mapped, okM := util.ThinkingBudgetToEffort(model, budget); okM && mapped != "" { - return true, mapped, false - } - } - return false, "", false - } - if !supportsThinking || !usesLevels { - return false, "", false - } - if effort, ok := cs.thinkingParam.(string); ok && effort != "" { - if normalized, okN := util.NormalizeReasoningEffortLevel(model, effort); okN { - return true, normalized, false - } - return false, "", true // invalid level - } - if budget, ok := cs.thinkingParam.(int); ok { - if mapped, okM := util.ThinkingBudgetToEffort(model, budget); okM && mapped != "" { - // Check if the mapped effort is valid for this model - if _, validLevel := util.NormalizeReasoningEffortLevel(model, mapped); !validLevel { - return true, mapped, true // expect validation error - } - return true, mapped, false - } - } - return false, "", false - case "codex": - // Codex does not support allowCompat; require thinking-capable level models. 
-								if !supportsThinking || !usesLevels {
-									return false, "", false
-								}
-								if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
-									if normalized, okN := util.NormalizeReasoningEffortLevel(model, effort); okN {
-										return true, normalized, false
-									}
-									return false, "", true
-								}
-								if budget, ok := cs.thinkingParam.(int); ok {
-									if mapped, okM := util.ThinkingBudgetToEffort(model, budget); okM && mapped != "" {
-										// Check if the mapped effort is valid for this model
-										if _, validLevel := util.NormalizeReasoningEffortLevel(model, mapped); !validLevel {
-											return true, mapped, true // expect validation error
-										}
-										return true, mapped, false
-									}
-								}
-								if from != "openai-response" {
-									// Codex translators default reasoning.effort to "medium" for thinking-capable models
-									return true, "medium", false
-								}
-								return false, "", false
-							}
-							return false, "", false
-						}()
-
-						body, err := buildBodyForProtocolWithRawThinking(t, from, to, model, cs.thinkingParam)
-						actualPresent, actualValue := func() (bool, string) {
-							path := ""
-							switch to {
-							case "gemini":
-								path = "generationConfig.thinkingConfig.thinkingBudget"
-							case "claude":
-								path = "thinking.budget_tokens"
-							case "openai":
-								path = "reasoning_effort"
-							case "codex":
-								path = "reasoning.effort"
-							}
-							if path == "" {
-								return false, ""
-							}
-							val := gjson.GetBytes(body, path)
-							if to == "codex" && !val.Exists() {
-								reasoning := gjson.GetBytes(body, "reasoning")
-								if reasoning.Exists() {
-									val = reasoning.Get("effort")
-								}
-							}
-							if !val.Exists() {
-								return false, ""
-							}
-							if val.Type == gjson.Number {
-								return true, fmt.Sprintf("%d", val.Int())
-							}
-							return true, val.String()
-						}()
-
-						t.Logf("from=%s to=%s model=%s param=%v present(expect=%v got=%v) value(expect=%s got=%s) err(expect=%v got=%v) body=%s",
-							from, to, model, cs.thinkingParam, expectPresent, actualPresent, expectValue, actualValue, expectErr, err != nil, string(body))
-
-						if expectErr {
-							if err == nil {
-								t.Fatalf("expected validation error but got none, body=%s", string(body))
-							}
-							return
-						}
-						if err != nil {
-							t.Fatalf("unexpected error: %v body=%s", err, string(body))
-						}
-
-						if expectPresent != actualPresent {
-							t.Fatalf("presence mismatch: expect %v got %v body=%s", expectPresent, actualPresent, string(body))
-						}
-						if expectPresent && expectValue != actualValue {
-							t.Fatalf("value mismatch: expect %s got %s body=%s", expectValue, actualValue, string(body))
-						}
-					})
-				}
-			}
-		}
-	}
-}
-
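The table deleted just below pins the budget->effort banding for gpt-5: -1 means auto, 0 maps to minimal, 1 to 1024 maps to low, 1025 to 8192 to medium, 8193 to 24576 to high, anything larger clamps to the model's top level (xhigh appears only for gpt-5.2 in the table), and other negative budgets are rejected. The same banding as a self-contained sketch, with thresholds copied from the table rather than from the implementation:

```go
package effort

// bandForBudget reproduces the gpt-5 banding pinned by the deleted
// TestThinkingBudgetToEffort table; it is a sketch, not the real
// util.ThinkingBudgetToEffort.
func bandForBudget(budget int) (string, bool) {
	switch {
	case budget == -1:
		return "auto", true // dynamic budget
	case budget < -1:
		return "", false // other negatives are unsupported
	case budget == 0:
		return "minimal", true
	case budget <= 1024:
		return "low", true
	case budget <= 8192:
		return "medium", true
	default:
		return "high", true // 8193 and above, clamped at gpt-5's top level
	}
}
```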
-func TestThinkingBudgetToEffort(t *testing.T) {
-	cleanup := registerCoreModels(t)
-	defer cleanup()
-
-	cases := []struct {
-		name   string
-		model  string
-		budget int
-		want   string
-		ok     bool
-	}{
-		{name: "dynamic-auto", model: "gpt-5", budget: -1, want: "auto", ok: true},
-		{name: "zero-none", model: "gpt-5", budget: 0, want: "minimal", ok: true},
-		{name: "low-min", model: "gpt-5", budget: 1, want: "low", ok: true},
-		{name: "low-max", model: "gpt-5", budget: 1024, want: "low", ok: true},
-		{name: "medium-min", model: "gpt-5", budget: 1025, want: "medium", ok: true},
-		{name: "medium-max", model: "gpt-5", budget: 8192, want: "medium", ok: true},
-		{name: "high-min", model: "gpt-5", budget: 8193, want: "high", ok: true},
-		{name: "high-max", model: "gpt-5", budget: 24576, want: "high", ok: true},
-		{name: "over-max-clamps-to-highest", model: "gpt-5", budget: 64000, want: "high", ok: true},
-		{name: "over-max-xhigh-model", model: "gpt-5.2", budget: 64000, want: "xhigh", ok: true},
-		{name: "negative-unsupported", model: "gpt-5", budget: -5, want: "", ok: false},
-	}
-
-	for _, cs := range cases {
-		cs := cs
-		t.Run(cs.name, func(t *testing.T) {
-			got, ok := util.ThinkingBudgetToEffort(cs.model, cs.budget)
-			if ok != cs.ok {
-				t.Fatalf("ok mismatch for model=%s budget=%d: expect %v got %v", cs.model, cs.budget, cs.ok, ok)
-			}
-			if got != cs.want {
-				t.Fatalf("value mismatch for model=%s budget=%d: expect %q got %q", cs.model, cs.budget, cs.want, got)
-			}
-		})
-	}
-}
-
-func TestThinkingEffortToBudget(t *testing.T) {
-	cleanup := registerCoreModels(t)
-	defer cleanup()
-
-	cases := []struct {
-		name   string
-		model  string
-		effort string
-		want   int
-		ok     bool
-	}{
-		{name: "none", model: "gemini-2.5-pro", effort: "none", want: 0, ok: true},
-		{name: "auto", model: "gemini-2.5-pro", effort: "auto", want: -1, ok: true},
-		{name: "minimal", model: "gemini-2.5-pro", effort: "minimal", want: 512, ok: true},
-		{name: "low", model: "gemini-2.5-pro", effort: "low", want: 1024, ok: true},
-		{name: "medium", model: "gemini-2.5-pro", effort: "medium", want: 8192, ok: true},
-		{name: "high", model: "gemini-2.5-pro", effort: "high", want: 24576, ok: true},
-		{name: "xhigh", model: "gemini-2.5-pro", effort: "xhigh", want: 32768, ok: true},
-		{name: "empty-unsupported", model: "gemini-2.5-pro", effort: "", want: 0, ok: false},
-		{name: "invalid-unsupported", model: "gemini-2.5-pro", effort: "ultra", want: 0, ok: false},
-		{name: "case-insensitive", model: "gemini-2.5-pro", effort: "LOW", want: 1024, ok: true},
-		{name: "case-insensitive-medium", model: "gemini-2.5-pro", effort: "MEDIUM", want: 8192, ok: true},
-	}
-
-	for _, cs := range cases {
-		cs := cs
-		t.Run(cs.name, func(t *testing.T) {
-			got, ok := util.ThinkingEffortToBudget(cs.model, cs.effort)
-			if ok != cs.ok {
-				t.Fatalf("ok mismatch for model=%s effort=%s: expect %v got %v", cs.model, cs.effort, cs.ok, ok)
-			}
-			if got != cs.want {
-				t.Fatalf("value mismatch for model=%s effort=%s: expect %d got %d", cs.model, cs.effort, cs.want, got)
-			}
 		})
 	}
 }
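The companion table above pins the opposite direction for gemini-2.5-pro: none to 0, auto to -1, minimal to 512, low to 1024, medium to 8192, high to 24576, xhigh to 32768, with case-insensitive lookup and rejection of empty or unknown levels. Note that the two deleted tables are not inverses: "minimal" resolves to 512 tokens, but a 512-token budget maps back to "low", which is presumably why each direction was pinned separately. A minimal sketch of the lookup:

```go
package effort

import "strings"

// effortBudgets mirrors the gemini-2.5-pro values in the deleted
// TestThinkingEffortToBudget table; treat them as illustrative of that
// table, not as a contract for other models.
var effortBudgets = map[string]int{
	"none":    0,
	"auto":    -1,
	"minimal": 512,
	"low":     1024,
	"medium":  8192,
	"high":    24576,
	"xhigh":   32768,
}

// effortToBudget is case-insensitive and rejects empty or unknown levels,
// matching the table's "empty-unsupported" and "invalid-unsupported" cases.
func effortToBudget(effort string) (int, bool) {
	budget, ok := effortBudgets[strings.ToLower(strings.TrimSpace(effort))]
	return budget, ok
}
```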