mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-02-02 12:30:50 +08:00
Compare commits
111 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
020e61d0da | ||
|
|
6184c43319 | ||
|
|
2cbe4a790c | ||
|
|
68b3565d7b | ||
|
|
3f385a8572 | ||
|
|
9823dc35e1 | ||
|
|
059bfee91b | ||
|
|
7beaf0eaa2 | ||
|
|
1fef90ff58 | ||
|
|
8447fd27a0 | ||
|
|
7831cba9f6 | ||
|
|
e02b2d58d5 | ||
|
|
28726632a9 | ||
|
|
3b26129c82 | ||
|
|
d4bb4e6624 | ||
|
|
0766c49f93 | ||
|
|
a7ffc77e3d | ||
|
|
e641fde25c | ||
|
|
5717c7f2f4 | ||
|
|
8734d4cb90 | ||
|
|
5baa753539 | ||
|
|
ead98e4bca | ||
|
|
1d2fe55310 | ||
|
|
c175821cc4 | ||
|
|
239a28793c | ||
|
|
c421d653e7 | ||
|
|
2542c2920d | ||
|
|
52e46ced1b | ||
|
|
cf9daf470c | ||
|
|
140d6211cc | ||
|
|
60f9a1442c | ||
|
|
cb6caf3f87 | ||
|
|
99c7abbbf1 | ||
|
|
8f511ac33c | ||
|
|
1046152119 | ||
|
|
f88228f1c5 | ||
|
|
62e2b672d9 | ||
|
|
03005b5d29 | ||
|
|
c7e8830a56 | ||
|
|
d5ef4a6d15 | ||
|
|
97b67e0e49 | ||
|
|
dd6d78cb31 | ||
|
|
46433a25f8 | ||
|
|
c8843edb81 | ||
|
|
f89feb881c | ||
|
|
dbba71028e | ||
|
|
8549a92e9a | ||
|
|
109cffc010 | ||
|
|
f8f3ad84fc | ||
|
|
bc7167e9fe | ||
|
|
384578a88c | ||
|
|
65b4e1ec6c | ||
|
|
6600d58ba2 | ||
|
|
4dc7af5a5d | ||
|
|
902bea24b4 | ||
|
|
c3ef46f409 | ||
|
|
aa0b63e214 | ||
|
|
ea3d22831e | ||
|
|
3b4d6d359b | ||
|
|
48cba39a12 | ||
|
|
cec4e251bd | ||
|
|
526dd866ba | ||
|
|
b31ddc7bf1 | ||
|
|
22e1ad3d8a | ||
|
|
f571b1deb0 | ||
|
|
67f8732683 | ||
|
|
2b387e169b | ||
|
|
199cf480b0 | ||
|
|
4ad6189487 | ||
|
|
fe5b3c80cb | ||
|
|
e0ffec885c | ||
|
|
ff4ff6bc2f | ||
|
|
7248f65c36 | ||
|
|
5c40a2db21 | ||
|
|
086eb3df7a | ||
|
|
ee2976cca0 | ||
|
|
8bc6df329f | ||
|
|
bcd4d9595f | ||
|
|
5a77b7728e | ||
|
|
1fbbba6f59 | ||
|
|
847be0e99d | ||
|
|
f6a2d072e6 | ||
|
|
ed8b0f25ee | ||
|
|
6e4a602c60 | ||
|
|
2262479365 | ||
|
|
33d66959e9 | ||
|
|
7f1b2b3f6e | ||
|
|
40ee065eff | ||
|
|
a75fb6af90 | ||
|
|
72f2125668 | ||
|
|
e8f5888d8e | ||
|
|
0b06d637e7 | ||
|
|
5a7e5bd870 | ||
|
|
6f8a8f8136 | ||
|
|
5df195ea82 | ||
|
|
b163f8ed9e | ||
|
|
a1da6ff5ac | ||
|
|
5977af96a0 | ||
|
|
43652d044c | ||
|
|
b1b379ea18 | ||
|
|
5bb9c2a2bd | ||
|
|
0b5bbe9234 | ||
|
|
14c74e5e84 | ||
|
|
6448d0ee7c | ||
|
|
b0c17af2cf | ||
|
|
aa8526edc0 | ||
|
|
ac3ca0ad8e | ||
|
|
08d21b76e2 | ||
|
|
33aa665555 | ||
|
|
00280b6fe8 | ||
|
|
52760a4eaa |
@@ -130,6 +130,10 @@ Windows-native CLIProxyAPI fork with TUI, system tray, and multi-provider OAuth
|
||||
|
||||
VSCode extension for quick switching between Claude Code models, featuring integrated CLIProxyAPI as its backend with automatic background lifecycle management.
|
||||
|
||||
### [ZeroLimit](https://github.com/0xtbug/zero-limit)
|
||||
|
||||
Windows desktop app built with Tauri + React for monitoring AI coding assistant quotas via CLIProxyAPI. Track usage across Gemini, Claude, OpenAI Codex, and Antigravity accounts with real-time dashboard, system tray integration, and one-click proxy control - no API keys needed.
|
||||
|
||||
> [!NOTE]
|
||||
> If you developed a project based on CLIProxyAPI, please open a PR to add it to this list.
|
||||
|
||||
|
||||
@@ -129,6 +129,10 @@ CLI 封装器,用于通过 CLIProxyAPI OAuth 即时切换多个 Claude 账户
|
||||
|
||||
一款 VSCode 扩展,提供了在 VSCode 中快速切换 Claude Code 模型的功能,内置 CLIProxyAPI 作为其后端,支持后台自动启动和关闭。
|
||||
|
||||
### [ZeroLimit](https://github.com/0xtbug/zero-limit)
|
||||
|
||||
Windows 桌面应用,基于 Tauri + React 构建,用于通过 CLIProxyAPI 监控 AI 编程助手配额。支持跨 Gemini、Claude、OpenAI Codex 和 Antigravity 账户的使用量追踪,提供实时仪表盘、系统托盘集成和一键代理控制,无需 API 密钥。
|
||||
|
||||
> [!NOTE]
|
||||
> 如果你开发了基于 CLIProxyAPI 的项目,请提交一个 PR(拉取请求)将其添加到此列表中。
|
||||
|
||||
|
||||
@@ -61,6 +61,7 @@ func main() {
|
||||
var iflowLogin bool
|
||||
var iflowCookie bool
|
||||
var noBrowser bool
|
||||
var oauthCallbackPort int
|
||||
var antigravityLogin bool
|
||||
var projectID string
|
||||
var vertexImport string
|
||||
@@ -75,6 +76,7 @@ func main() {
|
||||
flag.BoolVar(&iflowLogin, "iflow-login", false, "Login to iFlow using OAuth")
|
||||
flag.BoolVar(&iflowCookie, "iflow-cookie", false, "Login to iFlow using Cookie")
|
||||
flag.BoolVar(&noBrowser, "no-browser", false, "Don't open browser automatically for OAuth")
|
||||
flag.IntVar(&oauthCallbackPort, "oauth-callback-port", 0, "Override OAuth callback port (defaults to provider-specific port)")
|
||||
flag.BoolVar(&antigravityLogin, "antigravity-login", false, "Login to Antigravity using OAuth")
|
||||
flag.StringVar(&projectID, "project_id", "", "Project ID (Gemini only, not required)")
|
||||
flag.StringVar(&configPath, "config", DefaultConfigPath, "Configure File Path")
|
||||
@@ -425,7 +427,8 @@ func main() {
|
||||
|
||||
// Create login options to be used in authentication flows.
|
||||
options := &cmd.LoginOptions{
|
||||
NoBrowser: noBrowser,
|
||||
NoBrowser: noBrowser,
|
||||
CallbackPort: oauthCallbackPort,
|
||||
}
|
||||
|
||||
// Register the shared token store once so all components use the same persistence backend.
|
||||
|
||||
@@ -77,11 +77,18 @@ routing:
|
||||
# When true, enable authentication for the WebSocket API (/v1/ws).
|
||||
ws-auth: false
|
||||
|
||||
# When > 0, emit blank lines every N seconds for non-streaming responses to prevent idle timeouts.
|
||||
nonstream-keepalive-interval: 0
|
||||
|
||||
# Streaming behavior (SSE keep-alives + safe bootstrap retries).
|
||||
# streaming:
|
||||
# keepalive-seconds: 15 # Default: 0 (disabled). <= 0 disables keep-alives.
|
||||
# bootstrap-retries: 1 # Default: 0 (disabled). Retries before first byte is sent.
|
||||
|
||||
# When true, enable official Codex instructions injection for Codex API requests.
|
||||
# When false (default), CodexInstructionsForModel returns immediately without modification.
|
||||
codex-instructions-enabled: false
|
||||
|
||||
# Gemini API keys
|
||||
# gemini-api-key:
|
||||
# - api-key: "AIzaSy...01"
|
||||
@@ -134,6 +141,15 @@ ws-auth: false
|
||||
# - "claude-3-*" # wildcard matching prefix (e.g. claude-3-7-sonnet-20250219)
|
||||
# - "*-thinking" # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
|
||||
# - "*haiku*" # wildcard matching substring (e.g. claude-3-5-haiku-20241022)
|
||||
# cloak: # optional: request cloaking for non-Claude-Code clients
|
||||
# mode: "auto" # "auto" (default): cloak only when client is not Claude Code
|
||||
# # "always": always apply cloaking
|
||||
# # "never": never apply cloaking
|
||||
# strict-mode: false # false (default): prepend Claude Code prompt to user system messages
|
||||
# # true: strip all user system messages, keep only Claude Code prompt
|
||||
# sensitive-words: # optional: words to obfuscate with zero-width characters
|
||||
# - "API"
|
||||
# - "proxy"
|
||||
|
||||
# OpenAI compatibility providers
|
||||
# openai-compatibility:
|
||||
@@ -198,12 +214,27 @@ ws-auth: false
|
||||
# - from: "claude-haiku-4-5-20251001"
|
||||
# to: "gemini-2.5-flash"
|
||||
|
||||
# Global OAuth model name mappings (per channel)
|
||||
# These mappings rename model IDs for both model listing and request routing.
|
||||
# Global OAuth model name aliases (per channel)
|
||||
# These aliases rename model IDs for both model listing and request routing.
|
||||
# Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow.
|
||||
# NOTE: Mappings do not apply to gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, or ampcode.
|
||||
# NOTE: Aliases do not apply to gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, or ampcode.
|
||||
# You can repeat the same name with different aliases to expose multiple client model names.
|
||||
# oauth-model-mappings:
|
||||
oauth-model-alias:
|
||||
antigravity:
|
||||
- name: "rev19-uic3-1p"
|
||||
alias: "gemini-2.5-computer-use-preview-10-2025"
|
||||
- name: "gemini-3-pro-image"
|
||||
alias: "gemini-3-pro-image-preview"
|
||||
- name: "gemini-3-pro-high"
|
||||
alias: "gemini-3-pro-preview"
|
||||
- name: "gemini-3-flash"
|
||||
alias: "gemini-3-flash-preview"
|
||||
- name: "claude-sonnet-4-5"
|
||||
alias: "gemini-claude-sonnet-4-5"
|
||||
- name: "claude-sonnet-4-5-thinking"
|
||||
alias: "gemini-claude-sonnet-4-5-thinking"
|
||||
- name: "claude-opus-4-5-thinking"
|
||||
alias: "gemini-claude-opus-4-5-thinking"
|
||||
# gemini-cli:
|
||||
# - name: "gemini-2.5-pro" # original model name under this channel
|
||||
# alias: "g2.5p" # client-visible alias
|
||||
@@ -214,9 +245,6 @@ ws-auth: false
|
||||
# aistudio:
|
||||
# - name: "gemini-2.5-pro"
|
||||
# alias: "g2.5p"
|
||||
# antigravity:
|
||||
# - name: "gemini-3-pro-preview"
|
||||
# alias: "g3p"
|
||||
# claude:
|
||||
# - name: "claude-sonnet-4-5-20250929"
|
||||
# alias: "cs4.5"
|
||||
@@ -260,9 +288,21 @@ ws-auth: false
|
||||
# protocol: "gemini" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex
|
||||
# params: # JSON path (gjson/sjson syntax) -> value
|
||||
# "generationConfig.thinkingConfig.thinkingBudget": 32768
|
||||
# default-raw: # Default raw rules set parameters using raw JSON when missing (must be valid JSON).
|
||||
# - models:
|
||||
# - name: "gemini-2.5-pro" # Supports wildcards (e.g., "gemini-*")
|
||||
# protocol: "gemini" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex
|
||||
# params: # JSON path (gjson/sjson syntax) -> raw JSON value (strings are used as-is, must be valid JSON)
|
||||
# "generationConfig.responseJsonSchema": "{\"type\":\"object\",\"properties\":{\"answer\":{\"type\":\"string\"}}}"
|
||||
# override: # Override rules always set parameters, overwriting any existing values.
|
||||
# - models:
|
||||
# - name: "gpt-*" # Supports wildcards (e.g., "gpt-*")
|
||||
# protocol: "codex" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex
|
||||
# params: # JSON path (gjson/sjson syntax) -> value
|
||||
# "reasoning.effort": "high"
|
||||
# override-raw: # Override raw rules always set parameters using raw JSON (must be valid JSON).
|
||||
# - models:
|
||||
# - name: "gpt-*" # Supports wildcards (e.g., "gpt-*")
|
||||
# protocol: "codex" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex
|
||||
# params: # JSON path (gjson/sjson syntax) -> raw JSON value (strings are used as-is, must be valid JSON)
|
||||
# "response_format": "{\"type\":\"json_schema\",\"json_schema\":{\"name\":\"answer\",\"schema\":{\"type\":\"object\"}}}"
|
||||
|
||||
@@ -22,7 +22,7 @@ services:
|
||||
- "51121:51121"
|
||||
- "11451:11451"
|
||||
volumes:
|
||||
- ./config.yaml:/CLIProxyAPI/config.yaml
|
||||
- ./auths:/root/.cli-proxy-api
|
||||
- ./logs:/CLIProxyAPI/logs
|
||||
- ${CLI_PROXY_CONFIG_PATH:-./config.yaml}:/CLIProxyAPI/config.yaml
|
||||
- ${CLI_PROXY_AUTH_PATH:-./auths}:/root/.cli-proxy-api
|
||||
- ${CLI_PROXY_LOG_PATH:-./logs}:/CLIProxyAPI/logs
|
||||
restart: unless-stopped
|
||||
|
||||
@@ -3,6 +3,8 @@ package management
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
@@ -1383,9 +1385,16 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
|
||||
claims, _ := codex.ParseJWTToken(tokenResp.IDToken)
|
||||
email := ""
|
||||
accountID := ""
|
||||
planType := ""
|
||||
if claims != nil {
|
||||
email = claims.GetUserEmail()
|
||||
accountID = claims.GetAccountID()
|
||||
planType = strings.TrimSpace(claims.CodexAuthInfo.ChatgptPlanType)
|
||||
}
|
||||
hashAccountID := ""
|
||||
if accountID != "" {
|
||||
digest := sha256.Sum256([]byte(accountID))
|
||||
hashAccountID = hex.EncodeToString(digest[:])[:8]
|
||||
}
|
||||
// Build bundle compatible with existing storage
|
||||
bundle := &codex.CodexAuthBundle{
|
||||
@@ -1402,10 +1411,11 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
|
||||
|
||||
// Create token storage and persist
|
||||
tokenStorage := openaiAuth.CreateTokenStorage(bundle)
|
||||
fileName := codex.CredentialFileName(tokenStorage.Email, planType, hashAccountID, true)
|
||||
record := &coreauth.Auth{
|
||||
ID: fmt.Sprintf("codex-%s.json", tokenStorage.Email),
|
||||
ID: fileName,
|
||||
Provider: "codex",
|
||||
FileName: fmt.Sprintf("codex-%s.json", tokenStorage.Email),
|
||||
FileName: fileName,
|
||||
Storage: tokenStorage,
|
||||
Metadata: map[string]any{
|
||||
"email": tokenStorage.Email,
|
||||
@@ -1703,7 +1713,7 @@ func (h *Handler) RequestQwenToken(c *gin.Context) {
|
||||
// Create token storage
|
||||
tokenStorage := qwenAuth.CreateTokenStorage(tokenData)
|
||||
|
||||
tokenStorage.Email = fmt.Sprintf("qwen-%d", time.Now().UnixMilli())
|
||||
tokenStorage.Email = fmt.Sprintf("%d", time.Now().UnixMilli())
|
||||
record := &coreauth.Auth{
|
||||
ID: fmt.Sprintf("qwen-%s.json", tokenStorage.Email),
|
||||
Provider: "qwen",
|
||||
@@ -1808,7 +1818,7 @@ func (h *Handler) RequestIFlowToken(c *gin.Context) {
|
||||
tokenStorage := authSvc.CreateTokenStorage(tokenData)
|
||||
identifier := strings.TrimSpace(tokenStorage.Email)
|
||||
if identifier == "" {
|
||||
identifier = fmt.Sprintf("iflow-%d", time.Now().UnixMilli())
|
||||
identifier = fmt.Sprintf("%d", time.Now().UnixMilli())
|
||||
tokenStorage.Email = identifier
|
||||
}
|
||||
record := &coreauth.Auth{
|
||||
@@ -1893,15 +1903,17 @@ func (h *Handler) RequestIFlowCookieToken(c *gin.Context) {
|
||||
fileName := iflowauth.SanitizeIFlowFileName(email)
|
||||
if fileName == "" {
|
||||
fileName = fmt.Sprintf("iflow-%d", time.Now().UnixMilli())
|
||||
} else {
|
||||
fileName = fmt.Sprintf("iflow-%s", fileName)
|
||||
}
|
||||
|
||||
tokenStorage.Email = email
|
||||
timestamp := time.Now().Unix()
|
||||
|
||||
record := &coreauth.Auth{
|
||||
ID: fmt.Sprintf("iflow-%s-%d.json", fileName, timestamp),
|
||||
ID: fmt.Sprintf("%s-%d.json", fileName, timestamp),
|
||||
Provider: "iflow",
|
||||
FileName: fmt.Sprintf("iflow-%s-%d.json", fileName, timestamp),
|
||||
FileName: fmt.Sprintf("%s-%d.json", fileName, timestamp),
|
||||
Storage: tokenStorage,
|
||||
Metadata: map[string]any{
|
||||
"email": email,
|
||||
@@ -2108,7 +2120,20 @@ func performGeminiCLISetup(ctx context.Context, httpClient *http.Client, storage
|
||||
finalProjectID := projectID
|
||||
if responseProjectID != "" {
|
||||
if explicitProject && !strings.EqualFold(responseProjectID, projectID) {
|
||||
log.Warnf("Gemini onboarding returned project %s instead of requested %s; keeping requested project ID.", responseProjectID, projectID)
|
||||
// Check if this is a free user (gen-lang-client projects or free/legacy tier)
|
||||
isFreeUser := strings.HasPrefix(projectID, "gen-lang-client-") ||
|
||||
strings.EqualFold(tierID, "FREE") ||
|
||||
strings.EqualFold(tierID, "LEGACY")
|
||||
|
||||
if isFreeUser {
|
||||
// For free users, use backend project ID for preview model access
|
||||
log.Infof("Gemini onboarding: frontend project %s maps to backend project %s", projectID, responseProjectID)
|
||||
log.Infof("Using backend project ID: %s (recommended for preview model access)", responseProjectID)
|
||||
finalProjectID = responseProjectID
|
||||
} else {
|
||||
// Pro users: keep requested project ID (original behavior)
|
||||
log.Warnf("Gemini onboarding returned project %s instead of requested %s; keeping requested project ID.", responseProjectID, projectID)
|
||||
}
|
||||
} else {
|
||||
finalProjectID = responseProjectID
|
||||
}
|
||||
|
||||
@@ -703,21 +703,21 @@ func (h *Handler) DeleteOAuthExcludedModels(c *gin.Context) {
|
||||
h.persist(c)
|
||||
}
|
||||
|
||||
// oauth-model-mappings: map[string][]ModelNameMapping
|
||||
func (h *Handler) GetOAuthModelMappings(c *gin.Context) {
|
||||
c.JSON(200, gin.H{"oauth-model-mappings": sanitizedOAuthModelMappings(h.cfg.OAuthModelMappings)})
|
||||
// oauth-model-alias: map[string][]OAuthModelAlias
|
||||
func (h *Handler) GetOAuthModelAlias(c *gin.Context) {
|
||||
c.JSON(200, gin.H{"oauth-model-alias": sanitizedOAuthModelAlias(h.cfg.OAuthModelAlias)})
|
||||
}
|
||||
|
||||
func (h *Handler) PutOAuthModelMappings(c *gin.Context) {
|
||||
func (h *Handler) PutOAuthModelAlias(c *gin.Context) {
|
||||
data, err := c.GetRawData()
|
||||
if err != nil {
|
||||
c.JSON(400, gin.H{"error": "failed to read body"})
|
||||
return
|
||||
}
|
||||
var entries map[string][]config.ModelNameMapping
|
||||
var entries map[string][]config.OAuthModelAlias
|
||||
if err = json.Unmarshal(data, &entries); err != nil {
|
||||
var wrapper struct {
|
||||
Items map[string][]config.ModelNameMapping `json:"items"`
|
||||
Items map[string][]config.OAuthModelAlias `json:"items"`
|
||||
}
|
||||
if err2 := json.Unmarshal(data, &wrapper); err2 != nil {
|
||||
c.JSON(400, gin.H{"error": "invalid body"})
|
||||
@@ -725,15 +725,15 @@ func (h *Handler) PutOAuthModelMappings(c *gin.Context) {
|
||||
}
|
||||
entries = wrapper.Items
|
||||
}
|
||||
h.cfg.OAuthModelMappings = sanitizedOAuthModelMappings(entries)
|
||||
h.cfg.OAuthModelAlias = sanitizedOAuthModelAlias(entries)
|
||||
h.persist(c)
|
||||
}
|
||||
|
||||
func (h *Handler) PatchOAuthModelMappings(c *gin.Context) {
|
||||
func (h *Handler) PatchOAuthModelAlias(c *gin.Context) {
|
||||
var body struct {
|
||||
Provider *string `json:"provider"`
|
||||
Channel *string `json:"channel"`
|
||||
Mappings []config.ModelNameMapping `json:"mappings"`
|
||||
Provider *string `json:"provider"`
|
||||
Channel *string `json:"channel"`
|
||||
Aliases []config.OAuthModelAlias `json:"aliases"`
|
||||
}
|
||||
if errBindJSON := c.ShouldBindJSON(&body); errBindJSON != nil {
|
||||
c.JSON(400, gin.H{"error": "invalid body"})
|
||||
@@ -751,32 +751,32 @@ func (h *Handler) PatchOAuthModelMappings(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
normalizedMap := sanitizedOAuthModelMappings(map[string][]config.ModelNameMapping{channel: body.Mappings})
|
||||
normalizedMap := sanitizedOAuthModelAlias(map[string][]config.OAuthModelAlias{channel: body.Aliases})
|
||||
normalized := normalizedMap[channel]
|
||||
if len(normalized) == 0 {
|
||||
if h.cfg.OAuthModelMappings == nil {
|
||||
if h.cfg.OAuthModelAlias == nil {
|
||||
c.JSON(404, gin.H{"error": "channel not found"})
|
||||
return
|
||||
}
|
||||
if _, ok := h.cfg.OAuthModelMappings[channel]; !ok {
|
||||
if _, ok := h.cfg.OAuthModelAlias[channel]; !ok {
|
||||
c.JSON(404, gin.H{"error": "channel not found"})
|
||||
return
|
||||
}
|
||||
delete(h.cfg.OAuthModelMappings, channel)
|
||||
if len(h.cfg.OAuthModelMappings) == 0 {
|
||||
h.cfg.OAuthModelMappings = nil
|
||||
delete(h.cfg.OAuthModelAlias, channel)
|
||||
if len(h.cfg.OAuthModelAlias) == 0 {
|
||||
h.cfg.OAuthModelAlias = nil
|
||||
}
|
||||
h.persist(c)
|
||||
return
|
||||
}
|
||||
if h.cfg.OAuthModelMappings == nil {
|
||||
h.cfg.OAuthModelMappings = make(map[string][]config.ModelNameMapping)
|
||||
if h.cfg.OAuthModelAlias == nil {
|
||||
h.cfg.OAuthModelAlias = make(map[string][]config.OAuthModelAlias)
|
||||
}
|
||||
h.cfg.OAuthModelMappings[channel] = normalized
|
||||
h.cfg.OAuthModelAlias[channel] = normalized
|
||||
h.persist(c)
|
||||
}
|
||||
|
||||
func (h *Handler) DeleteOAuthModelMappings(c *gin.Context) {
|
||||
func (h *Handler) DeleteOAuthModelAlias(c *gin.Context) {
|
||||
channel := strings.ToLower(strings.TrimSpace(c.Query("channel")))
|
||||
if channel == "" {
|
||||
channel = strings.ToLower(strings.TrimSpace(c.Query("provider")))
|
||||
@@ -785,17 +785,17 @@ func (h *Handler) DeleteOAuthModelMappings(c *gin.Context) {
|
||||
c.JSON(400, gin.H{"error": "missing channel"})
|
||||
return
|
||||
}
|
||||
if h.cfg.OAuthModelMappings == nil {
|
||||
if h.cfg.OAuthModelAlias == nil {
|
||||
c.JSON(404, gin.H{"error": "channel not found"})
|
||||
return
|
||||
}
|
||||
if _, ok := h.cfg.OAuthModelMappings[channel]; !ok {
|
||||
if _, ok := h.cfg.OAuthModelAlias[channel]; !ok {
|
||||
c.JSON(404, gin.H{"error": "channel not found"})
|
||||
return
|
||||
}
|
||||
delete(h.cfg.OAuthModelMappings, channel)
|
||||
if len(h.cfg.OAuthModelMappings) == 0 {
|
||||
h.cfg.OAuthModelMappings = nil
|
||||
delete(h.cfg.OAuthModelAlias, channel)
|
||||
if len(h.cfg.OAuthModelAlias) == 0 {
|
||||
h.cfg.OAuthModelAlias = nil
|
||||
}
|
||||
h.persist(c)
|
||||
}
|
||||
@@ -1042,26 +1042,26 @@ func normalizeVertexCompatKey(entry *config.VertexCompatKey) {
|
||||
entry.Models = normalized
|
||||
}
|
||||
|
||||
func sanitizedOAuthModelMappings(entries map[string][]config.ModelNameMapping) map[string][]config.ModelNameMapping {
|
||||
func sanitizedOAuthModelAlias(entries map[string][]config.OAuthModelAlias) map[string][]config.OAuthModelAlias {
|
||||
if len(entries) == 0 {
|
||||
return nil
|
||||
}
|
||||
copied := make(map[string][]config.ModelNameMapping, len(entries))
|
||||
for channel, mappings := range entries {
|
||||
if len(mappings) == 0 {
|
||||
copied := make(map[string][]config.OAuthModelAlias, len(entries))
|
||||
for channel, aliases := range entries {
|
||||
if len(aliases) == 0 {
|
||||
continue
|
||||
}
|
||||
copied[channel] = append([]config.ModelNameMapping(nil), mappings...)
|
||||
copied[channel] = append([]config.OAuthModelAlias(nil), aliases...)
|
||||
}
|
||||
if len(copied) == 0 {
|
||||
return nil
|
||||
}
|
||||
cfg := config.Config{OAuthModelMappings: copied}
|
||||
cfg.SanitizeOAuthModelMappings()
|
||||
if len(cfg.OAuthModelMappings) == 0 {
|
||||
cfg := config.Config{OAuthModelAlias: copied}
|
||||
cfg.SanitizeOAuthModelAlias()
|
||||
if len(cfg.OAuthModelAlias) == 0 {
|
||||
return nil
|
||||
}
|
||||
return cfg.OAuthModelMappings
|
||||
return cfg.OAuthModelAlias
|
||||
}
|
||||
|
||||
// GetAmpCode returns the complete ampcode configuration.
|
||||
|
||||
@@ -13,7 +13,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -360,16 +360,7 @@ func (h *Handler) logDirectory() string {
|
||||
if h.logDir != "" {
|
||||
return h.logDir
|
||||
}
|
||||
if base := util.WritablePath(); base != "" {
|
||||
return filepath.Join(base, "logs")
|
||||
}
|
||||
if h.configFilePath != "" {
|
||||
dir := filepath.Dir(h.configFilePath)
|
||||
if dir != "" && dir != "." {
|
||||
return filepath.Join(dir, "logs")
|
||||
}
|
||||
}
|
||||
return "logs"
|
||||
return logging.ResolveLogDirectory(h.cfg)
|
||||
}
|
||||
|
||||
func (h *Handler) collectLogFiles(dir string) ([]string, error) {
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/tidwall/gjson"
|
||||
@@ -134,10 +135,11 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
|
||||
}
|
||||
|
||||
// Normalize model (handles dynamic thinking suffixes)
|
||||
normalizedModel, thinkingMetadata := util.NormalizeThinkingModel(modelName)
|
||||
suffixResult := thinking.ParseSuffix(modelName)
|
||||
normalizedModel := suffixResult.ModelName
|
||||
thinkingSuffix := ""
|
||||
if thinkingMetadata != nil && strings.HasPrefix(modelName, normalizedModel) {
|
||||
thinkingSuffix = modelName[len(normalizedModel):]
|
||||
if suffixResult.HasSuffix {
|
||||
thinkingSuffix = "(" + suffixResult.RawSuffix + ")"
|
||||
}
|
||||
|
||||
resolveMappedModel := func() (string, []string) {
|
||||
@@ -157,13 +159,13 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
|
||||
// Preserve dynamic thinking suffix (e.g. "(xhigh)") when mapping applies, unless the target
|
||||
// already specifies its own thinking suffix.
|
||||
if thinkingSuffix != "" {
|
||||
_, mappedThinkingMetadata := util.NormalizeThinkingModel(mappedModel)
|
||||
if mappedThinkingMetadata == nil {
|
||||
mappedSuffixResult := thinking.ParseSuffix(mappedModel)
|
||||
if !mappedSuffixResult.HasSuffix {
|
||||
mappedModel += thinkingSuffix
|
||||
}
|
||||
}
|
||||
|
||||
mappedBaseModel, _ := util.NormalizeThinkingModel(mappedModel)
|
||||
mappedBaseModel := thinking.ParseSuffix(mappedModel).ModelName
|
||||
mappedProviders := util.GetProviderName(mappedBaseModel)
|
||||
if len(mappedProviders) == 0 {
|
||||
return "", nil
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"sync"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
@@ -44,6 +45,11 @@ func NewModelMapper(mappings []config.AmpModelMapping) *DefaultModelMapper {
|
||||
// MapModel checks if a mapping exists for the requested model and if the
|
||||
// target model has available local providers. Returns the mapped model name
|
||||
// or empty string if no valid mapping exists.
|
||||
//
|
||||
// If the requested model contains a thinking suffix (e.g., "g25p(8192)"),
|
||||
// the suffix is preserved in the returned model name (e.g., "gemini-2.5-pro(8192)").
|
||||
// However, if the mapping target already contains a suffix, the config suffix
|
||||
// takes priority over the user's suffix.
|
||||
func (m *DefaultModelMapper) MapModel(requestedModel string) string {
|
||||
if requestedModel == "" {
|
||||
return ""
|
||||
@@ -52,16 +58,20 @@ func (m *DefaultModelMapper) MapModel(requestedModel string) string {
|
||||
m.mu.RLock()
|
||||
defer m.mu.RUnlock()
|
||||
|
||||
// Normalize the requested model for lookup
|
||||
normalizedRequest := strings.ToLower(strings.TrimSpace(requestedModel))
|
||||
// Extract thinking suffix from requested model using ParseSuffix
|
||||
requestResult := thinking.ParseSuffix(requestedModel)
|
||||
baseModel := requestResult.ModelName
|
||||
|
||||
// Check for direct mapping
|
||||
targetModel, exists := m.mappings[normalizedRequest]
|
||||
// Normalize the base model for lookup (case-insensitive)
|
||||
normalizedBase := strings.ToLower(strings.TrimSpace(baseModel))
|
||||
|
||||
// Check for direct mapping using base model name
|
||||
targetModel, exists := m.mappings[normalizedBase]
|
||||
if !exists {
|
||||
// Try regex mappings in order
|
||||
base, _ := util.NormalizeThinkingModel(requestedModel)
|
||||
// Try regex mappings in order using base model only
|
||||
// (suffix is handled separately via ParseSuffix)
|
||||
for _, rm := range m.regexps {
|
||||
if rm.re.MatchString(requestedModel) || (base != "" && rm.re.MatchString(base)) {
|
||||
if rm.re.MatchString(baseModel) {
|
||||
targetModel = rm.to
|
||||
exists = true
|
||||
break
|
||||
@@ -72,14 +82,28 @@ func (m *DefaultModelMapper) MapModel(requestedModel string) string {
|
||||
}
|
||||
}
|
||||
|
||||
// Verify target model has available providers
|
||||
normalizedTarget, _ := util.NormalizeThinkingModel(targetModel)
|
||||
providers := util.GetProviderName(normalizedTarget)
|
||||
// Check if target model already has a thinking suffix (config priority)
|
||||
targetResult := thinking.ParseSuffix(targetModel)
|
||||
|
||||
// Verify target model has available providers (use base model for lookup)
|
||||
providers := util.GetProviderName(targetResult.ModelName)
|
||||
if len(providers) == 0 {
|
||||
log.Debugf("amp model mapping: target model %s has no available providers, skipping mapping", targetModel)
|
||||
return ""
|
||||
}
|
||||
|
||||
// Suffix handling: config suffix takes priority, otherwise preserve user suffix
|
||||
if targetResult.HasSuffix {
|
||||
// Config's "to" already contains a suffix - use it as-is (config priority)
|
||||
return targetModel
|
||||
}
|
||||
|
||||
// Preserve user's thinking suffix on the mapped model
|
||||
// (skip empty suffixes to avoid returning "model()")
|
||||
if requestResult.HasSuffix && requestResult.RawSuffix != "" {
|
||||
return targetModel + "(" + requestResult.RawSuffix + ")"
|
||||
}
|
||||
|
||||
// Note: Detailed routing log is handled by logAmpRouting in fallback_handlers.go
|
||||
return targetModel
|
||||
}
|
||||
|
||||
@@ -217,10 +217,10 @@ func TestModelMapper_Regex_MatchBaseWithoutParens(t *testing.T) {
|
||||
|
||||
mapper := NewModelMapper(mappings)
|
||||
|
||||
// Incoming model has reasoning suffix but should match base via regex
|
||||
// Incoming model has reasoning suffix, regex matches base, suffix is preserved
|
||||
result := mapper.MapModel("gpt-5(high)")
|
||||
if result != "gemini-2.5-pro" {
|
||||
t.Errorf("Expected gemini-2.5-pro, got %s", result)
|
||||
if result != "gemini-2.5-pro(high)" {
|
||||
t.Errorf("Expected gemini-2.5-pro(high), got %s", result)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -281,3 +281,95 @@ func TestModelMapper_Regex_CaseInsensitive(t *testing.T) {
|
||||
t.Errorf("Expected claude-sonnet-4, got %s", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestModelMapper_SuffixPreservation(t *testing.T) {
|
||||
reg := registry.GetGlobalRegistry()
|
||||
|
||||
// Register test models
|
||||
reg.RegisterClient("test-client-suffix", "gemini", []*registry.ModelInfo{
|
||||
{ID: "gemini-2.5-pro", OwnedBy: "google", Type: "gemini"},
|
||||
})
|
||||
reg.RegisterClient("test-client-suffix-2", "claude", []*registry.ModelInfo{
|
||||
{ID: "claude-sonnet-4", OwnedBy: "anthropic", Type: "claude"},
|
||||
})
|
||||
defer reg.UnregisterClient("test-client-suffix")
|
||||
defer reg.UnregisterClient("test-client-suffix-2")
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
mappings []config.AmpModelMapping
|
||||
input string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "numeric suffix preserved",
|
||||
mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
|
||||
input: "g25p(8192)",
|
||||
want: "gemini-2.5-pro(8192)",
|
||||
},
|
||||
{
|
||||
name: "level suffix preserved",
|
||||
mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
|
||||
input: "g25p(high)",
|
||||
want: "gemini-2.5-pro(high)",
|
||||
},
|
||||
{
|
||||
name: "no suffix unchanged",
|
||||
mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
|
||||
input: "g25p",
|
||||
want: "gemini-2.5-pro",
|
||||
},
|
||||
{
|
||||
name: "config suffix takes priority",
|
||||
mappings: []config.AmpModelMapping{{From: "alias", To: "gemini-2.5-pro(medium)"}},
|
||||
input: "alias(high)",
|
||||
want: "gemini-2.5-pro(medium)",
|
||||
},
|
||||
{
|
||||
name: "regex with suffix preserved",
|
||||
mappings: []config.AmpModelMapping{{From: "^g25.*", To: "gemini-2.5-pro", Regex: true}},
|
||||
input: "g25p(8192)",
|
||||
want: "gemini-2.5-pro(8192)",
|
||||
},
|
||||
{
|
||||
name: "auto suffix preserved",
|
||||
mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
|
||||
input: "g25p(auto)",
|
||||
want: "gemini-2.5-pro(auto)",
|
||||
},
|
||||
{
|
||||
name: "none suffix preserved",
|
||||
mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
|
||||
input: "g25p(none)",
|
||||
want: "gemini-2.5-pro(none)",
|
||||
},
|
||||
{
|
||||
name: "case insensitive base lookup with suffix",
|
||||
mappings: []config.AmpModelMapping{{From: "G25P", To: "gemini-2.5-pro"}},
|
||||
input: "g25p(high)",
|
||||
want: "gemini-2.5-pro(high)",
|
||||
},
|
||||
{
|
||||
name: "empty suffix filtered out",
|
||||
mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
|
||||
input: "g25p()",
|
||||
want: "gemini-2.5-pro",
|
||||
},
|
||||
{
|
||||
name: "incomplete suffix treated as no suffix",
|
||||
mappings: []config.AmpModelMapping{{From: "g25p(high", To: "gemini-2.5-pro"}},
|
||||
input: "g25p(high",
|
||||
want: "gemini-2.5-pro",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
mapper := NewModelMapper(tt.mappings)
|
||||
got := mapper.MapModel(tt.input)
|
||||
if got != tt.want {
|
||||
t.Errorf("MapModel(%q) = %q, want %q", tt.input, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,6 +26,7 @@ import (
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/managementasset"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/usage"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
sdkaccess "github.com/router-for-me/CLIProxyAPI/v6/sdk/access"
|
||||
@@ -254,15 +255,13 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk
|
||||
}
|
||||
managementasset.SetCurrentConfig(cfg)
|
||||
auth.SetQuotaCooldownDisabled(cfg.DisableCooling)
|
||||
misc.SetCodexInstructionsEnabled(cfg.CodexInstructionsEnabled)
|
||||
// Initialize management handler
|
||||
s.mgmt = managementHandlers.NewHandler(cfg, configFilePath, authManager)
|
||||
if optionState.localPassword != "" {
|
||||
s.mgmt.SetLocalPassword(optionState.localPassword)
|
||||
}
|
||||
logDir := filepath.Join(s.currentPath, "logs")
|
||||
if base := util.WritablePath(); base != "" {
|
||||
logDir = filepath.Join(base, "logs")
|
||||
}
|
||||
logDir := logging.ResolveLogDirectory(cfg)
|
||||
s.mgmt.SetLogDirectory(logDir)
|
||||
s.localPassword = optionState.localPassword
|
||||
|
||||
@@ -601,10 +600,10 @@ func (s *Server) registerManagementRoutes() {
|
||||
mgmt.PATCH("/oauth-excluded-models", s.mgmt.PatchOAuthExcludedModels)
|
||||
mgmt.DELETE("/oauth-excluded-models", s.mgmt.DeleteOAuthExcludedModels)
|
||||
|
||||
mgmt.GET("/oauth-model-mappings", s.mgmt.GetOAuthModelMappings)
|
||||
mgmt.PUT("/oauth-model-mappings", s.mgmt.PutOAuthModelMappings)
|
||||
mgmt.PATCH("/oauth-model-mappings", s.mgmt.PatchOAuthModelMappings)
|
||||
mgmt.DELETE("/oauth-model-mappings", s.mgmt.DeleteOAuthModelMappings)
|
||||
mgmt.GET("/oauth-model-alias", s.mgmt.GetOAuthModelAlias)
|
||||
mgmt.PUT("/oauth-model-alias", s.mgmt.PutOAuthModelAlias)
|
||||
mgmt.PATCH("/oauth-model-alias", s.mgmt.PatchOAuthModelAlias)
|
||||
mgmt.DELETE("/oauth-model-alias", s.mgmt.DeleteOAuthModelAlias)
|
||||
|
||||
mgmt.GET("/auth-files", s.mgmt.ListAuthFiles)
|
||||
mgmt.GET("/auth-files/models", s.mgmt.GetAuthFileModels)
|
||||
@@ -912,6 +911,16 @@ func (s *Server) UpdateClients(cfg *config.Config) {
|
||||
log.Debugf("disable_cooling toggled to %t", cfg.DisableCooling)
|
||||
}
|
||||
}
|
||||
|
||||
if oldCfg == nil || oldCfg.CodexInstructionsEnabled != cfg.CodexInstructionsEnabled {
|
||||
misc.SetCodexInstructionsEnabled(cfg.CodexInstructionsEnabled)
|
||||
if oldCfg != nil {
|
||||
log.Debugf("codex_instructions_enabled updated from %t to %t", oldCfg.CodexInstructionsEnabled, cfg.CodexInstructionsEnabled)
|
||||
} else {
|
||||
log.Debugf("codex_instructions_enabled toggled to %t", cfg.CodexInstructionsEnabled)
|
||||
}
|
||||
}
|
||||
|
||||
if s.handlers != nil && s.handlers.AuthManager != nil {
|
||||
s.handlers.AuthManager.SetRetryConfig(cfg.RequestRetry, time.Duration(cfg.MaxRetryInterval)*time.Second)
|
||||
}
|
||||
|
||||
57
internal/auth/codex/filename.go
Normal file
57
internal/auth/codex/filename.go
Normal file
@@ -0,0 +1,57 @@
|
||||
package codex
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/cases"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// CredentialFileName returns the filename used to persist Codex OAuth credentials.
|
||||
// When planType is available (e.g. "plus", "team"), it is appended after the email
|
||||
// as a suffix to disambiguate subscriptions.
|
||||
func CredentialFileName(email, planType, hashAccountID string, includeProviderPrefix bool) string {
|
||||
email = strings.TrimSpace(email)
|
||||
plan := normalizePlanTypeForFilename(planType)
|
||||
|
||||
prefix := ""
|
||||
if includeProviderPrefix {
|
||||
prefix = "codex"
|
||||
}
|
||||
|
||||
if plan == "" {
|
||||
return fmt.Sprintf("%s-%s.json", prefix, email)
|
||||
} else if plan == "team" {
|
||||
return fmt.Sprintf("%s-%s-%s-%s.json", prefix, hashAccountID, email, plan)
|
||||
}
|
||||
return fmt.Sprintf("%s-%s-%s.json", prefix, email, plan)
|
||||
}
|
||||
|
||||
func normalizePlanTypeForFilename(planType string) string {
|
||||
planType = strings.TrimSpace(planType)
|
||||
if planType == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
parts := strings.FieldsFunc(planType, func(r rune) bool {
|
||||
return !unicode.IsLetter(r) && !unicode.IsDigit(r)
|
||||
})
|
||||
if len(parts) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
for i, part := range parts {
|
||||
parts[i] = titleToken(part)
|
||||
}
|
||||
return strings.Join(parts, "-")
|
||||
}
|
||||
|
||||
func titleToken(token string) string {
|
||||
token = strings.TrimSpace(token)
|
||||
if token == "" {
|
||||
return ""
|
||||
}
|
||||
return cases.Title(language.English).String(token)
|
||||
}
|
||||
@@ -29,8 +29,9 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
geminiOauthClientID = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
|
||||
geminiOauthClientSecret = "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"
|
||||
geminiOauthClientID = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
|
||||
geminiOauthClientSecret = "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"
|
||||
geminiDefaultCallbackPort = 8085
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -49,8 +50,9 @@ type GeminiAuth struct {
|
||||
|
||||
// WebLoginOptions customizes the interactive OAuth flow.
|
||||
type WebLoginOptions struct {
|
||||
NoBrowser bool
|
||||
Prompt func(string) (string, error)
|
||||
NoBrowser bool
|
||||
CallbackPort int
|
||||
Prompt func(string) (string, error)
|
||||
}
|
||||
|
||||
// NewGeminiAuth creates a new instance of GeminiAuth.
|
||||
@@ -72,6 +74,12 @@ func NewGeminiAuth() *GeminiAuth {
|
||||
// - *http.Client: An HTTP client configured with authentication
|
||||
// - error: An error if the client configuration fails, nil otherwise
|
||||
func (g *GeminiAuth) GetAuthenticatedClient(ctx context.Context, ts *GeminiTokenStorage, cfg *config.Config, opts *WebLoginOptions) (*http.Client, error) {
|
||||
callbackPort := geminiDefaultCallbackPort
|
||||
if opts != nil && opts.CallbackPort > 0 {
|
||||
callbackPort = opts.CallbackPort
|
||||
}
|
||||
callbackURL := fmt.Sprintf("http://localhost:%d/oauth2callback", callbackPort)
|
||||
|
||||
// Configure proxy settings for the HTTP client if a proxy URL is provided.
|
||||
proxyURL, err := url.Parse(cfg.ProxyURL)
|
||||
if err == nil {
|
||||
@@ -106,7 +114,7 @@ func (g *GeminiAuth) GetAuthenticatedClient(ctx context.Context, ts *GeminiToken
|
||||
conf := &oauth2.Config{
|
||||
ClientID: geminiOauthClientID,
|
||||
ClientSecret: geminiOauthClientSecret,
|
||||
RedirectURL: "http://localhost:8085/oauth2callback", // This will be used by the local server.
|
||||
RedirectURL: callbackURL, // This will be used by the local server.
|
||||
Scopes: geminiOauthScopes,
|
||||
Endpoint: google.Endpoint,
|
||||
}
|
||||
@@ -218,14 +226,20 @@ func (g *GeminiAuth) createTokenStorage(ctx context.Context, config *oauth2.Conf
|
||||
// - *oauth2.Token: The OAuth2 token obtained from the authorization flow
|
||||
// - error: An error if the token acquisition fails, nil otherwise
|
||||
func (g *GeminiAuth) getTokenFromWeb(ctx context.Context, config *oauth2.Config, opts *WebLoginOptions) (*oauth2.Token, error) {
|
||||
callbackPort := geminiDefaultCallbackPort
|
||||
if opts != nil && opts.CallbackPort > 0 {
|
||||
callbackPort = opts.CallbackPort
|
||||
}
|
||||
callbackURL := fmt.Sprintf("http://localhost:%d/oauth2callback", callbackPort)
|
||||
|
||||
// Use a channel to pass the authorization code from the HTTP handler to the main function.
|
||||
codeChan := make(chan string, 1)
|
||||
errChan := make(chan error, 1)
|
||||
|
||||
// Create a new HTTP server with its own multiplexer.
|
||||
mux := http.NewServeMux()
|
||||
server := &http.Server{Addr: ":8085", Handler: mux}
|
||||
config.RedirectURL = "http://localhost:8085/oauth2callback"
|
||||
server := &http.Server{Addr: fmt.Sprintf(":%d", callbackPort), Handler: mux}
|
||||
config.RedirectURL = callbackURL
|
||||
|
||||
mux.HandleFunc("/oauth2callback", func(w http.ResponseWriter, r *http.Request) {
|
||||
if err := r.URL.Query().Get("error"); err != "" {
|
||||
@@ -277,13 +291,13 @@ func (g *GeminiAuth) getTokenFromWeb(ctx context.Context, config *oauth2.Config,
|
||||
// Check if browser is available
|
||||
if !browser.IsAvailable() {
|
||||
log.Warn("No browser available on this system")
|
||||
util.PrintSSHTunnelInstructions(8085)
|
||||
util.PrintSSHTunnelInstructions(callbackPort)
|
||||
fmt.Printf("Please manually open this URL in your browser:\n\n%s\n", authURL)
|
||||
} else {
|
||||
if err := browser.OpenURL(authURL); err != nil {
|
||||
authErr := codex.NewAuthenticationError(codex.ErrBrowserOpenFailed, err)
|
||||
log.Warn(codex.GetUserFriendlyMessage(authErr))
|
||||
util.PrintSSHTunnelInstructions(8085)
|
||||
util.PrintSSHTunnelInstructions(callbackPort)
|
||||
fmt.Printf("Please manually open this URL in your browser:\n\n%s\n", authURL)
|
||||
|
||||
// Log platform info for debugging
|
||||
@@ -294,7 +308,7 @@ func (g *GeminiAuth) getTokenFromWeb(ctx context.Context, config *oauth2.Config,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
util.PrintSSHTunnelInstructions(8085)
|
||||
util.PrintSSHTunnelInstructions(callbackPort)
|
||||
fmt.Printf("Please open this URL in your browser:\n\n%s\n", authURL)
|
||||
}
|
||||
|
||||
|
||||
9
internal/cache/signature_cache.go
vendored
9
internal/cache/signature_cache.go
vendored
@@ -3,6 +3,7 @@ package cache
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
@@ -94,7 +95,7 @@ func purgeExpiredSessions() {
|
||||
|
||||
// CacheSignature stores a thinking signature for a given session and text.
|
||||
// Used for Claude models that require signed thinking blocks in multi-turn conversations.
|
||||
func CacheSignature(sessionID, text, signature string) {
|
||||
func CacheSignature(modelName, sessionID, text, signature string) {
|
||||
if sessionID == "" || text == "" || signature == "" {
|
||||
return
|
||||
}
|
||||
@@ -102,7 +103,7 @@ func CacheSignature(sessionID, text, signature string) {
|
||||
return
|
||||
}
|
||||
|
||||
sc := getOrCreateSession(sessionID)
|
||||
sc := getOrCreateSession(fmt.Sprintf("%s#%s", modelName, sessionID))
|
||||
textHash := hashText(text)
|
||||
|
||||
sc.mu.Lock()
|
||||
@@ -116,12 +117,12 @@ func CacheSignature(sessionID, text, signature string) {
|
||||
|
||||
// GetCachedSignature retrieves a cached signature for a given session and text.
|
||||
// Returns empty string if not found or expired.
|
||||
func GetCachedSignature(sessionID, text string) string {
|
||||
func GetCachedSignature(modelName, sessionID, text string) string {
|
||||
if sessionID == "" || text == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
val, ok := signatureCache.Load(sessionID)
|
||||
val, ok := signatureCache.Load(fmt.Sprintf("%s#%s", modelName, sessionID))
|
||||
if !ok {
|
||||
return ""
|
||||
}
|
||||
|
||||
@@ -32,9 +32,10 @@ func DoClaudeLogin(cfg *config.Config, options *LoginOptions) {
|
||||
manager := newAuthManager()
|
||||
|
||||
authOpts := &sdkAuth.LoginOptions{
|
||||
NoBrowser: options.NoBrowser,
|
||||
Metadata: map[string]string{},
|
||||
Prompt: promptFn,
|
||||
NoBrowser: options.NoBrowser,
|
||||
CallbackPort: options.CallbackPort,
|
||||
Metadata: map[string]string{},
|
||||
Prompt: promptFn,
|
||||
}
|
||||
|
||||
_, savedPath, err := manager.Login(context.Background(), "claude", cfg, authOpts)
|
||||
|
||||
@@ -22,9 +22,10 @@ func DoAntigravityLogin(cfg *config.Config, options *LoginOptions) {
|
||||
|
||||
manager := newAuthManager()
|
||||
authOpts := &sdkAuth.LoginOptions{
|
||||
NoBrowser: options.NoBrowser,
|
||||
Metadata: map[string]string{},
|
||||
Prompt: promptFn,
|
||||
NoBrowser: options.NoBrowser,
|
||||
CallbackPort: options.CallbackPort,
|
||||
Metadata: map[string]string{},
|
||||
Prompt: promptFn,
|
||||
}
|
||||
|
||||
record, savedPath, err := manager.Login(context.Background(), "antigravity", cfg, authOpts)
|
||||
|
||||
@@ -24,9 +24,10 @@ func DoIFlowLogin(cfg *config.Config, options *LoginOptions) {
|
||||
}
|
||||
|
||||
authOpts := &sdkAuth.LoginOptions{
|
||||
NoBrowser: options.NoBrowser,
|
||||
Metadata: map[string]string{},
|
||||
Prompt: promptFn,
|
||||
NoBrowser: options.NoBrowser,
|
||||
CallbackPort: options.CallbackPort,
|
||||
Metadata: map[string]string{},
|
||||
Prompt: promptFn,
|
||||
}
|
||||
|
||||
_, savedPath, err := manager.Login(context.Background(), "iflow", cfg, authOpts)
|
||||
|
||||
@@ -67,10 +67,11 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
|
||||
}
|
||||
|
||||
loginOpts := &sdkAuth.LoginOptions{
|
||||
NoBrowser: options.NoBrowser,
|
||||
ProjectID: trimmedProjectID,
|
||||
Metadata: map[string]string{},
|
||||
Prompt: callbackPrompt,
|
||||
NoBrowser: options.NoBrowser,
|
||||
ProjectID: trimmedProjectID,
|
||||
CallbackPort: options.CallbackPort,
|
||||
Metadata: map[string]string{},
|
||||
Prompt: callbackPrompt,
|
||||
}
|
||||
|
||||
authenticator := sdkAuth.NewGeminiAuthenticator()
|
||||
@@ -88,8 +89,9 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
|
||||
|
||||
geminiAuth := gemini.NewGeminiAuth()
|
||||
httpClient, errClient := geminiAuth.GetAuthenticatedClient(ctx, storage, cfg, &gemini.WebLoginOptions{
|
||||
NoBrowser: options.NoBrowser,
|
||||
Prompt: callbackPrompt,
|
||||
NoBrowser: options.NoBrowser,
|
||||
CallbackPort: options.CallbackPort,
|
||||
Prompt: callbackPrompt,
|
||||
})
|
||||
if errClient != nil {
|
||||
log.Errorf("Gemini authentication failed: %v", errClient)
|
||||
@@ -116,6 +118,7 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
|
||||
}
|
||||
|
||||
activatedProjects := make([]string, 0, len(projectSelections))
|
||||
seenProjects := make(map[string]bool)
|
||||
for _, candidateID := range projectSelections {
|
||||
log.Infof("Activating project %s", candidateID)
|
||||
if errSetup := performGeminiCLISetup(ctx, httpClient, storage, candidateID); errSetup != nil {
|
||||
@@ -132,6 +135,13 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
|
||||
if finalID == "" {
|
||||
finalID = candidateID
|
||||
}
|
||||
|
||||
// Skip duplicates
|
||||
if seenProjects[finalID] {
|
||||
log.Infof("Project %s already activated, skipping", finalID)
|
||||
continue
|
||||
}
|
||||
seenProjects[finalID] = true
|
||||
activatedProjects = append(activatedProjects, finalID)
|
||||
}
|
||||
|
||||
@@ -259,7 +269,39 @@ func performGeminiCLISetup(ctx context.Context, httpClient *http.Client, storage
|
||||
finalProjectID := projectID
|
||||
if responseProjectID != "" {
|
||||
if explicitProject && !strings.EqualFold(responseProjectID, projectID) {
|
||||
log.Warnf("Gemini onboarding returned project %s instead of requested %s; keeping requested project ID.", responseProjectID, projectID)
|
||||
// Check if this is a free user (gen-lang-client projects or free/legacy tier)
|
||||
isFreeUser := strings.HasPrefix(projectID, "gen-lang-client-") ||
|
||||
strings.EqualFold(tierID, "FREE") ||
|
||||
strings.EqualFold(tierID, "LEGACY")
|
||||
|
||||
if isFreeUser {
|
||||
// Interactive prompt for free users
|
||||
fmt.Printf("\nGoogle returned a different project ID:\n")
|
||||
fmt.Printf(" Requested (frontend): %s\n", projectID)
|
||||
fmt.Printf(" Returned (backend): %s\n\n", responseProjectID)
|
||||
fmt.Printf(" Backend project IDs have access to preview models (gemini-3-*).\n")
|
||||
fmt.Printf(" This is normal for free tier users.\n\n")
|
||||
fmt.Printf("Which project ID would you like to use?\n")
|
||||
fmt.Printf(" [1] Backend (recommended): %s\n", responseProjectID)
|
||||
fmt.Printf(" [2] Frontend: %s\n\n", projectID)
|
||||
fmt.Printf("Enter choice [1]: ")
|
||||
|
||||
reader := bufio.NewReader(os.Stdin)
|
||||
choice, _ := reader.ReadString('\n')
|
||||
choice = strings.TrimSpace(choice)
|
||||
|
||||
if choice == "2" {
|
||||
log.Infof("Using frontend project ID: %s", projectID)
|
||||
fmt.Println(". Warning: Frontend project IDs may not have access to preview models.")
|
||||
finalProjectID = projectID
|
||||
} else {
|
||||
log.Infof("Using backend project ID: %s (recommended)", responseProjectID)
|
||||
finalProjectID = responseProjectID
|
||||
}
|
||||
} else {
|
||||
// Pro users: keep requested project ID (original behavior)
|
||||
log.Warnf("Gemini onboarding returned project %s instead of requested %s; keeping requested project ID.", responseProjectID, projectID)
|
||||
}
|
||||
} else {
|
||||
finalProjectID = responseProjectID
|
||||
}
|
||||
|
||||
@@ -19,6 +19,9 @@ type LoginOptions struct {
|
||||
// NoBrowser indicates whether to skip opening the browser automatically.
|
||||
NoBrowser bool
|
||||
|
||||
// CallbackPort overrides the local OAuth callback port when set (>0).
|
||||
CallbackPort int
|
||||
|
||||
// Prompt allows the caller to provide interactive input when needed.
|
||||
Prompt func(prompt string) (string, error)
|
||||
}
|
||||
@@ -43,9 +46,10 @@ func DoCodexLogin(cfg *config.Config, options *LoginOptions) {
|
||||
manager := newAuthManager()
|
||||
|
||||
authOpts := &sdkAuth.LoginOptions{
|
||||
NoBrowser: options.NoBrowser,
|
||||
Metadata: map[string]string{},
|
||||
Prompt: promptFn,
|
||||
NoBrowser: options.NoBrowser,
|
||||
CallbackPort: options.CallbackPort,
|
||||
Metadata: map[string]string{},
|
||||
Prompt: promptFn,
|
||||
}
|
||||
|
||||
_, savedPath, err := manager.Login(context.Background(), "codex", cfg, authOpts)
|
||||
|
||||
@@ -36,9 +36,10 @@ func DoQwenLogin(cfg *config.Config, options *LoginOptions) {
|
||||
}
|
||||
|
||||
authOpts := &sdkAuth.LoginOptions{
|
||||
NoBrowser: options.NoBrowser,
|
||||
Metadata: map[string]string{},
|
||||
Prompt: promptFn,
|
||||
NoBrowser: options.NoBrowser,
|
||||
CallbackPort: options.CallbackPort,
|
||||
Metadata: map[string]string{},
|
||||
Prompt: promptFn,
|
||||
}
|
||||
|
||||
_, savedPath, err := manager.Login(context.Background(), "qwen", cfg, authOpts)
|
||||
|
||||
@@ -6,12 +6,14 @@ package config
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
"golang.org/x/crypto/bcrypt"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
@@ -69,6 +71,11 @@ type Config struct {
|
||||
// WebsocketAuth enables or disables authentication for the WebSocket API.
|
||||
WebsocketAuth bool `yaml:"ws-auth" json:"ws-auth"`
|
||||
|
||||
// CodexInstructionsEnabled controls whether official Codex instructions are injected.
|
||||
// When false (default), CodexInstructionsForModel returns immediately without modification.
|
||||
// When true, the original instruction injection logic is used.
|
||||
CodexInstructionsEnabled bool `yaml:"codex-instructions-enabled" json:"codex-instructions-enabled"`
|
||||
|
||||
// GeminiKey defines Gemini API key configurations with optional routing overrides.
|
||||
GeminiKey []GeminiKey `yaml:"gemini-api-key" json:"gemini-api-key"`
|
||||
|
||||
@@ -91,13 +98,13 @@ type Config struct {
|
||||
// OAuthExcludedModels defines per-provider global model exclusions applied to OAuth/file-backed auth entries.
|
||||
OAuthExcludedModels map[string][]string `yaml:"oauth-excluded-models,omitempty" json:"oauth-excluded-models,omitempty"`
|
||||
|
||||
// OAuthModelMappings defines global model name mappings for OAuth/file-backed auth channels.
|
||||
// These mappings affect both model listing and model routing for supported channels:
|
||||
// OAuthModelAlias defines global model name aliases for OAuth/file-backed auth channels.
|
||||
// These aliases affect both model listing and model routing for supported channels:
|
||||
// gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow.
|
||||
//
|
||||
// NOTE: This does not apply to existing per-credential model alias features under:
|
||||
// gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, and ampcode.
|
||||
OAuthModelMappings map[string][]ModelNameMapping `yaml:"oauth-model-mappings,omitempty" json:"oauth-model-mappings,omitempty"`
|
||||
OAuthModelAlias map[string][]OAuthModelAlias `yaml:"oauth-model-alias,omitempty" json:"oauth-model-alias,omitempty"`
|
||||
|
||||
// Payload defines default and override rules for provider payload parameters.
|
||||
Payload PayloadConfig `yaml:"payload" json:"payload"`
|
||||
@@ -145,11 +152,11 @@ type RoutingConfig struct {
|
||||
Strategy string `yaml:"strategy,omitempty" json:"strategy,omitempty"`
|
||||
}
|
||||
|
||||
// ModelNameMapping defines a model ID mapping for a specific channel.
|
||||
// OAuthModelAlias defines a model ID alias for a specific channel.
|
||||
// It maps the upstream model name (Name) to the client-visible alias (Alias).
|
||||
// When Fork is true, the alias is added as an additional model in listings while
|
||||
// keeping the original model ID available.
|
||||
type ModelNameMapping struct {
|
||||
type OAuthModelAlias struct {
|
||||
Name string `yaml:"name" json:"name"`
|
||||
Alias string `yaml:"alias" json:"alias"`
|
||||
Fork bool `yaml:"fork,omitempty" json:"fork,omitempty"`
|
||||
@@ -216,8 +223,12 @@ type AmpUpstreamAPIKeyEntry struct {
|
||||
type PayloadConfig struct {
|
||||
// Default defines rules that only set parameters when they are missing in the payload.
|
||||
Default []PayloadRule `yaml:"default" json:"default"`
|
||||
// DefaultRaw defines rules that set raw JSON values only when they are missing.
|
||||
DefaultRaw []PayloadRule `yaml:"default-raw" json:"default-raw"`
|
||||
// Override defines rules that always set parameters, overwriting any existing values.
|
||||
Override []PayloadRule `yaml:"override" json:"override"`
|
||||
// OverrideRaw defines rules that always set raw JSON values, overwriting any existing values.
|
||||
OverrideRaw []PayloadRule `yaml:"override-raw" json:"override-raw"`
|
||||
}
|
||||
|
||||
// PayloadRule describes a single rule targeting a list of models with parameter updates.
|
||||
@@ -225,6 +236,7 @@ type PayloadRule struct {
|
||||
// Models lists model entries with name pattern and protocol constraint.
|
||||
Models []PayloadModelRule `yaml:"models" json:"models"`
|
||||
// Params maps JSON paths (gjson/sjson syntax) to values written into the payload.
|
||||
// For *-raw rules, values are treated as raw JSON fragments (strings are used as-is).
|
||||
Params map[string]any `yaml:"params" json:"params"`
|
||||
}
|
||||
|
||||
@@ -236,12 +248,35 @@ type PayloadModelRule struct {
|
||||
Protocol string `yaml:"protocol" json:"protocol"`
|
||||
}
|
||||
|
||||
// CloakConfig configures request cloaking for non-Claude-Code clients.
|
||||
// Cloaking disguises API requests to appear as originating from the official Claude Code CLI.
|
||||
type CloakConfig struct {
|
||||
// Mode controls cloaking behavior: "auto" (default), "always", or "never".
|
||||
// - "auto": cloak only when client is not Claude Code (based on User-Agent)
|
||||
// - "always": always apply cloaking regardless of client
|
||||
// - "never": never apply cloaking
|
||||
Mode string `yaml:"mode,omitempty" json:"mode,omitempty"`
|
||||
|
||||
// StrictMode controls how system prompts are handled when cloaking.
|
||||
// - false (default): prepend Claude Code prompt to user system messages
|
||||
// - true: strip all user system messages, keep only Claude Code prompt
|
||||
StrictMode bool `yaml:"strict-mode,omitempty" json:"strict-mode,omitempty"`
|
||||
|
||||
// SensitiveWords is a list of words to obfuscate with zero-width characters.
|
||||
// This can help bypass certain content filters.
|
||||
SensitiveWords []string `yaml:"sensitive-words,omitempty" json:"sensitive-words,omitempty"`
|
||||
}
|
||||
|
||||
// ClaudeKey represents the configuration for a Claude API key,
|
||||
// including the API key itself and an optional base URL for the API endpoint.
|
||||
type ClaudeKey struct {
|
||||
// APIKey is the authentication key for accessing Claude API services.
|
||||
APIKey string `yaml:"api-key" json:"api-key"`
|
||||
|
||||
// Priority controls selection preference when multiple credentials match.
|
||||
// Higher values are preferred; defaults to 0.
|
||||
Priority int `yaml:"priority,omitempty" json:"priority,omitempty"`
|
||||
|
||||
// Prefix optionally namespaces models for this credential (e.g., "teamA/claude-sonnet-4").
|
||||
Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"`
|
||||
|
||||
@@ -260,8 +295,14 @@ type ClaudeKey struct {
|
||||
|
||||
// ExcludedModels lists model IDs that should be excluded for this provider.
|
||||
ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"`
|
||||
|
||||
// Cloak configures request cloaking for non-Claude-Code clients.
|
||||
Cloak *CloakConfig `yaml:"cloak,omitempty" json:"cloak,omitempty"`
|
||||
}
|
||||
|
||||
func (k ClaudeKey) GetAPIKey() string { return k.APIKey }
|
||||
func (k ClaudeKey) GetBaseURL() string { return k.BaseURL }
|
||||
|
||||
// ClaudeModel describes a mapping between an alias and the actual upstream model name.
|
||||
type ClaudeModel struct {
|
||||
// Name is the upstream model identifier used when issuing requests.
|
||||
@@ -280,6 +321,10 @@ type CodexKey struct {
|
||||
// APIKey is the authentication key for accessing Codex API services.
|
||||
APIKey string `yaml:"api-key" json:"api-key"`
|
||||
|
||||
// Priority controls selection preference when multiple credentials match.
|
||||
// Higher values are preferred; defaults to 0.
|
||||
Priority int `yaml:"priority,omitempty" json:"priority,omitempty"`
|
||||
|
||||
// Prefix optionally namespaces models for this credential (e.g., "teamA/gpt-5-codex").
|
||||
Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"`
|
||||
|
||||
@@ -300,6 +345,9 @@ type CodexKey struct {
|
||||
ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"`
|
||||
}
|
||||
|
||||
func (k CodexKey) GetAPIKey() string { return k.APIKey }
|
||||
func (k CodexKey) GetBaseURL() string { return k.BaseURL }
|
||||
|
||||
// CodexModel describes a mapping between an alias and the actual upstream model name.
|
||||
type CodexModel struct {
|
||||
// Name is the upstream model identifier used when issuing requests.
|
||||
@@ -318,6 +366,10 @@ type GeminiKey struct {
|
||||
// APIKey is the authentication key for accessing Gemini API services.
|
||||
APIKey string `yaml:"api-key" json:"api-key"`
|
||||
|
||||
// Priority controls selection preference when multiple credentials match.
|
||||
// Higher values are preferred; defaults to 0.
|
||||
Priority int `yaml:"priority,omitempty" json:"priority,omitempty"`
|
||||
|
||||
// Prefix optionally namespaces models for this credential (e.g., "teamA/gemini-3-pro-preview").
|
||||
Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"`
|
||||
|
||||
@@ -337,6 +389,9 @@ type GeminiKey struct {
|
||||
ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"`
|
||||
}
|
||||
|
||||
func (k GeminiKey) GetAPIKey() string { return k.APIKey }
|
||||
func (k GeminiKey) GetBaseURL() string { return k.BaseURL }
|
||||
|
||||
// GeminiModel describes a mapping between an alias and the actual upstream model name.
|
||||
type GeminiModel struct {
|
||||
// Name is the upstream model identifier used when issuing requests.
|
||||
@@ -355,6 +410,10 @@ type OpenAICompatibility struct {
|
||||
// Name is the identifier for this OpenAI compatibility configuration.
|
||||
Name string `yaml:"name" json:"name"`
|
||||
|
||||
// Priority controls selection preference when multiple providers or credentials match.
|
||||
// Higher values are preferred; defaults to 0.
|
||||
Priority int `yaml:"priority,omitempty" json:"priority,omitempty"`
|
||||
|
||||
// Prefix optionally namespaces model aliases for this provider (e.g., "teamA/kimi-k2").
|
||||
Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"`
|
||||
|
||||
@@ -390,6 +449,9 @@ type OpenAICompatibilityModel struct {
|
||||
Alias string `yaml:"alias" json:"alias"`
|
||||
}
|
||||
|
||||
func (m OpenAICompatibilityModel) GetName() string { return m.Name }
|
||||
func (m OpenAICompatibilityModel) GetAlias() string { return m.Alias }
|
||||
|
||||
// LoadConfig reads a YAML configuration file from the given path,
|
||||
// unmarshals it into a Config struct, applies environment variable overrides,
|
||||
// and returns it.
|
||||
@@ -408,6 +470,15 @@ func LoadConfig(configFile string) (*Config, error) {
|
||||
// If optional is true and the file is missing, it returns an empty Config.
|
||||
// If optional is true and the file is empty or invalid, it returns an empty Config.
|
||||
func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
|
||||
// Perform oauth-model-alias migration before loading config.
|
||||
// This migrates oauth-model-mappings to oauth-model-alias if needed.
|
||||
if migrated, err := MigrateOAuthModelAlias(configFile); err != nil {
|
||||
// Log warning but don't fail - config loading should still work
|
||||
fmt.Printf("Warning: oauth-model-alias migration failed: %v\n", err)
|
||||
} else if migrated {
|
||||
fmt.Println("Migrated oauth-model-mappings to oauth-model-alias")
|
||||
}
|
||||
|
||||
// Read the entire configuration file into memory.
|
||||
data, err := os.ReadFile(configFile)
|
||||
if err != nil {
|
||||
@@ -500,8 +571,11 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
|
||||
// Normalize OAuth provider model exclusion map.
|
||||
cfg.OAuthExcludedModels = NormalizeOAuthExcludedModels(cfg.OAuthExcludedModels)
|
||||
|
||||
// Normalize global OAuth model name mappings.
|
||||
cfg.SanitizeOAuthModelMappings()
|
||||
// Normalize global OAuth model name aliases.
|
||||
cfg.SanitizeOAuthModelAlias()
|
||||
|
||||
// Validate raw payload rules and drop invalid entries.
|
||||
cfg.SanitizePayloadRules()
|
||||
|
||||
if cfg.legacyMigrationPending {
|
||||
fmt.Println("Detected legacy configuration keys, attempting to persist the normalized config...")
|
||||
@@ -519,24 +593,79 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
|
||||
return &cfg, nil
|
||||
}
|
||||
|
||||
// SanitizeOAuthModelMappings normalizes and deduplicates global OAuth model name mappings.
|
||||
// It trims whitespace, normalizes channel keys to lower-case, drops empty entries,
|
||||
// allows multiple aliases per upstream name, and ensures aliases are unique within each channel.
|
||||
func (cfg *Config) SanitizeOAuthModelMappings() {
|
||||
if cfg == nil || len(cfg.OAuthModelMappings) == 0 {
|
||||
// SanitizePayloadRules validates raw JSON payload rule params and drops invalid rules.
|
||||
func (cfg *Config) SanitizePayloadRules() {
|
||||
if cfg == nil {
|
||||
return
|
||||
}
|
||||
out := make(map[string][]ModelNameMapping, len(cfg.OAuthModelMappings))
|
||||
for rawChannel, mappings := range cfg.OAuthModelMappings {
|
||||
channel := strings.ToLower(strings.TrimSpace(rawChannel))
|
||||
if channel == "" || len(mappings) == 0 {
|
||||
cfg.Payload.DefaultRaw = sanitizePayloadRawRules(cfg.Payload.DefaultRaw, "default-raw")
|
||||
cfg.Payload.OverrideRaw = sanitizePayloadRawRules(cfg.Payload.OverrideRaw, "override-raw")
|
||||
}
|
||||
|
||||
func sanitizePayloadRawRules(rules []PayloadRule, section string) []PayloadRule {
|
||||
if len(rules) == 0 {
|
||||
return rules
|
||||
}
|
||||
out := make([]PayloadRule, 0, len(rules))
|
||||
for i := range rules {
|
||||
rule := rules[i]
|
||||
if len(rule.Params) == 0 {
|
||||
continue
|
||||
}
|
||||
seenAlias := make(map[string]struct{}, len(mappings))
|
||||
clean := make([]ModelNameMapping, 0, len(mappings))
|
||||
for _, mapping := range mappings {
|
||||
name := strings.TrimSpace(mapping.Name)
|
||||
alias := strings.TrimSpace(mapping.Alias)
|
||||
invalid := false
|
||||
for path, value := range rule.Params {
|
||||
raw, ok := payloadRawString(value)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
trimmed := bytes.TrimSpace(raw)
|
||||
if len(trimmed) == 0 || !json.Valid(trimmed) {
|
||||
log.WithFields(log.Fields{
|
||||
"section": section,
|
||||
"rule_index": i + 1,
|
||||
"param": path,
|
||||
}).Warn("payload rule dropped: invalid raw JSON")
|
||||
invalid = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if invalid {
|
||||
continue
|
||||
}
|
||||
out = append(out, rule)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func payloadRawString(value any) ([]byte, bool) {
|
||||
switch typed := value.(type) {
|
||||
case string:
|
||||
return []byte(typed), true
|
||||
case []byte:
|
||||
return typed, true
|
||||
default:
|
||||
return nil, false
|
||||
}
|
||||
}
|
||||
|
||||
// SanitizeOAuthModelAlias normalizes and deduplicates global OAuth model name aliases.
|
||||
// It trims whitespace, normalizes channel keys to lower-case, drops empty entries,
|
||||
// allows multiple aliases per upstream name, and ensures aliases are unique within each channel.
|
||||
func (cfg *Config) SanitizeOAuthModelAlias() {
|
||||
if cfg == nil || len(cfg.OAuthModelAlias) == 0 {
|
||||
return
|
||||
}
|
||||
out := make(map[string][]OAuthModelAlias, len(cfg.OAuthModelAlias))
|
||||
for rawChannel, aliases := range cfg.OAuthModelAlias {
|
||||
channel := strings.ToLower(strings.TrimSpace(rawChannel))
|
||||
if channel == "" || len(aliases) == 0 {
|
||||
continue
|
||||
}
|
||||
seenAlias := make(map[string]struct{}, len(aliases))
|
||||
clean := make([]OAuthModelAlias, 0, len(aliases))
|
||||
for _, entry := range aliases {
|
||||
name := strings.TrimSpace(entry.Name)
|
||||
alias := strings.TrimSpace(entry.Alias)
|
||||
if name == "" || alias == "" {
|
||||
continue
|
||||
}
|
||||
@@ -548,13 +677,13 @@ func (cfg *Config) SanitizeOAuthModelMappings() {
|
||||
continue
|
||||
}
|
||||
seenAlias[aliasKey] = struct{}{}
|
||||
clean = append(clean, ModelNameMapping{Name: name, Alias: alias, Fork: mapping.Fork})
|
||||
clean = append(clean, OAuthModelAlias{Name: name, Alias: alias, Fork: entry.Fork})
|
||||
}
|
||||
if len(clean) > 0 {
|
||||
out[channel] = clean
|
||||
}
|
||||
}
|
||||
cfg.OAuthModelMappings = out
|
||||
cfg.OAuthModelAlias = out
|
||||
}
|
||||
|
||||
// SanitizeOpenAICompatibility removes OpenAI-compatibility provider entries that are
|
||||
|
||||
275
internal/config/oauth_model_alias_migration.go
Normal file
275
internal/config/oauth_model_alias_migration.go
Normal file
@@ -0,0 +1,275 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// antigravityModelConversionTable maps old built-in aliases to actual model names
|
||||
// for the antigravity channel during migration.
|
||||
var antigravityModelConversionTable = map[string]string{
|
||||
"gemini-2.5-computer-use-preview-10-2025": "rev19-uic3-1p",
|
||||
"gemini-3-pro-image-preview": "gemini-3-pro-image",
|
||||
"gemini-3-pro-preview": "gemini-3-pro-high",
|
||||
"gemini-3-flash-preview": "gemini-3-flash",
|
||||
"gemini-claude-sonnet-4-5": "claude-sonnet-4-5",
|
||||
"gemini-claude-sonnet-4-5-thinking": "claude-sonnet-4-5-thinking",
|
||||
"gemini-claude-opus-4-5-thinking": "claude-opus-4-5-thinking",
|
||||
}
|
||||
|
||||
// defaultAntigravityAliases returns the default oauth-model-alias configuration
|
||||
// for the antigravity channel when neither field exists.
|
||||
func defaultAntigravityAliases() []OAuthModelAlias {
|
||||
return []OAuthModelAlias{
|
||||
{Name: "rev19-uic3-1p", Alias: "gemini-2.5-computer-use-preview-10-2025"},
|
||||
{Name: "gemini-3-pro-image", Alias: "gemini-3-pro-image-preview"},
|
||||
{Name: "gemini-3-pro-high", Alias: "gemini-3-pro-preview"},
|
||||
{Name: "gemini-3-flash", Alias: "gemini-3-flash-preview"},
|
||||
{Name: "claude-sonnet-4-5", Alias: "gemini-claude-sonnet-4-5"},
|
||||
{Name: "claude-sonnet-4-5-thinking", Alias: "gemini-claude-sonnet-4-5-thinking"},
|
||||
{Name: "claude-opus-4-5-thinking", Alias: "gemini-claude-opus-4-5-thinking"},
|
||||
}
|
||||
}
|
||||
|
||||
// MigrateOAuthModelAlias checks for and performs migration from oauth-model-mappings
|
||||
// to oauth-model-alias at startup. Returns true if migration was performed.
|
||||
//
|
||||
// Migration flow:
|
||||
// 1. Check if oauth-model-alias exists -> skip migration
|
||||
// 2. Check if oauth-model-mappings exists -> convert and migrate
|
||||
// - For antigravity channel, convert old built-in aliases to actual model names
|
||||
//
|
||||
// 3. Neither exists -> add default antigravity config
|
||||
func MigrateOAuthModelAlias(configFile string) (bool, error) {
|
||||
data, err := os.ReadFile(configFile)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return false, nil
|
||||
}
|
||||
return false, err
|
||||
}
|
||||
if len(data) == 0 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Parse YAML into node tree to preserve structure
|
||||
var root yaml.Node
|
||||
if err := yaml.Unmarshal(data, &root); err != nil {
|
||||
return false, nil
|
||||
}
|
||||
if root.Kind != yaml.DocumentNode || len(root.Content) == 0 {
|
||||
return false, nil
|
||||
}
|
||||
rootMap := root.Content[0]
|
||||
if rootMap == nil || rootMap.Kind != yaml.MappingNode {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Check if oauth-model-alias already exists
|
||||
if findMapKeyIndex(rootMap, "oauth-model-alias") >= 0 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Check if oauth-model-mappings exists
|
||||
oldIdx := findMapKeyIndex(rootMap, "oauth-model-mappings")
|
||||
if oldIdx >= 0 {
|
||||
// Migrate from old field
|
||||
return migrateFromOldField(configFile, &root, rootMap, oldIdx)
|
||||
}
|
||||
|
||||
// Neither field exists - add default antigravity config
|
||||
return addDefaultAntigravityConfig(configFile, &root, rootMap)
|
||||
}
|
||||
|
||||
// migrateFromOldField converts oauth-model-mappings to oauth-model-alias
|
||||
func migrateFromOldField(configFile string, root *yaml.Node, rootMap *yaml.Node, oldIdx int) (bool, error) {
|
||||
if oldIdx+1 >= len(rootMap.Content) {
|
||||
return false, nil
|
||||
}
|
||||
oldValue := rootMap.Content[oldIdx+1]
|
||||
if oldValue == nil || oldValue.Kind != yaml.MappingNode {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Parse the old aliases
|
||||
oldAliases := parseOldAliasNode(oldValue)
|
||||
if len(oldAliases) == 0 {
|
||||
// Remove the old field and write
|
||||
removeMapKeyByIndex(rootMap, oldIdx)
|
||||
return writeYAMLNode(configFile, root)
|
||||
}
|
||||
|
||||
// Convert model names for antigravity channel
|
||||
newAliases := make(map[string][]OAuthModelAlias, len(oldAliases))
|
||||
for channel, entries := range oldAliases {
|
||||
converted := make([]OAuthModelAlias, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
newEntry := OAuthModelAlias{
|
||||
Name: entry.Name,
|
||||
Alias: entry.Alias,
|
||||
Fork: entry.Fork,
|
||||
}
|
||||
// Convert model names for antigravity channel
|
||||
if strings.EqualFold(channel, "antigravity") {
|
||||
if actual, ok := antigravityModelConversionTable[entry.Name]; ok {
|
||||
newEntry.Name = actual
|
||||
}
|
||||
}
|
||||
converted = append(converted, newEntry)
|
||||
}
|
||||
newAliases[channel] = converted
|
||||
}
|
||||
|
||||
// For antigravity channel, supplement missing default aliases
|
||||
if antigravityEntries, exists := newAliases["antigravity"]; exists {
|
||||
// Build a set of already configured model names (upstream names)
|
||||
configuredModels := make(map[string]bool, len(antigravityEntries))
|
||||
for _, entry := range antigravityEntries {
|
||||
configuredModels[entry.Name] = true
|
||||
}
|
||||
|
||||
// Add missing default aliases
|
||||
for _, defaultAlias := range defaultAntigravityAliases() {
|
||||
if !configuredModels[defaultAlias.Name] {
|
||||
antigravityEntries = append(antigravityEntries, defaultAlias)
|
||||
}
|
||||
}
|
||||
newAliases["antigravity"] = antigravityEntries
|
||||
}
|
||||
|
||||
// Build new node
|
||||
newNode := buildOAuthModelAliasNode(newAliases)
|
||||
|
||||
// Replace old key with new key and value
|
||||
rootMap.Content[oldIdx].Value = "oauth-model-alias"
|
||||
rootMap.Content[oldIdx+1] = newNode
|
||||
|
||||
return writeYAMLNode(configFile, root)
|
||||
}
|
||||
|
||||
// addDefaultAntigravityConfig adds the default antigravity configuration
|
||||
func addDefaultAntigravityConfig(configFile string, root *yaml.Node, rootMap *yaml.Node) (bool, error) {
|
||||
defaults := map[string][]OAuthModelAlias{
|
||||
"antigravity": defaultAntigravityAliases(),
|
||||
}
|
||||
newNode := buildOAuthModelAliasNode(defaults)
|
||||
|
||||
// Add new key-value pair
|
||||
keyNode := &yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "oauth-model-alias"}
|
||||
rootMap.Content = append(rootMap.Content, keyNode, newNode)
|
||||
|
||||
return writeYAMLNode(configFile, root)
|
||||
}
|
||||
|
||||
// parseOldAliasNode parses the old oauth-model-mappings node structure
|
||||
func parseOldAliasNode(node *yaml.Node) map[string][]OAuthModelAlias {
|
||||
if node == nil || node.Kind != yaml.MappingNode {
|
||||
return nil
|
||||
}
|
||||
result := make(map[string][]OAuthModelAlias)
|
||||
for i := 0; i+1 < len(node.Content); i += 2 {
|
||||
channelNode := node.Content[i]
|
||||
entriesNode := node.Content[i+1]
|
||||
if channelNode == nil || entriesNode == nil {
|
||||
continue
|
||||
}
|
||||
channel := strings.ToLower(strings.TrimSpace(channelNode.Value))
|
||||
if channel == "" || entriesNode.Kind != yaml.SequenceNode {
|
||||
continue
|
||||
}
|
||||
entries := make([]OAuthModelAlias, 0, len(entriesNode.Content))
|
||||
for _, entryNode := range entriesNode.Content {
|
||||
if entryNode == nil || entryNode.Kind != yaml.MappingNode {
|
||||
continue
|
||||
}
|
||||
entry := parseAliasEntry(entryNode)
|
||||
if entry.Name != "" && entry.Alias != "" {
|
||||
entries = append(entries, entry)
|
||||
}
|
||||
}
|
||||
if len(entries) > 0 {
|
||||
result[channel] = entries
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// parseAliasEntry parses a single alias entry node
|
||||
func parseAliasEntry(node *yaml.Node) OAuthModelAlias {
|
||||
var entry OAuthModelAlias
|
||||
for i := 0; i+1 < len(node.Content); i += 2 {
|
||||
keyNode := node.Content[i]
|
||||
valNode := node.Content[i+1]
|
||||
if keyNode == nil || valNode == nil {
|
||||
continue
|
||||
}
|
||||
switch strings.ToLower(strings.TrimSpace(keyNode.Value)) {
|
||||
case "name":
|
||||
entry.Name = strings.TrimSpace(valNode.Value)
|
||||
case "alias":
|
||||
entry.Alias = strings.TrimSpace(valNode.Value)
|
||||
case "fork":
|
||||
entry.Fork = strings.ToLower(strings.TrimSpace(valNode.Value)) == "true"
|
||||
}
|
||||
}
|
||||
return entry
|
||||
}
|
||||
|
||||
// buildOAuthModelAliasNode creates a YAML node for oauth-model-alias
|
||||
func buildOAuthModelAliasNode(aliases map[string][]OAuthModelAlias) *yaml.Node {
|
||||
node := &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"}
|
||||
for channel, entries := range aliases {
|
||||
channelNode := &yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: channel}
|
||||
entriesNode := &yaml.Node{Kind: yaml.SequenceNode, Tag: "!!seq"}
|
||||
for _, entry := range entries {
|
||||
entryNode := &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"}
|
||||
entryNode.Content = append(entryNode.Content,
|
||||
&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "name"},
|
||||
&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: entry.Name},
|
||||
&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "alias"},
|
||||
&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: entry.Alias},
|
||||
)
|
||||
if entry.Fork {
|
||||
entryNode.Content = append(entryNode.Content,
|
||||
&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "fork"},
|
||||
&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!bool", Value: "true"},
|
||||
)
|
||||
}
|
||||
entriesNode.Content = append(entriesNode.Content, entryNode)
|
||||
}
|
||||
node.Content = append(node.Content, channelNode, entriesNode)
|
||||
}
|
||||
return node
|
||||
}
|
||||
|
||||
// removeMapKeyByIndex removes a key-value pair from a mapping node by index
|
||||
func removeMapKeyByIndex(mapNode *yaml.Node, keyIdx int) {
|
||||
if mapNode == nil || mapNode.Kind != yaml.MappingNode {
|
||||
return
|
||||
}
|
||||
if keyIdx < 0 || keyIdx+1 >= len(mapNode.Content) {
|
||||
return
|
||||
}
|
||||
mapNode.Content = append(mapNode.Content[:keyIdx], mapNode.Content[keyIdx+2:]...)
|
||||
}
|
||||
|
||||
// writeYAMLNode writes the YAML node tree back to file
|
||||
func writeYAMLNode(configFile string, root *yaml.Node) (bool, error) {
|
||||
f, err := os.Create(configFile)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
enc := yaml.NewEncoder(f)
|
||||
enc.SetIndent(2)
|
||||
if err := enc.Encode(root); err != nil {
|
||||
return false, err
|
||||
}
|
||||
if err := enc.Close(); err != nil {
|
||||
return false, err
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
242
internal/config/oauth_model_alias_migration_test.go
Normal file
242
internal/config/oauth_model_alias_migration_test.go
Normal file
@@ -0,0 +1,242 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
func TestMigrateOAuthModelAlias_SkipsIfNewFieldExists(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
dir := t.TempDir()
|
||||
configFile := filepath.Join(dir, "config.yaml")
|
||||
|
||||
content := `oauth-model-alias:
|
||||
gemini-cli:
|
||||
- name: "gemini-2.5-pro"
|
||||
alias: "g2.5p"
|
||||
`
|
||||
if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
migrated, err := MigrateOAuthModelAlias(configFile)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if migrated {
|
||||
t.Fatal("expected no migration when oauth-model-alias already exists")
|
||||
}
|
||||
|
||||
// Verify file unchanged
|
||||
data, _ := os.ReadFile(configFile)
|
||||
if !strings.Contains(string(data), "oauth-model-alias:") {
|
||||
t.Fatal("file should still contain oauth-model-alias")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMigrateOAuthModelAlias_MigratesOldField(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
dir := t.TempDir()
|
||||
configFile := filepath.Join(dir, "config.yaml")
|
||||
|
||||
content := `oauth-model-mappings:
|
||||
gemini-cli:
|
||||
- name: "gemini-2.5-pro"
|
||||
alias: "g2.5p"
|
||||
fork: true
|
||||
`
|
||||
if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
migrated, err := MigrateOAuthModelAlias(configFile)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if !migrated {
|
||||
t.Fatal("expected migration to occur")
|
||||
}
|
||||
|
||||
// Verify new field exists and old field removed
|
||||
data, _ := os.ReadFile(configFile)
|
||||
if strings.Contains(string(data), "oauth-model-mappings:") {
|
||||
t.Fatal("old field should be removed")
|
||||
}
|
||||
if !strings.Contains(string(data), "oauth-model-alias:") {
|
||||
t.Fatal("new field should exist")
|
||||
}
|
||||
|
||||
// Parse and verify structure
|
||||
var root yaml.Node
|
||||
if err := yaml.Unmarshal(data, &root); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMigrateOAuthModelAlias_ConvertsAntigravityModels(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
dir := t.TempDir()
|
||||
configFile := filepath.Join(dir, "config.yaml")
|
||||
|
||||
// Use old model names that should be converted
|
||||
content := `oauth-model-mappings:
|
||||
antigravity:
|
||||
- name: "gemini-2.5-computer-use-preview-10-2025"
|
||||
alias: "computer-use"
|
||||
- name: "gemini-3-pro-preview"
|
||||
alias: "g3p"
|
||||
`
|
||||
if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
migrated, err := MigrateOAuthModelAlias(configFile)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if !migrated {
|
||||
t.Fatal("expected migration to occur")
|
||||
}
|
||||
|
||||
// Verify model names were converted
|
||||
data, _ := os.ReadFile(configFile)
|
||||
content = string(data)
|
||||
if !strings.Contains(content, "rev19-uic3-1p") {
|
||||
t.Fatal("expected gemini-2.5-computer-use-preview-10-2025 to be converted to rev19-uic3-1p")
|
||||
}
|
||||
if !strings.Contains(content, "gemini-3-pro-high") {
|
||||
t.Fatal("expected gemini-3-pro-preview to be converted to gemini-3-pro-high")
|
||||
}
|
||||
|
||||
// Verify missing default aliases were supplemented
|
||||
if !strings.Contains(content, "gemini-3-pro-image") {
|
||||
t.Fatal("expected missing default alias gemini-3-pro-image to be added")
|
||||
}
|
||||
if !strings.Contains(content, "gemini-3-flash") {
|
||||
t.Fatal("expected missing default alias gemini-3-flash to be added")
|
||||
}
|
||||
if !strings.Contains(content, "claude-sonnet-4-5") {
|
||||
t.Fatal("expected missing default alias claude-sonnet-4-5 to be added")
|
||||
}
|
||||
if !strings.Contains(content, "claude-sonnet-4-5-thinking") {
|
||||
t.Fatal("expected missing default alias claude-sonnet-4-5-thinking to be added")
|
||||
}
|
||||
if !strings.Contains(content, "claude-opus-4-5-thinking") {
|
||||
t.Fatal("expected missing default alias claude-opus-4-5-thinking to be added")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMigrateOAuthModelAlias_AddsDefaultIfNeitherExists(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
dir := t.TempDir()
|
||||
configFile := filepath.Join(dir, "config.yaml")
|
||||
|
||||
content := `debug: true
|
||||
port: 8080
|
||||
`
|
||||
if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
migrated, err := MigrateOAuthModelAlias(configFile)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if !migrated {
|
||||
t.Fatal("expected migration to add default config")
|
||||
}
|
||||
|
||||
// Verify default antigravity config was added
|
||||
data, _ := os.ReadFile(configFile)
|
||||
content = string(data)
|
||||
if !strings.Contains(content, "oauth-model-alias:") {
|
||||
t.Fatal("expected oauth-model-alias to be added")
|
||||
}
|
||||
if !strings.Contains(content, "antigravity:") {
|
||||
t.Fatal("expected antigravity channel to be added")
|
||||
}
|
||||
if !strings.Contains(content, "rev19-uic3-1p") {
|
||||
t.Fatal("expected default antigravity aliases to include rev19-uic3-1p")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMigrateOAuthModelAlias_PreservesOtherConfig(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
dir := t.TempDir()
|
||||
configFile := filepath.Join(dir, "config.yaml")
|
||||
|
||||
content := `debug: true
|
||||
port: 8080
|
||||
oauth-model-mappings:
|
||||
gemini-cli:
|
||||
- name: "test"
|
||||
alias: "t"
|
||||
api-keys:
|
||||
- "key1"
|
||||
- "key2"
|
||||
`
|
||||
if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
migrated, err := MigrateOAuthModelAlias(configFile)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if !migrated {
|
||||
t.Fatal("expected migration to occur")
|
||||
}
|
||||
|
||||
// Verify other config preserved
|
||||
data, _ := os.ReadFile(configFile)
|
||||
content = string(data)
|
||||
if !strings.Contains(content, "debug: true") {
|
||||
t.Fatal("expected debug field to be preserved")
|
||||
}
|
||||
if !strings.Contains(content, "port: 8080") {
|
||||
t.Fatal("expected port field to be preserved")
|
||||
}
|
||||
if !strings.Contains(content, "api-keys:") {
|
||||
t.Fatal("expected api-keys field to be preserved")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMigrateOAuthModelAlias_NonexistentFile(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
migrated, err := MigrateOAuthModelAlias("/nonexistent/path/config.yaml")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error for nonexistent file: %v", err)
|
||||
}
|
||||
if migrated {
|
||||
t.Fatal("expected no migration for nonexistent file")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMigrateOAuthModelAlias_EmptyFile(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
dir := t.TempDir()
|
||||
configFile := filepath.Join(dir, "config.yaml")
|
||||
|
||||
if err := os.WriteFile(configFile, []byte(""), 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
migrated, err := MigrateOAuthModelAlias(configFile)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if migrated {
|
||||
t.Fatal("expected no migration for empty file")
|
||||
}
|
||||
}
|
||||
56
internal/config/oauth_model_alias_test.go
Normal file
56
internal/config/oauth_model_alias_test.go
Normal file
@@ -0,0 +1,56 @@
|
||||
package config
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestSanitizeOAuthModelAlias_PreservesForkFlag(t *testing.T) {
|
||||
cfg := &Config{
|
||||
OAuthModelAlias: map[string][]OAuthModelAlias{
|
||||
" CoDeX ": {
|
||||
{Name: " gpt-5 ", Alias: " g5 ", Fork: true},
|
||||
{Name: "gpt-6", Alias: "g6"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
cfg.SanitizeOAuthModelAlias()
|
||||
|
||||
aliases := cfg.OAuthModelAlias["codex"]
|
||||
if len(aliases) != 2 {
|
||||
t.Fatalf("expected 2 sanitized aliases, got %d", len(aliases))
|
||||
}
|
||||
if aliases[0].Name != "gpt-5" || aliases[0].Alias != "g5" || !aliases[0].Fork {
|
||||
t.Fatalf("expected first alias to be gpt-5->g5 fork=true, got name=%q alias=%q fork=%v", aliases[0].Name, aliases[0].Alias, aliases[0].Fork)
|
||||
}
|
||||
if aliases[1].Name != "gpt-6" || aliases[1].Alias != "g6" || aliases[1].Fork {
|
||||
t.Fatalf("expected second alias to be gpt-6->g6 fork=false, got name=%q alias=%q fork=%v", aliases[1].Name, aliases[1].Alias, aliases[1].Fork)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeOAuthModelAlias_AllowsMultipleAliasesForSameName(t *testing.T) {
|
||||
cfg := &Config{
|
||||
OAuthModelAlias: map[string][]OAuthModelAlias{
|
||||
"antigravity": {
|
||||
{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true},
|
||||
{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true},
|
||||
{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
cfg.SanitizeOAuthModelAlias()
|
||||
|
||||
aliases := cfg.OAuthModelAlias["antigravity"]
|
||||
expected := []OAuthModelAlias{
|
||||
{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true},
|
||||
{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true},
|
||||
{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true},
|
||||
}
|
||||
if len(aliases) != len(expected) {
|
||||
t.Fatalf("expected %d sanitized aliases, got %d", len(expected), len(aliases))
|
||||
}
|
||||
for i, exp := range expected {
|
||||
if aliases[i].Name != exp.Name || aliases[i].Alias != exp.Alias || aliases[i].Fork != exp.Fork {
|
||||
t.Fatalf("expected alias %d to be name=%q alias=%q fork=%v, got name=%q alias=%q fork=%v", i, exp.Name, exp.Alias, exp.Fork, aliases[i].Name, aliases[i].Alias, aliases[i].Fork)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,56 +0,0 @@
|
||||
package config
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestSanitizeOAuthModelMappings_PreservesForkFlag(t *testing.T) {
|
||||
cfg := &Config{
|
||||
OAuthModelMappings: map[string][]ModelNameMapping{
|
||||
" CoDeX ": {
|
||||
{Name: " gpt-5 ", Alias: " g5 ", Fork: true},
|
||||
{Name: "gpt-6", Alias: "g6"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
cfg.SanitizeOAuthModelMappings()
|
||||
|
||||
mappings := cfg.OAuthModelMappings["codex"]
|
||||
if len(mappings) != 2 {
|
||||
t.Fatalf("expected 2 sanitized mappings, got %d", len(mappings))
|
||||
}
|
||||
if mappings[0].Name != "gpt-5" || mappings[0].Alias != "g5" || !mappings[0].Fork {
|
||||
t.Fatalf("expected first mapping to be gpt-5->g5 fork=true, got name=%q alias=%q fork=%v", mappings[0].Name, mappings[0].Alias, mappings[0].Fork)
|
||||
}
|
||||
if mappings[1].Name != "gpt-6" || mappings[1].Alias != "g6" || mappings[1].Fork {
|
||||
t.Fatalf("expected second mapping to be gpt-6->g6 fork=false, got name=%q alias=%q fork=%v", mappings[1].Name, mappings[1].Alias, mappings[1].Fork)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeOAuthModelMappings_AllowsMultipleAliasesForSameName(t *testing.T) {
|
||||
cfg := &Config{
|
||||
OAuthModelMappings: map[string][]ModelNameMapping{
|
||||
"antigravity": {
|
||||
{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true},
|
||||
{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true},
|
||||
{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
cfg.SanitizeOAuthModelMappings()
|
||||
|
||||
mappings := cfg.OAuthModelMappings["antigravity"]
|
||||
expected := []ModelNameMapping{
|
||||
{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true},
|
||||
{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true},
|
||||
{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true},
|
||||
}
|
||||
if len(mappings) != len(expected) {
|
||||
t.Fatalf("expected %d sanitized mappings, got %d", len(expected), len(mappings))
|
||||
}
|
||||
for i, exp := range expected {
|
||||
if mappings[i].Name != exp.Name || mappings[i].Alias != exp.Alias || mappings[i].Fork != exp.Fork {
|
||||
t.Fatalf("expected mapping %d to be name=%q alias=%q fork=%v, got name=%q alias=%q fork=%v", i, exp.Name, exp.Alias, exp.Fork, mappings[i].Name, mappings[i].Alias, mappings[i].Fork)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -25,6 +25,10 @@ type SDKConfig struct {
|
||||
|
||||
// Streaming configures server-side streaming behavior (keep-alives and safe bootstrap retries).
|
||||
Streaming StreamingConfig `yaml:"streaming" json:"streaming"`
|
||||
|
||||
// NonStreamKeepAliveInterval controls how often blank lines are emitted for non-streaming responses.
|
||||
// <= 0 disables keep-alives. Value is in seconds.
|
||||
NonStreamKeepAliveInterval int `yaml:"nonstream-keepalive-interval,omitempty" json:"nonstream-keepalive-interval,omitempty"`
|
||||
}
|
||||
|
||||
// StreamingConfig holds server streaming behavior configuration.
|
||||
|
||||
@@ -13,6 +13,10 @@ type VertexCompatKey struct {
|
||||
// Maps to the x-goog-api-key header.
|
||||
APIKey string `yaml:"api-key" json:"api-key"`
|
||||
|
||||
// Priority controls selection preference when multiple credentials match.
|
||||
// Higher values are preferred; defaults to 0.
|
||||
Priority int `yaml:"priority,omitempty" json:"priority,omitempty"`
|
||||
|
||||
// Prefix optionally namespaces model aliases for this credential (e.g., "teamA/vertex-pro").
|
||||
Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"`
|
||||
|
||||
@@ -32,6 +36,9 @@ type VertexCompatKey struct {
|
||||
Models []VertexCompatModel `yaml:"models,omitempty" json:"models,omitempty"`
|
||||
}
|
||||
|
||||
func (k VertexCompatKey) GetAPIKey() string { return k.APIKey }
|
||||
func (k VertexCompatKey) GetBaseURL() string { return k.BaseURL }
|
||||
|
||||
// VertexCompatModel represents a model configuration for Vertex compatibility,
|
||||
// including the actual model name and its alias for API routing.
|
||||
type VertexCompatModel struct {
|
||||
|
||||
@@ -29,6 +29,9 @@ var (
|
||||
// Format: [2025-12-23 20:14:04] [debug] [manager.go:524] | a1b2c3d4 | Use API key sk-9...0RHO for model gpt-5.2
|
||||
type LogFormatter struct{}
|
||||
|
||||
// logFieldOrder defines the display order for common log fields.
|
||||
var logFieldOrder = []string{"provider", "model", "mode", "budget", "level", "original_mode", "original_value", "min", "max", "clamped_to", "error"}
|
||||
|
||||
// Format renders a single log entry with custom formatting.
|
||||
func (m *LogFormatter) Format(entry *log.Entry) ([]byte, error) {
|
||||
var buffer *bytes.Buffer
|
||||
@@ -52,11 +55,25 @@ func (m *LogFormatter) Format(entry *log.Entry) ([]byte, error) {
|
||||
}
|
||||
levelStr := fmt.Sprintf("%-5s", level)
|
||||
|
||||
// Build fields string (only print fields in logFieldOrder)
|
||||
var fieldsStr string
|
||||
if len(entry.Data) > 0 {
|
||||
var fields []string
|
||||
for _, k := range logFieldOrder {
|
||||
if v, ok := entry.Data[k]; ok {
|
||||
fields = append(fields, fmt.Sprintf("%s=%v", k, v))
|
||||
}
|
||||
}
|
||||
if len(fields) > 0 {
|
||||
fieldsStr = " " + strings.Join(fields, " ")
|
||||
}
|
||||
}
|
||||
|
||||
var formatted string
|
||||
if entry.Caller != nil {
|
||||
formatted = fmt.Sprintf("[%s] [%s] [%s] [%s:%d] %s\n", timestamp, reqID, levelStr, filepath.Base(entry.Caller.File), entry.Caller.Line, message)
|
||||
formatted = fmt.Sprintf("[%s] [%s] [%s] [%s:%d] %s%s\n", timestamp, reqID, levelStr, filepath.Base(entry.Caller.File), entry.Caller.Line, message, fieldsStr)
|
||||
} else {
|
||||
formatted = fmt.Sprintf("[%s] [%s] [%s] %s\n", timestamp, reqID, levelStr, message)
|
||||
formatted = fmt.Sprintf("[%s] [%s] [%s] %s%s\n", timestamp, reqID, levelStr, message, fieldsStr)
|
||||
}
|
||||
buffer.WriteString(formatted)
|
||||
|
||||
@@ -104,6 +121,24 @@ func isDirWritable(dir string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// ResolveLogDirectory determines the directory used for application logs.
|
||||
func ResolveLogDirectory(cfg *config.Config) string {
|
||||
logDir := "logs"
|
||||
if base := util.WritablePath(); base != "" {
|
||||
return filepath.Join(base, "logs")
|
||||
}
|
||||
if cfg == nil {
|
||||
return logDir
|
||||
}
|
||||
if !isDirWritable(logDir) {
|
||||
authDir := strings.TrimSpace(cfg.AuthDir)
|
||||
if authDir != "" {
|
||||
logDir = filepath.Join(authDir, "logs")
|
||||
}
|
||||
}
|
||||
return logDir
|
||||
}
|
||||
|
||||
// ConfigureLogOutput switches the global log destination between rotating files and stdout.
|
||||
// When logsMaxTotalSizeMB > 0, a background cleaner removes the oldest log files in the logs directory
|
||||
// until the total size is within the limit.
|
||||
@@ -113,12 +148,7 @@ func ConfigureLogOutput(cfg *config.Config) error {
|
||||
writerMu.Lock()
|
||||
defer writerMu.Unlock()
|
||||
|
||||
logDir := "logs"
|
||||
if base := util.WritablePath(); base != "" {
|
||||
logDir = filepath.Join(base, "logs")
|
||||
} else if !isDirWritable(logDir) {
|
||||
logDir = filepath.Join(cfg.AuthDir, "logs")
|
||||
}
|
||||
logDir := ResolveLogDirectory(cfg)
|
||||
|
||||
protectedPath := ""
|
||||
if cfg.LoggingToFile {
|
||||
|
||||
@@ -7,11 +7,27 @@ import (
|
||||
"embed"
|
||||
_ "embed"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// codexInstructionsEnabled controls whether CodexInstructionsForModel returns official instructions.
|
||||
// When false (default), CodexInstructionsForModel returns (true, "") immediately.
|
||||
// Set via SetCodexInstructionsEnabled from config.
|
||||
var codexInstructionsEnabled atomic.Bool
|
||||
|
||||
// SetCodexInstructionsEnabled sets whether codex instructions processing is enabled.
|
||||
func SetCodexInstructionsEnabled(enabled bool) {
|
||||
codexInstructionsEnabled.Store(enabled)
|
||||
}
|
||||
|
||||
// GetCodexInstructionsEnabled returns whether codex instructions processing is enabled.
|
||||
func GetCodexInstructionsEnabled() bool {
|
||||
return codexInstructionsEnabled.Load()
|
||||
}
|
||||
|
||||
//go:embed codex_instructions
|
||||
var codexInstructionsDir embed.FS
|
||||
|
||||
@@ -124,6 +140,9 @@ func codexInstructionsForCodex(modelName, systemInstructions string) (bool, stri
|
||||
}
|
||||
|
||||
func CodexInstructionsForModel(modelName, systemInstructions, userAgent string) (bool, string) {
|
||||
if !GetCodexInstructionsEnabled() {
|
||||
return true, ""
|
||||
}
|
||||
if IsOpenCodeUserAgent(userAgent) {
|
||||
return codexInstructionsForOpenCode(systemInstructions)
|
||||
}
|
||||
|
||||
@@ -27,7 +27,7 @@ func GetClaudeModels() []*ModelInfo {
|
||||
DisplayName: "Claude 4.5 Sonnet",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-5-20251101",
|
||||
@@ -39,7 +39,7 @@ func GetClaudeModels() []*ModelInfo {
|
||||
Description: "Premium model combining maximum intelligence with practical performance",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-1-20250805",
|
||||
@@ -50,7 +50,7 @@ func GetClaudeModels() []*ModelInfo {
|
||||
DisplayName: "Claude 4.1 Opus",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 32000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-20250514",
|
||||
@@ -61,7 +61,7 @@ func GetClaudeModels() []*ModelInfo {
|
||||
DisplayName: "Claude 4 Opus",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 32000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
|
||||
},
|
||||
{
|
||||
ID: "claude-sonnet-4-20250514",
|
||||
@@ -72,7 +72,7 @@ func GetClaudeModels() []*ModelInfo {
|
||||
DisplayName: "Claude 4 Sonnet",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
|
||||
},
|
||||
{
|
||||
ID: "claude-3-7-sonnet-20250219",
|
||||
@@ -83,7 +83,7 @@ func GetClaudeModels() []*ModelInfo {
|
||||
DisplayName: "Claude 3.7 Sonnet",
|
||||
ContextLength: 128000,
|
||||
MaxCompletionTokens: 8192,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
|
||||
},
|
||||
{
|
||||
ID: "claude-3-5-haiku-20241022",
|
||||
@@ -287,6 +287,67 @@ func GetGeminiVertexModels() []*ModelInfo {
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
||||
},
|
||||
// Imagen image generation models - use :predict action
|
||||
{
|
||||
ID: "imagen-4.0-generate-001",
|
||||
Object: "model",
|
||||
Created: 1750000000,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/imagen-4.0-generate-001",
|
||||
Version: "4.0",
|
||||
DisplayName: "Imagen 4.0 Generate",
|
||||
Description: "Imagen 4.0 image generation model",
|
||||
SupportedGenerationMethods: []string{"predict"},
|
||||
},
|
||||
{
|
||||
ID: "imagen-4.0-ultra-generate-001",
|
||||
Object: "model",
|
||||
Created: 1750000000,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/imagen-4.0-ultra-generate-001",
|
||||
Version: "4.0",
|
||||
DisplayName: "Imagen 4.0 Ultra Generate",
|
||||
Description: "Imagen 4.0 Ultra high-quality image generation model",
|
||||
SupportedGenerationMethods: []string{"predict"},
|
||||
},
|
||||
{
|
||||
ID: "imagen-3.0-generate-002",
|
||||
Object: "model",
|
||||
Created: 1740000000,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/imagen-3.0-generate-002",
|
||||
Version: "3.0",
|
||||
DisplayName: "Imagen 3.0 Generate",
|
||||
Description: "Imagen 3.0 image generation model",
|
||||
SupportedGenerationMethods: []string{"predict"},
|
||||
},
|
||||
{
|
||||
ID: "imagen-3.0-fast-generate-001",
|
||||
Object: "model",
|
||||
Created: 1740000000,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/imagen-3.0-fast-generate-001",
|
||||
Version: "3.0",
|
||||
DisplayName: "Imagen 3.0 Fast Generate",
|
||||
Description: "Imagen 3.0 fast image generation model",
|
||||
SupportedGenerationMethods: []string{"predict"},
|
||||
},
|
||||
{
|
||||
ID: "imagen-4.0-fast-generate-001",
|
||||
Object: "model",
|
||||
Created: 1750000000,
|
||||
OwnedBy: "google",
|
||||
Type: "gemini",
|
||||
Name: "models/imagen-4.0-fast-generate-001",
|
||||
Version: "4.0",
|
||||
DisplayName: "Imagen 4.0 Fast Generate",
|
||||
Description: "Imagen 4.0 fast image generation model",
|
||||
SupportedGenerationMethods: []string{"predict"},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -432,7 +493,7 @@ func GetAIStudioModels() []*ModelInfo {
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-3-flash-preview",
|
||||
@@ -447,7 +508,7 @@ func GetAIStudioModels() []*ModelInfo {
|
||||
InputTokenLimit: 1048576,
|
||||
OutputTokenLimit: 65536,
|
||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
|
||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "gemini-pro-latest",
|
||||
@@ -742,6 +803,7 @@ func GetIFlowModels() []*ModelInfo {
|
||||
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
|
||||
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
|
||||
{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
|
||||
{ID: "iflow-rome-30ba3b", DisplayName: "iFlow-ROME", Description: "iFlow Rome 30BA3B model", Created: 1736899200},
|
||||
}
|
||||
models := make([]*ModelInfo, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
@@ -764,21 +826,23 @@ func GetIFlowModels() []*ModelInfo {
|
||||
type AntigravityModelConfig struct {
|
||||
Thinking *ThinkingSupport
|
||||
MaxCompletionTokens int
|
||||
Name string
|
||||
}
|
||||
|
||||
// GetAntigravityModelConfig returns static configuration for antigravity models.
|
||||
// Keys use the ALIASED model names (after modelName2Alias conversion) for direct lookup.
|
||||
// Keys use upstream model names returned by the Antigravity models endpoint.
|
||||
func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
|
||||
return map[string]*AntigravityModelConfig{
|
||||
"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"},
|
||||
"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"},
|
||||
"gemini-2.5-computer-use-preview-10-2025": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-2.5-computer-use-preview-10-2025"},
|
||||
"gemini-3-pro-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-preview"},
|
||||
"gemini-3-pro-image-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image-preview"},
|
||||
"gemini-3-flash-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash-preview"},
|
||||
"gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
||||
"gemini-claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
||||
"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
|
||||
"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
|
||||
"rev19-uic3-1p": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
|
||||
"gemini-3-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
|
||||
"gemini-3-pro-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
|
||||
"gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
|
||||
"claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
||||
"claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
||||
"claude-sonnet-4-5": {MaxCompletionTokens: 64000},
|
||||
"gpt-oss-120b-medium": {},
|
||||
"tab_flash_lite_preview": {},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -788,6 +852,7 @@ func LookupStaticModelInfo(modelID string) *ModelInfo {
|
||||
if modelID == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
allModels := [][]*ModelInfo{
|
||||
GetClaudeModels(),
|
||||
GetGeminiModels(),
|
||||
@@ -805,5 +870,15 @@ func LookupStaticModelInfo(modelID string) *ModelInfo {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check Antigravity static config
|
||||
if cfg := GetAntigravityModelConfig()[modelID]; cfg != nil {
|
||||
return &ModelInfo{
|
||||
ID: modelID,
|
||||
Thinking: cfg.Thinking,
|
||||
MaxCompletionTokens: cfg.MaxCompletionTokens,
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -51,6 +51,11 @@ type ModelInfo struct {
|
||||
// Thinking holds provider-specific reasoning/thinking budget capabilities.
|
||||
// This is optional and currently used for Gemini thinking budget normalization.
|
||||
Thinking *ThinkingSupport `json:"thinking,omitempty"`
|
||||
|
||||
// UserDefined indicates this model was defined through config file's models[]
|
||||
// array (e.g., openai-compatibility.*.models[], *-api-key.models[]).
|
||||
// UserDefined models have thinking configuration passed through without validation.
|
||||
UserDefined bool `json:"-"`
|
||||
}
|
||||
|
||||
// ThinkingSupport describes a model family's supported internal reasoning budget range.
|
||||
@@ -73,6 +78,8 @@ type ThinkingSupport struct {
|
||||
type ModelRegistration struct {
|
||||
// Info contains the model metadata
|
||||
Info *ModelInfo
|
||||
// InfoByProvider maps provider identifiers to specific ModelInfo to support differing capabilities.
|
||||
InfoByProvider map[string]*ModelInfo
|
||||
// Count is the number of active clients that can provide this model
|
||||
Count int
|
||||
// LastUpdated tracks when this registration was last modified
|
||||
@@ -127,6 +134,24 @@ func GetGlobalRegistry() *ModelRegistry {
|
||||
return globalRegistry
|
||||
}
|
||||
|
||||
// LookupModelInfo searches dynamic registry (provider-specific > global) then static definitions.
|
||||
func LookupModelInfo(modelID string, provider ...string) *ModelInfo {
|
||||
modelID = strings.TrimSpace(modelID)
|
||||
if modelID == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
p := ""
|
||||
if len(provider) > 0 {
|
||||
p = strings.ToLower(strings.TrimSpace(provider[0]))
|
||||
}
|
||||
|
||||
if info := GetGlobalRegistry().GetModelInfo(modelID, p); info != nil {
|
||||
return info
|
||||
}
|
||||
return LookupStaticModelInfo(modelID)
|
||||
}
|
||||
|
||||
// SetHook sets an optional hook for observing model registration changes.
|
||||
func (r *ModelRegistry) SetHook(hook ModelRegistryHook) {
|
||||
if r == nil {
|
||||
@@ -277,6 +302,9 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
|
||||
if count, okProv := reg.Providers[oldProvider]; okProv {
|
||||
if count <= toRemove {
|
||||
delete(reg.Providers, oldProvider)
|
||||
if reg.InfoByProvider != nil {
|
||||
delete(reg.InfoByProvider, oldProvider)
|
||||
}
|
||||
} else {
|
||||
reg.Providers[oldProvider] = count - toRemove
|
||||
}
|
||||
@@ -326,6 +354,12 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
|
||||
model := newModels[id]
|
||||
if reg, ok := r.models[id]; ok {
|
||||
reg.Info = cloneModelInfo(model)
|
||||
if provider != "" {
|
||||
if reg.InfoByProvider == nil {
|
||||
reg.InfoByProvider = make(map[string]*ModelInfo)
|
||||
}
|
||||
reg.InfoByProvider[provider] = cloneModelInfo(model)
|
||||
}
|
||||
reg.LastUpdated = now
|
||||
if reg.QuotaExceededClients != nil {
|
||||
delete(reg.QuotaExceededClients, clientID)
|
||||
@@ -389,11 +423,15 @@ func (r *ModelRegistry) addModelRegistration(modelID, provider string, model *Mo
|
||||
if existing.SuspendedClients == nil {
|
||||
existing.SuspendedClients = make(map[string]string)
|
||||
}
|
||||
if existing.InfoByProvider == nil {
|
||||
existing.InfoByProvider = make(map[string]*ModelInfo)
|
||||
}
|
||||
if provider != "" {
|
||||
if existing.Providers == nil {
|
||||
existing.Providers = make(map[string]int)
|
||||
}
|
||||
existing.Providers[provider]++
|
||||
existing.InfoByProvider[provider] = cloneModelInfo(model)
|
||||
}
|
||||
log.Debugf("Incremented count for model %s, now %d clients", modelID, existing.Count)
|
||||
return
|
||||
@@ -401,6 +439,7 @@ func (r *ModelRegistry) addModelRegistration(modelID, provider string, model *Mo
|
||||
|
||||
registration := &ModelRegistration{
|
||||
Info: cloneModelInfo(model),
|
||||
InfoByProvider: make(map[string]*ModelInfo),
|
||||
Count: 1,
|
||||
LastUpdated: now,
|
||||
QuotaExceededClients: make(map[string]*time.Time),
|
||||
@@ -408,6 +447,7 @@ func (r *ModelRegistry) addModelRegistration(modelID, provider string, model *Mo
|
||||
}
|
||||
if provider != "" {
|
||||
registration.Providers = map[string]int{provider: 1}
|
||||
registration.InfoByProvider[provider] = cloneModelInfo(model)
|
||||
}
|
||||
r.models[modelID] = registration
|
||||
log.Debugf("Registered new model %s from provider %s", modelID, provider)
|
||||
@@ -433,6 +473,9 @@ func (r *ModelRegistry) removeModelRegistration(clientID, modelID, provider stri
|
||||
if count, ok := registration.Providers[provider]; ok {
|
||||
if count <= 1 {
|
||||
delete(registration.Providers, provider)
|
||||
if registration.InfoByProvider != nil {
|
||||
delete(registration.InfoByProvider, provider)
|
||||
}
|
||||
} else {
|
||||
registration.Providers[provider] = count - 1
|
||||
}
|
||||
@@ -514,6 +557,9 @@ func (r *ModelRegistry) unregisterClientInternal(clientID string) {
|
||||
if count, ok := registration.Providers[provider]; ok {
|
||||
if count <= 1 {
|
||||
delete(registration.Providers, provider)
|
||||
if registration.InfoByProvider != nil {
|
||||
delete(registration.InfoByProvider, provider)
|
||||
}
|
||||
} else {
|
||||
registration.Providers[provider] = count - 1
|
||||
}
|
||||
@@ -920,12 +966,22 @@ func (r *ModelRegistry) GetModelProviders(modelID string) []string {
|
||||
return result
|
||||
}
|
||||
|
||||
// GetModelInfo returns the registered ModelInfo for the given model ID, if present.
|
||||
// Returns nil if the model is unknown to the registry.
|
||||
func (r *ModelRegistry) GetModelInfo(modelID string) *ModelInfo {
|
||||
// GetModelInfo returns ModelInfo, prioritizing provider-specific definition if available.
|
||||
func (r *ModelRegistry) GetModelInfo(modelID, provider string) *ModelInfo {
|
||||
r.mutex.RLock()
|
||||
defer r.mutex.RUnlock()
|
||||
if reg, ok := r.models[modelID]; ok && reg != nil {
|
||||
// Try provider specific definition first
|
||||
if provider != "" && reg.InfoByProvider != nil {
|
||||
if reg.Providers != nil {
|
||||
if count, ok := reg.Providers[provider]; ok && count > 0 {
|
||||
if info, ok := reg.InfoByProvider[provider]; ok && info != nil {
|
||||
return info
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Fallback to global info (last registered)
|
||||
return reg.Info
|
||||
}
|
||||
return nil
|
||||
|
||||
@@ -14,7 +14,7 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/wsrelay"
|
||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||
@@ -111,7 +111,8 @@ func (e *AIStudioExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.A
|
||||
|
||||
// Execute performs a non-streaming request to the AI Studio API.
|
||||
func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
translatedReq, body, err := e.translateRequest(req, opts, false)
|
||||
@@ -119,7 +120,7 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
|
||||
return resp, err
|
||||
}
|
||||
|
||||
endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt)
|
||||
endpoint := e.buildEndpoint(baseModel, body.action, opts.Alt)
|
||||
wsReq := &wsrelay.HTTPRequest{
|
||||
Method: http.MethodPost,
|
||||
URL: endpoint,
|
||||
@@ -166,7 +167,8 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
|
||||
|
||||
// ExecuteStream performs a streaming request to the AI Studio API.
|
||||
func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
translatedReq, body, err := e.translateRequest(req, opts, true)
|
||||
@@ -174,7 +176,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
|
||||
return nil, err
|
||||
}
|
||||
|
||||
endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt)
|
||||
endpoint := e.buildEndpoint(baseModel, body.action, opts.Alt)
|
||||
wsReq := &wsrelay.HTTPRequest{
|
||||
Method: http.MethodPost,
|
||||
URL: endpoint,
|
||||
@@ -315,6 +317,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
|
||||
|
||||
// CountTokens counts tokens for the given request using the AI Studio API.
|
||||
func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
_, body, err := e.translateRequest(req, opts, false)
|
||||
if err != nil {
|
||||
return cliproxyexecutor.Response{}, err
|
||||
@@ -324,7 +327,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
|
||||
body.payload, _ = sjson.DeleteBytes(body.payload, "tools")
|
||||
body.payload, _ = sjson.DeleteBytes(body.payload, "safetySettings")
|
||||
|
||||
endpoint := e.buildEndpoint(req.Model, "countTokens", "")
|
||||
endpoint := e.buildEndpoint(baseModel, "countTokens", "")
|
||||
wsReq := &wsrelay.HTTPRequest{
|
||||
Method: http.MethodPost,
|
||||
URL: endpoint,
|
||||
@@ -380,22 +383,22 @@ type translatedPayload struct {
|
||||
}
|
||||
|
||||
func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool) ([]byte, translatedPayload, error) {
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
originalPayload := bytes.Clone(req.Payload)
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, stream)
|
||||
payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
|
||||
payload = ApplyThinkingMetadata(payload, req.Metadata, req.Model)
|
||||
payload = util.ApplyGemini3ThinkingLevelFromMetadata(req.Model, req.Metadata, payload)
|
||||
payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload)
|
||||
payload = util.ConvertThinkingLevelToBudget(payload, req.Model, true)
|
||||
payload = util.NormalizeGeminiThinkingBudget(req.Model, payload, true)
|
||||
payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
|
||||
payload = fixGeminiImageAspectRatio(req.Model, payload)
|
||||
payload = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", payload, originalTranslated)
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream)
|
||||
payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
|
||||
payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return nil, translatedPayload{}, err
|
||||
}
|
||||
payload = fixGeminiImageAspectRatio(baseModel, payload)
|
||||
payload = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", payload, originalTranslated)
|
||||
payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens")
|
||||
payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseMimeType")
|
||||
payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseJsonSchema")
|
||||
|
||||
@@ -24,7 +24,9 @@ import (
|
||||
"github.com/google/uuid"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
sdkAuth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth"
|
||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
||||
@@ -107,8 +109,10 @@ func (e *AntigravityExecutor) HttpRequest(ctx context.Context, auth *cliproxyaut
|
||||
|
||||
// Execute performs a non-streaming request to the Antigravity API.
|
||||
func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
isClaude := strings.Contains(strings.ToLower(req.Model), "claude")
|
||||
if isClaude || strings.Contains(req.Model, "gemini-3-pro") {
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
isClaude := strings.Contains(strings.ToLower(baseModel), "claude")
|
||||
|
||||
if isClaude || strings.Contains(baseModel, "gemini-3-pro") {
|
||||
return e.executeClaudeNonStream(ctx, auth, req, opts)
|
||||
}
|
||||
|
||||
@@ -120,23 +124,25 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
|
||||
auth = updatedAuth
|
||||
}
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("antigravity")
|
||||
|
||||
originalPayload := bytes.Clone(req.Payload)
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
|
||||
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
|
||||
translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
|
||||
|
||||
translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
||||
translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
|
||||
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
|
||||
translated = normalizeAntigravityThinking(req.Model, translated, isClaude)
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated)
|
||||
translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated)
|
||||
|
||||
baseURLs := antigravityBaseURLFallbackOrder(auth)
|
||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||
@@ -146,7 +152,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
|
||||
var lastErr error
|
||||
|
||||
for idx, baseURL := range baseURLs {
|
||||
httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, false, opts.Alt, baseURL)
|
||||
httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, false, opts.Alt, baseURL)
|
||||
if errReq != nil {
|
||||
err = errReq
|
||||
return resp, err
|
||||
@@ -227,6 +233,8 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
|
||||
|
||||
// executeClaudeNonStream performs a claude non-streaming request to the Antigravity API.
|
||||
func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
|
||||
if errToken != nil {
|
||||
return resp, errToken
|
||||
@@ -235,23 +243,25 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
|
||||
auth = updatedAuth
|
||||
}
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("antigravity")
|
||||
|
||||
originalPayload := bytes.Clone(req.Payload)
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
|
||||
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
|
||||
translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
|
||||
|
||||
translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
||||
translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
|
||||
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
|
||||
translated = normalizeAntigravityThinking(req.Model, translated, true)
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated)
|
||||
translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated)
|
||||
|
||||
baseURLs := antigravityBaseURLFallbackOrder(auth)
|
||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||
@@ -261,7 +271,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
|
||||
var lastErr error
|
||||
|
||||
for idx, baseURL := range baseURLs {
|
||||
httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, true, opts.Alt, baseURL)
|
||||
httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL)
|
||||
if errReq != nil {
|
||||
err = errReq
|
||||
return resp, err
|
||||
@@ -507,8 +517,8 @@ func (e *AntigravityExecutor) convertStreamToNonStream(stream []byte) []byte {
|
||||
}
|
||||
if usageResult := responseNode.Get("usageMetadata"); usageResult.Exists() {
|
||||
usageRaw = usageResult.Raw
|
||||
} else if usageResult := root.Get("usageMetadata"); usageResult.Exists() {
|
||||
usageRaw = usageResult.Raw
|
||||
} else if usageMetadataResult := root.Get("usageMetadata"); usageMetadataResult.Exists() {
|
||||
usageRaw = usageMetadataResult.Raw
|
||||
}
|
||||
|
||||
if partsResult := responseNode.Get("candidates.0.content.parts"); partsResult.IsArray() {
|
||||
@@ -587,6 +597,8 @@ func (e *AntigravityExecutor) convertStreamToNonStream(stream []byte) []byte {
|
||||
|
||||
// ExecuteStream performs a streaming request to the Antigravity API.
|
||||
func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
ctx = context.WithValue(ctx, "alt", "")
|
||||
|
||||
token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
|
||||
@@ -597,25 +609,25 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
|
||||
auth = updatedAuth
|
||||
}
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
isClaude := strings.Contains(strings.ToLower(req.Model), "claude")
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("antigravity")
|
||||
|
||||
originalPayload := bytes.Clone(req.Payload)
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
|
||||
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
|
||||
translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
|
||||
|
||||
translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
||||
translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
|
||||
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
|
||||
translated = normalizeAntigravityThinking(req.Model, translated, isClaude)
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated)
|
||||
translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated)
|
||||
|
||||
baseURLs := antigravityBaseURLFallbackOrder(auth)
|
||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||
@@ -625,12 +637,11 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
|
||||
var lastErr error
|
||||
|
||||
for idx, baseURL := range baseURLs {
|
||||
httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, true, opts.Alt, baseURL)
|
||||
httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL)
|
||||
if errReq != nil {
|
||||
err = errReq
|
||||
return nil, err
|
||||
}
|
||||
|
||||
httpResp, errDo := httpClient.Do(httpReq)
|
||||
if errDo != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, errDo)
|
||||
@@ -771,6 +782,8 @@ func (e *AntigravityExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Au
|
||||
|
||||
// CountTokens counts tokens for the given request using the Antigravity API.
|
||||
func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
|
||||
if errToken != nil {
|
||||
return cliproxyexecutor.Response{}, errToken
|
||||
@@ -786,7 +799,17 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
|
||||
to := sdktranslator.FromString("antigravity")
|
||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||
|
||||
isClaude := strings.Contains(strings.ToLower(req.Model), "claude")
|
||||
// Prepare payload once (doesn't depend on baseURL)
|
||||
payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
|
||||
|
||||
payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return cliproxyexecutor.Response{}, err
|
||||
}
|
||||
|
||||
payload = deleteJSONField(payload, "project")
|
||||
payload = deleteJSONField(payload, "model")
|
||||
payload = deleteJSONField(payload, "request.safetySettings")
|
||||
|
||||
baseURLs := antigravityBaseURLFallbackOrder(auth)
|
||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||
@@ -803,14 +826,6 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
|
||||
var lastErr error
|
||||
|
||||
for idx, baseURL := range baseURLs {
|
||||
payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model)
|
||||
payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, payload)
|
||||
payload = normalizeAntigravityThinking(req.Model, payload, isClaude)
|
||||
payload = deleteJSONField(payload, "project")
|
||||
payload = deleteJSONField(payload, "model")
|
||||
payload = deleteJSONField(payload, "request.safetySettings")
|
||||
|
||||
base := strings.TrimSuffix(baseURL, "/")
|
||||
if base == "" {
|
||||
base = buildBaseURL(auth)
|
||||
@@ -980,35 +995,37 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
|
||||
modelConfig := registry.GetAntigravityModelConfig()
|
||||
models := make([]*registry.ModelInfo, 0, len(result.Map()))
|
||||
for originalName := range result.Map() {
|
||||
aliasName := modelName2Alias(originalName)
|
||||
if aliasName != "" {
|
||||
cfg := modelConfig[aliasName]
|
||||
modelName := aliasName
|
||||
if cfg != nil && cfg.Name != "" {
|
||||
modelName = cfg.Name
|
||||
}
|
||||
modelInfo := ®istry.ModelInfo{
|
||||
ID: aliasName,
|
||||
Name: modelName,
|
||||
Description: aliasName,
|
||||
DisplayName: aliasName,
|
||||
Version: aliasName,
|
||||
Object: "model",
|
||||
Created: now,
|
||||
OwnedBy: antigravityAuthType,
|
||||
Type: antigravityAuthType,
|
||||
}
|
||||
// Look up Thinking support from static config using alias name
|
||||
if cfg != nil {
|
||||
if cfg.Thinking != nil {
|
||||
modelInfo.Thinking = cfg.Thinking
|
||||
}
|
||||
if cfg.MaxCompletionTokens > 0 {
|
||||
modelInfo.MaxCompletionTokens = cfg.MaxCompletionTokens
|
||||
}
|
||||
}
|
||||
models = append(models, modelInfo)
|
||||
modelID := strings.TrimSpace(originalName)
|
||||
if modelID == "" {
|
||||
continue
|
||||
}
|
||||
switch modelID {
|
||||
case "chat_20706", "chat_23310", "gemini-2.5-flash-thinking", "gemini-3-pro-low", "gemini-2.5-pro":
|
||||
continue
|
||||
}
|
||||
modelCfg := modelConfig[modelID]
|
||||
modelName := modelID
|
||||
modelInfo := ®istry.ModelInfo{
|
||||
ID: modelID,
|
||||
Name: modelName,
|
||||
Description: modelID,
|
||||
DisplayName: modelID,
|
||||
Version: modelID,
|
||||
Object: "model",
|
||||
Created: now,
|
||||
OwnedBy: antigravityAuthType,
|
||||
Type: antigravityAuthType,
|
||||
}
|
||||
// Look up Thinking support from static config using upstream model name.
|
||||
if modelCfg != nil {
|
||||
if modelCfg.Thinking != nil {
|
||||
modelInfo.Thinking = modelCfg.Thinking
|
||||
}
|
||||
if modelCfg.MaxCompletionTokens > 0 {
|
||||
modelInfo.MaxCompletionTokens = modelCfg.MaxCompletionTokens
|
||||
}
|
||||
}
|
||||
models = append(models, modelInfo)
|
||||
}
|
||||
return models
|
||||
}
|
||||
@@ -1104,12 +1121,49 @@ func (e *AntigravityExecutor) refreshToken(ctx context.Context, auth *cliproxyau
|
||||
auth.Metadata["refresh_token"] = tokenResp.RefreshToken
|
||||
}
|
||||
auth.Metadata["expires_in"] = tokenResp.ExpiresIn
|
||||
auth.Metadata["timestamp"] = time.Now().UnixMilli()
|
||||
auth.Metadata["expired"] = time.Now().Add(time.Duration(tokenResp.ExpiresIn) * time.Second).Format(time.RFC3339)
|
||||
now := time.Now()
|
||||
auth.Metadata["timestamp"] = now.UnixMilli()
|
||||
auth.Metadata["expired"] = now.Add(time.Duration(tokenResp.ExpiresIn) * time.Second).Format(time.RFC3339)
|
||||
auth.Metadata["type"] = antigravityAuthType
|
||||
if errProject := e.ensureAntigravityProjectID(ctx, auth, tokenResp.AccessToken); errProject != nil {
|
||||
log.Warnf("antigravity executor: ensure project id failed: %v", errProject)
|
||||
}
|
||||
return auth, nil
|
||||
}
|
||||
|
||||
func (e *AntigravityExecutor) ensureAntigravityProjectID(ctx context.Context, auth *cliproxyauth.Auth, accessToken string) error {
|
||||
if auth == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if auth.Metadata["project_id"] != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
token := strings.TrimSpace(accessToken)
|
||||
if token == "" {
|
||||
token = metaStringValue(auth.Metadata, "access_token")
|
||||
}
|
||||
if token == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||
projectID, errFetch := sdkAuth.FetchAntigravityProjectID(ctx, token, httpClient)
|
||||
if errFetch != nil {
|
||||
return errFetch
|
||||
}
|
||||
if strings.TrimSpace(projectID) == "" {
|
||||
return nil
|
||||
}
|
||||
if auth.Metadata == nil {
|
||||
auth.Metadata = make(map[string]any)
|
||||
}
|
||||
auth.Metadata["project_id"] = strings.TrimSpace(projectID)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyauth.Auth, token, modelName string, payload []byte, stream bool, alt, baseURL string) (*http.Request, error) {
|
||||
if token == "" {
|
||||
return nil, statusErr{code: http.StatusUnauthorized, msg: "missing access token"}
|
||||
@@ -1146,7 +1200,7 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
|
||||
}
|
||||
}
|
||||
payload = geminiToAntigravity(modelName, payload, projectID)
|
||||
payload, _ = sjson.SetBytes(payload, "model", alias2ModelName(modelName))
|
||||
payload, _ = sjson.SetBytes(payload, "model", modelName)
|
||||
|
||||
if strings.Contains(modelName, "claude") {
|
||||
strJSON := string(payload)
|
||||
@@ -1163,7 +1217,7 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
|
||||
payload = []byte(strJSON)
|
||||
}
|
||||
|
||||
if strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro-preview") {
|
||||
if strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro-high") {
|
||||
systemInstructionPartsResult := gjson.GetBytes(payload, "request.systemInstruction.parts")
|
||||
payload, _ = sjson.SetBytes(payload, "request.systemInstruction.role", "user")
|
||||
payload, _ = sjson.SetBytes(payload, "request.systemInstruction.parts.0.text", systemInstruction)
|
||||
@@ -1353,13 +1407,6 @@ func geminiToAntigravity(modelName string, payload []byte, projectID string) []b
|
||||
template, _ = sjson.Delete(template, "request.safetySettings")
|
||||
template, _ = sjson.Set(template, "request.toolConfig.functionCallingConfig.mode", "VALIDATED")
|
||||
|
||||
if !strings.HasPrefix(modelName, "gemini-3-") {
|
||||
if thinkingLevel := gjson.Get(template, "request.generationConfig.thinkingConfig.thinkingLevel"); thinkingLevel.Exists() {
|
||||
template, _ = sjson.Delete(template, "request.generationConfig.thinkingConfig.thinkingLevel")
|
||||
template, _ = sjson.Set(template, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
|
||||
}
|
||||
}
|
||||
|
||||
if strings.Contains(modelName, "claude") {
|
||||
gjson.Get(template, "request.tools").ForEach(func(key, tool gjson.Result) bool {
|
||||
tool.Get("functionDeclarations").ForEach(func(funKey, funcDecl gjson.Result) bool {
|
||||
@@ -1417,108 +1464,3 @@ func generateProjectID() string {
|
||||
randomPart := strings.ToLower(uuid.NewString())[:5]
|
||||
return adj + "-" + noun + "-" + randomPart
|
||||
}
|
||||
|
||||
func modelName2Alias(modelName string) string {
|
||||
switch modelName {
|
||||
case "rev19-uic3-1p":
|
||||
return "gemini-2.5-computer-use-preview-10-2025"
|
||||
case "gemini-3-pro-image":
|
||||
return "gemini-3-pro-image-preview"
|
||||
case "gemini-3-pro-high":
|
||||
return "gemini-3-pro-preview"
|
||||
case "gemini-3-flash":
|
||||
return "gemini-3-flash-preview"
|
||||
case "claude-sonnet-4-5":
|
||||
return "gemini-claude-sonnet-4-5"
|
||||
case "claude-sonnet-4-5-thinking":
|
||||
return "gemini-claude-sonnet-4-5-thinking"
|
||||
case "claude-opus-4-5-thinking":
|
||||
return "gemini-claude-opus-4-5-thinking"
|
||||
case "chat_20706", "chat_23310", "gemini-2.5-flash-thinking", "gemini-3-pro-low", "gemini-2.5-pro":
|
||||
return ""
|
||||
default:
|
||||
return modelName
|
||||
}
|
||||
}
|
||||
|
||||
func alias2ModelName(modelName string) string {
|
||||
switch modelName {
|
||||
case "gemini-2.5-computer-use-preview-10-2025":
|
||||
return "rev19-uic3-1p"
|
||||
case "gemini-3-pro-image-preview":
|
||||
return "gemini-3-pro-image"
|
||||
case "gemini-3-pro-preview":
|
||||
return "gemini-3-pro-high"
|
||||
case "gemini-3-flash-preview":
|
||||
return "gemini-3-flash"
|
||||
case "gemini-claude-sonnet-4-5":
|
||||
return "claude-sonnet-4-5"
|
||||
case "gemini-claude-sonnet-4-5-thinking":
|
||||
return "claude-sonnet-4-5-thinking"
|
||||
case "gemini-claude-opus-4-5-thinking":
|
||||
return "claude-opus-4-5-thinking"
|
||||
default:
|
||||
return modelName
|
||||
}
|
||||
}
|
||||
|
||||
// normalizeAntigravityThinking clamps or removes thinking config based on model support.
|
||||
// For Claude models, it additionally ensures thinking budget < max_tokens.
|
||||
func normalizeAntigravityThinking(model string, payload []byte, isClaude bool) []byte {
|
||||
payload = util.StripThinkingConfigIfUnsupported(model, payload)
|
||||
if !util.ModelSupportsThinking(model) {
|
||||
return payload
|
||||
}
|
||||
budget := gjson.GetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget")
|
||||
if !budget.Exists() {
|
||||
return payload
|
||||
}
|
||||
raw := int(budget.Int())
|
||||
normalized := util.NormalizeThinkingBudget(model, raw)
|
||||
|
||||
if isClaude {
|
||||
effectiveMax, setDefaultMax := antigravityEffectiveMaxTokens(model, payload)
|
||||
if effectiveMax > 0 && normalized >= effectiveMax {
|
||||
normalized = effectiveMax - 1
|
||||
}
|
||||
minBudget := antigravityMinThinkingBudget(model)
|
||||
if minBudget > 0 && normalized >= 0 && normalized < minBudget {
|
||||
// Budget is below minimum, remove thinking config entirely
|
||||
payload, _ = sjson.DeleteBytes(payload, "request.generationConfig.thinkingConfig")
|
||||
return payload
|
||||
}
|
||||
if setDefaultMax {
|
||||
if res, errSet := sjson.SetBytes(payload, "request.generationConfig.maxOutputTokens", effectiveMax); errSet == nil {
|
||||
payload = res
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
updated, err := sjson.SetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
|
||||
if err != nil {
|
||||
return payload
|
||||
}
|
||||
return updated
|
||||
}
|
||||
|
||||
// antigravityEffectiveMaxTokens returns the max tokens to cap thinking:
|
||||
// prefer request-provided maxOutputTokens; otherwise fall back to model default.
|
||||
// The boolean indicates whether the value came from the model default (and thus should be written back).
|
||||
func antigravityEffectiveMaxTokens(model string, payload []byte) (max int, fromModel bool) {
|
||||
if maxTok := gjson.GetBytes(payload, "request.generationConfig.maxOutputTokens"); maxTok.Exists() && maxTok.Int() > 0 {
|
||||
return int(maxTok.Int()), false
|
||||
}
|
||||
if modelInfo := registry.GetGlobalRegistry().GetModelInfo(model); modelInfo != nil && modelInfo.MaxCompletionTokens > 0 {
|
||||
return modelInfo.MaxCompletionTokens, true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
// antigravityMinThinkingBudget returns the minimum thinking budget for a model.
|
||||
// Falls back to -1 if no model info is found.
|
||||
func antigravityMinThinkingBudget(model string) int {
|
||||
if modelInfo := registry.GetGlobalRegistry().GetModelInfo(model); modelInfo != nil && modelInfo.Thinking != nil {
|
||||
return modelInfo.Thinking.Min
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ import (
|
||||
claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||
@@ -84,17 +84,15 @@ func (e *ClaudeExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut
|
||||
}
|
||||
|
||||
func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
apiKey, baseURL := claudeCreds(auth)
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
apiKey, baseURL := claudeCreds(auth)
|
||||
if baseURL == "" {
|
||||
baseURL = "https://api.anthropic.com"
|
||||
}
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("claude")
|
||||
// Use streaming translation to preserve function calling, except for claude.
|
||||
@@ -103,23 +101,24 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, stream)
|
||||
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream)
|
||||
body, _ = sjson.SetBytes(body, "model", model)
|
||||
// Inject thinking config based on model metadata for thinking variants
|
||||
body = e.injectThinkingConfig(model, req.Metadata, body)
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream)
|
||||
body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
|
||||
body, _ = sjson.SetBytes(body, "model", baseModel)
|
||||
|
||||
if !strings.HasPrefix(model, "claude-3-5-haiku") {
|
||||
body = checkSystemInstructions(body)
|
||||
body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
|
||||
|
||||
// Apply cloaking (system prompt injection, fake user ID, sensitive word obfuscation)
|
||||
// based on client type and configuration.
|
||||
body = applyCloaking(ctx, e.cfg, auth, body, baseModel)
|
||||
|
||||
body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
|
||||
|
||||
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
||||
body = disableThinkingIfToolChoiceForced(body)
|
||||
|
||||
// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
|
||||
body = ensureMaxTokensForThinking(model, body)
|
||||
|
||||
// Extract betas from body and convert to header
|
||||
var extraBetas []string
|
||||
extraBetas, body = extractAndRemoveBetas(body)
|
||||
@@ -218,37 +217,39 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
}
|
||||
|
||||
func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
apiKey, baseURL := claudeCreds(auth)
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
apiKey, baseURL := claudeCreds(auth)
|
||||
if baseURL == "" {
|
||||
baseURL = "https://api.anthropic.com"
|
||||
}
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("claude")
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
originalPayload := bytes.Clone(req.Payload)
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
|
||||
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
|
||||
body, _ = sjson.SetBytes(body, "model", model)
|
||||
// Inject thinking config based on model metadata for thinking variants
|
||||
body = e.injectThinkingConfig(model, req.Metadata, body)
|
||||
body = checkSystemInstructions(body)
|
||||
body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
|
||||
body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
|
||||
body, _ = sjson.SetBytes(body, "model", baseModel)
|
||||
|
||||
body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Apply cloaking (system prompt injection, fake user ID, sensitive word obfuscation)
|
||||
// based on client type and configuration.
|
||||
body = applyCloaking(ctx, e.cfg, auth, body, baseModel)
|
||||
|
||||
body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
|
||||
|
||||
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
||||
body = disableThinkingIfToolChoiceForced(body)
|
||||
|
||||
// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
|
||||
body = ensureMaxTokensForThinking(model, body)
|
||||
|
||||
// Extract betas from body and convert to header
|
||||
var extraBetas []string
|
||||
extraBetas, body = extractAndRemoveBetas(body)
|
||||
@@ -381,8 +382,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
||||
}
|
||||
|
||||
func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
apiKey, baseURL := claudeCreds(auth)
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
apiKey, baseURL := claudeCreds(auth)
|
||||
if baseURL == "" {
|
||||
baseURL = "https://api.anthropic.com"
|
||||
}
|
||||
@@ -391,14 +393,10 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
|
||||
to := sdktranslator.FromString("claude")
|
||||
// Use streaming translation to preserve function calling, except for claude.
|
||||
stream := from != to
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream)
|
||||
body, _ = sjson.SetBytes(body, "model", model)
|
||||
body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
|
||||
body, _ = sjson.SetBytes(body, "model", baseModel)
|
||||
|
||||
if !strings.HasPrefix(model, "claude-3-5-haiku") {
|
||||
if !strings.HasPrefix(baseModel, "claude-3-5-haiku") {
|
||||
body = checkSystemInstructions(body)
|
||||
}
|
||||
|
||||
@@ -527,17 +525,6 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) {
|
||||
return betas, body
|
||||
}
|
||||
|
||||
// injectThinkingConfig adds thinking configuration based on metadata using the unified flow.
|
||||
// It uses util.ResolveClaudeThinkingConfig which internally calls ResolveThinkingConfigFromMetadata
|
||||
// and NormalizeThinkingBudget, ensuring consistency with other executors like Gemini.
|
||||
func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte {
|
||||
budget, ok := util.ResolveClaudeThinkingConfig(modelName, metadata)
|
||||
if !ok {
|
||||
return body
|
||||
}
|
||||
return util.ApplyClaudeThinkingConfig(body, budget)
|
||||
}
|
||||
|
||||
// disableThinkingIfToolChoiceForced checks if tool_choice forces tool use and disables thinking.
|
||||
// Anthropic API does not allow thinking when tool_choice is set to "any" or a specific tool.
|
||||
// See: https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations
|
||||
@@ -551,126 +538,6 @@ func disableThinkingIfToolChoiceForced(body []byte) []byte {
|
||||
return body
|
||||
}
|
||||
|
||||
// ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled.
|
||||
// Anthropic API requires this constraint; violating it returns a 400 error.
|
||||
// This function should be called after all thinking configuration is finalized.
|
||||
// It looks up the model's MaxCompletionTokens from the registry to use as the cap.
|
||||
func ensureMaxTokensForThinking(modelName string, body []byte) []byte {
|
||||
thinkingType := gjson.GetBytes(body, "thinking.type").String()
|
||||
if thinkingType != "enabled" {
|
||||
return body
|
||||
}
|
||||
|
||||
budgetTokens := gjson.GetBytes(body, "thinking.budget_tokens").Int()
|
||||
if budgetTokens <= 0 {
|
||||
return body
|
||||
}
|
||||
|
||||
maxTokens := gjson.GetBytes(body, "max_tokens").Int()
|
||||
|
||||
// Look up the model's max completion tokens from the registry
|
||||
maxCompletionTokens := 0
|
||||
if modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName); modelInfo != nil {
|
||||
maxCompletionTokens = modelInfo.MaxCompletionTokens
|
||||
}
|
||||
|
||||
// Fall back to budget + buffer if registry lookup fails or returns 0
|
||||
const fallbackBuffer = 4000
|
||||
requiredMaxTokens := budgetTokens + fallbackBuffer
|
||||
if maxCompletionTokens > 0 {
|
||||
requiredMaxTokens = int64(maxCompletionTokens)
|
||||
}
|
||||
|
||||
if maxTokens < requiredMaxTokens {
|
||||
body, _ = sjson.SetBytes(body, "max_tokens", requiredMaxTokens)
|
||||
}
|
||||
return body
|
||||
}
|
||||
|
||||
func (e *ClaudeExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
|
||||
trimmed := strings.TrimSpace(alias)
|
||||
if trimmed == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
entry := e.resolveClaudeConfig(auth)
|
||||
if entry == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)
|
||||
|
||||
// Candidate names to match against configured aliases/names.
|
||||
candidates := []string{strings.TrimSpace(normalizedModel)}
|
||||
if !strings.EqualFold(normalizedModel, trimmed) {
|
||||
candidates = append(candidates, trimmed)
|
||||
}
|
||||
if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
|
||||
candidates = append(candidates, original)
|
||||
}
|
||||
|
||||
for i := range entry.Models {
|
||||
model := entry.Models[i]
|
||||
name := strings.TrimSpace(model.Name)
|
||||
modelAlias := strings.TrimSpace(model.Alias)
|
||||
|
||||
for _, candidate := range candidates {
|
||||
if candidate == "" {
|
||||
continue
|
||||
}
|
||||
if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
|
||||
if name != "" {
|
||||
return name
|
||||
}
|
||||
return candidate
|
||||
}
|
||||
if name != "" && strings.EqualFold(name, candidate) {
|
||||
return name
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (e *ClaudeExecutor) resolveClaudeConfig(auth *cliproxyauth.Auth) *config.ClaudeKey {
|
||||
if auth == nil || e.cfg == nil {
|
||||
return nil
|
||||
}
|
||||
var attrKey, attrBase string
|
||||
if auth.Attributes != nil {
|
||||
attrKey = strings.TrimSpace(auth.Attributes["api_key"])
|
||||
attrBase = strings.TrimSpace(auth.Attributes["base_url"])
|
||||
}
|
||||
for i := range e.cfg.ClaudeKey {
|
||||
entry := &e.cfg.ClaudeKey[i]
|
||||
cfgKey := strings.TrimSpace(entry.APIKey)
|
||||
cfgBase := strings.TrimSpace(entry.BaseURL)
|
||||
if attrKey != "" && attrBase != "" {
|
||||
if strings.EqualFold(cfgKey, attrKey) && strings.EqualFold(cfgBase, attrBase) {
|
||||
return entry
|
||||
}
|
||||
continue
|
||||
}
|
||||
if attrKey != "" && strings.EqualFold(cfgKey, attrKey) {
|
||||
if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) {
|
||||
return entry
|
||||
}
|
||||
}
|
||||
if attrKey == "" && attrBase != "" && strings.EqualFold(cfgBase, attrBase) {
|
||||
return entry
|
||||
}
|
||||
}
|
||||
if attrKey != "" {
|
||||
for i := range e.cfg.ClaudeKey {
|
||||
entry := &e.cfg.ClaudeKey[i]
|
||||
if strings.EqualFold(strings.TrimSpace(entry.APIKey), attrKey) {
|
||||
return entry
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type compositeReadCloser struct {
|
||||
io.Reader
|
||||
closers []func() error
|
||||
@@ -956,3 +823,163 @@ func stripClaudeToolPrefixFromStreamLine(line []byte, prefix string) []byte {
|
||||
}
|
||||
return updated
|
||||
}
|
||||
|
||||
// getClientUserAgent extracts the client User-Agent from the gin context.
|
||||
func getClientUserAgent(ctx context.Context) string {
|
||||
if ginCtx, ok := ctx.Value("gin").(*gin.Context); ok && ginCtx != nil && ginCtx.Request != nil {
|
||||
return ginCtx.GetHeader("User-Agent")
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// getCloakConfigFromAuth extracts cloak configuration from auth attributes.
|
||||
// Returns (cloakMode, strictMode, sensitiveWords).
|
||||
func getCloakConfigFromAuth(auth *cliproxyauth.Auth) (string, bool, []string) {
|
||||
if auth == nil || auth.Attributes == nil {
|
||||
return "auto", false, nil
|
||||
}
|
||||
|
||||
cloakMode := auth.Attributes["cloak_mode"]
|
||||
if cloakMode == "" {
|
||||
cloakMode = "auto"
|
||||
}
|
||||
|
||||
strictMode := strings.ToLower(auth.Attributes["cloak_strict_mode"]) == "true"
|
||||
|
||||
var sensitiveWords []string
|
||||
if wordsStr := auth.Attributes["cloak_sensitive_words"]; wordsStr != "" {
|
||||
sensitiveWords = strings.Split(wordsStr, ",")
|
||||
for i := range sensitiveWords {
|
||||
sensitiveWords[i] = strings.TrimSpace(sensitiveWords[i])
|
||||
}
|
||||
}
|
||||
|
||||
return cloakMode, strictMode, sensitiveWords
|
||||
}
|
||||
|
||||
// resolveClaudeKeyCloakConfig finds the matching ClaudeKey config and returns its CloakConfig.
|
||||
func resolveClaudeKeyCloakConfig(cfg *config.Config, auth *cliproxyauth.Auth) *config.CloakConfig {
|
||||
if cfg == nil || auth == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
apiKey, baseURL := claudeCreds(auth)
|
||||
if apiKey == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
for i := range cfg.ClaudeKey {
|
||||
entry := &cfg.ClaudeKey[i]
|
||||
cfgKey := strings.TrimSpace(entry.APIKey)
|
||||
cfgBase := strings.TrimSpace(entry.BaseURL)
|
||||
|
||||
// Match by API key
|
||||
if strings.EqualFold(cfgKey, apiKey) {
|
||||
// If baseURL is specified, also check it
|
||||
if baseURL != "" && cfgBase != "" && !strings.EqualFold(cfgBase, baseURL) {
|
||||
continue
|
||||
}
|
||||
return entry.Cloak
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// injectFakeUserID generates and injects a fake user ID into the request metadata.
|
||||
func injectFakeUserID(payload []byte) []byte {
|
||||
metadata := gjson.GetBytes(payload, "metadata")
|
||||
if !metadata.Exists() {
|
||||
payload, _ = sjson.SetBytes(payload, "metadata.user_id", generateFakeUserID())
|
||||
return payload
|
||||
}
|
||||
|
||||
existingUserID := gjson.GetBytes(payload, "metadata.user_id").String()
|
||||
if existingUserID == "" || !isValidUserID(existingUserID) {
|
||||
payload, _ = sjson.SetBytes(payload, "metadata.user_id", generateFakeUserID())
|
||||
}
|
||||
return payload
|
||||
}
|
||||
|
||||
// checkSystemInstructionsWithMode injects Claude Code system prompt.
|
||||
// In strict mode, it replaces all user system messages.
|
||||
// In non-strict mode (default), it prepends to existing system messages.
|
||||
func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte {
|
||||
system := gjson.GetBytes(payload, "system")
|
||||
claudeCodeInstructions := `[{"type":"text","text":"You are Claude Code, Anthropic's official CLI for Claude."}]`
|
||||
|
||||
if strictMode {
|
||||
// Strict mode: replace all system messages with Claude Code prompt only
|
||||
payload, _ = sjson.SetRawBytes(payload, "system", []byte(claudeCodeInstructions))
|
||||
return payload
|
||||
}
|
||||
|
||||
// Non-strict mode (default): prepend Claude Code prompt to existing system messages
|
||||
if system.IsArray() {
|
||||
if gjson.GetBytes(payload, "system.0.text").String() != "You are Claude Code, Anthropic's official CLI for Claude." {
|
||||
system.ForEach(func(_, part gjson.Result) bool {
|
||||
if part.Get("type").String() == "text" {
|
||||
claudeCodeInstructions, _ = sjson.SetRaw(claudeCodeInstructions, "-1", part.Raw)
|
||||
}
|
||||
return true
|
||||
})
|
||||
payload, _ = sjson.SetRawBytes(payload, "system", []byte(claudeCodeInstructions))
|
||||
}
|
||||
} else {
|
||||
payload, _ = sjson.SetRawBytes(payload, "system", []byte(claudeCodeInstructions))
|
||||
}
|
||||
return payload
|
||||
}
|
||||
|
||||
// applyCloaking applies cloaking transformations to the payload based on config and client.
|
||||
// Cloaking includes: system prompt injection, fake user ID, and sensitive word obfuscation.
|
||||
func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, payload []byte, model string) []byte {
|
||||
clientUserAgent := getClientUserAgent(ctx)
|
||||
|
||||
// Get cloak config from ClaudeKey configuration
|
||||
cloakCfg := resolveClaudeKeyCloakConfig(cfg, auth)
|
||||
|
||||
// Determine cloak settings
|
||||
var cloakMode string
|
||||
var strictMode bool
|
||||
var sensitiveWords []string
|
||||
|
||||
if cloakCfg != nil {
|
||||
cloakMode = cloakCfg.Mode
|
||||
strictMode = cloakCfg.StrictMode
|
||||
sensitiveWords = cloakCfg.SensitiveWords
|
||||
}
|
||||
|
||||
// Fallback to auth attributes if no config found
|
||||
if cloakMode == "" {
|
||||
attrMode, attrStrict, attrWords := getCloakConfigFromAuth(auth)
|
||||
cloakMode = attrMode
|
||||
if !strictMode {
|
||||
strictMode = attrStrict
|
||||
}
|
||||
if len(sensitiveWords) == 0 {
|
||||
sensitiveWords = attrWords
|
||||
}
|
||||
}
|
||||
|
||||
// Determine if cloaking should be applied
|
||||
if !shouldCloak(cloakMode, clientUserAgent) {
|
||||
return payload
|
||||
}
|
||||
|
||||
// Skip system instructions for claude-3-5-haiku models
|
||||
if !strings.HasPrefix(model, "claude-3-5-haiku") {
|
||||
payload = checkSystemInstructionsWithMode(payload, strictMode)
|
||||
}
|
||||
|
||||
// Inject fake user ID
|
||||
payload = injectFakeUserID(payload)
|
||||
|
||||
// Apply sensitive word obfuscation
|
||||
if len(sensitiveWords) > 0 {
|
||||
matcher := buildSensitiveWordMatcher(sensitiveWords)
|
||||
payload = obfuscateSensitiveWords(payload, matcher)
|
||||
}
|
||||
|
||||
return payload
|
||||
}
|
||||
|
||||
176
internal/runtime/executor/cloak_obfuscate.go
Normal file
176
internal/runtime/executor/cloak_obfuscate.go
Normal file
@@ -0,0 +1,176 @@
|
||||
package executor
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// zeroWidthSpace is the Unicode zero-width space character used for obfuscation.
|
||||
const zeroWidthSpace = "\u200B"
|
||||
|
||||
// SensitiveWordMatcher holds the compiled regex for matching sensitive words.
|
||||
type SensitiveWordMatcher struct {
|
||||
regex *regexp.Regexp
|
||||
}
|
||||
|
||||
// buildSensitiveWordMatcher compiles a regex from the word list.
|
||||
// Words are sorted by length (longest first) for proper matching.
|
||||
func buildSensitiveWordMatcher(words []string) *SensitiveWordMatcher {
|
||||
if len(words) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Filter and normalize words
|
||||
var validWords []string
|
||||
for _, w := range words {
|
||||
w = strings.TrimSpace(w)
|
||||
if utf8.RuneCountInString(w) >= 2 && !strings.Contains(w, zeroWidthSpace) {
|
||||
validWords = append(validWords, w)
|
||||
}
|
||||
}
|
||||
|
||||
if len(validWords) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Sort by length (longest first) for proper matching
|
||||
sort.Slice(validWords, func(i, j int) bool {
|
||||
return len(validWords[i]) > len(validWords[j])
|
||||
})
|
||||
|
||||
// Escape and join
|
||||
escaped := make([]string, len(validWords))
|
||||
for i, w := range validWords {
|
||||
escaped[i] = regexp.QuoteMeta(w)
|
||||
}
|
||||
|
||||
pattern := "(?i)" + strings.Join(escaped, "|")
|
||||
re, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return &SensitiveWordMatcher{regex: re}
|
||||
}
|
||||
|
||||
// obfuscateWord inserts a zero-width space after the first grapheme.
|
||||
func obfuscateWord(word string) string {
|
||||
if strings.Contains(word, zeroWidthSpace) {
|
||||
return word
|
||||
}
|
||||
|
||||
// Get first rune
|
||||
r, size := utf8.DecodeRuneInString(word)
|
||||
if r == utf8.RuneError || size >= len(word) {
|
||||
return word
|
||||
}
|
||||
|
||||
return string(r) + zeroWidthSpace + word[size:]
|
||||
}
|
||||
|
||||
// obfuscateText replaces all sensitive words in the text.
|
||||
func (m *SensitiveWordMatcher) obfuscateText(text string) string {
|
||||
if m == nil || m.regex == nil {
|
||||
return text
|
||||
}
|
||||
return m.regex.ReplaceAllStringFunc(text, obfuscateWord)
|
||||
}
|
||||
|
||||
// obfuscateSensitiveWords processes the payload and obfuscates sensitive words
|
||||
// in system blocks and message content.
|
||||
func obfuscateSensitiveWords(payload []byte, matcher *SensitiveWordMatcher) []byte {
|
||||
if matcher == nil || matcher.regex == nil {
|
||||
return payload
|
||||
}
|
||||
|
||||
// Obfuscate in system blocks
|
||||
payload = obfuscateSystemBlocks(payload, matcher)
|
||||
|
||||
// Obfuscate in messages
|
||||
payload = obfuscateMessages(payload, matcher)
|
||||
|
||||
return payload
|
||||
}
|
||||
|
||||
// obfuscateSystemBlocks obfuscates sensitive words in system blocks.
|
||||
func obfuscateSystemBlocks(payload []byte, matcher *SensitiveWordMatcher) []byte {
|
||||
system := gjson.GetBytes(payload, "system")
|
||||
if !system.Exists() {
|
||||
return payload
|
||||
}
|
||||
|
||||
if system.IsArray() {
|
||||
modified := false
|
||||
system.ForEach(func(key, value gjson.Result) bool {
|
||||
if value.Get("type").String() == "text" {
|
||||
text := value.Get("text").String()
|
||||
obfuscated := matcher.obfuscateText(text)
|
||||
if obfuscated != text {
|
||||
path := "system." + key.String() + ".text"
|
||||
payload, _ = sjson.SetBytes(payload, path, obfuscated)
|
||||
modified = true
|
||||
}
|
||||
}
|
||||
return true
|
||||
})
|
||||
if modified {
|
||||
return payload
|
||||
}
|
||||
} else if system.Type == gjson.String {
|
||||
text := system.String()
|
||||
obfuscated := matcher.obfuscateText(text)
|
||||
if obfuscated != text {
|
||||
payload, _ = sjson.SetBytes(payload, "system", obfuscated)
|
||||
}
|
||||
}
|
||||
|
||||
return payload
|
||||
}
|
||||
|
||||
// obfuscateMessages obfuscates sensitive words in message content.
|
||||
func obfuscateMessages(payload []byte, matcher *SensitiveWordMatcher) []byte {
|
||||
messages := gjson.GetBytes(payload, "messages")
|
||||
if !messages.Exists() || !messages.IsArray() {
|
||||
return payload
|
||||
}
|
||||
|
||||
messages.ForEach(func(msgKey, msg gjson.Result) bool {
|
||||
content := msg.Get("content")
|
||||
if !content.Exists() {
|
||||
return true
|
||||
}
|
||||
|
||||
msgPath := "messages." + msgKey.String()
|
||||
|
||||
if content.Type == gjson.String {
|
||||
// Simple string content
|
||||
text := content.String()
|
||||
obfuscated := matcher.obfuscateText(text)
|
||||
if obfuscated != text {
|
||||
payload, _ = sjson.SetBytes(payload, msgPath+".content", obfuscated)
|
||||
}
|
||||
} else if content.IsArray() {
|
||||
// Array of content blocks
|
||||
content.ForEach(func(blockKey, block gjson.Result) bool {
|
||||
if block.Get("type").String() == "text" {
|
||||
text := block.Get("text").String()
|
||||
obfuscated := matcher.obfuscateText(text)
|
||||
if obfuscated != text {
|
||||
path := msgPath + ".content." + blockKey.String() + ".text"
|
||||
payload, _ = sjson.SetBytes(payload, path, obfuscated)
|
||||
}
|
||||
}
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
return true
|
||||
})
|
||||
|
||||
return payload
|
||||
}
|
||||
47
internal/runtime/executor/cloak_utils.go
Normal file
47
internal/runtime/executor/cloak_utils.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package executor
|
||||
|
||||
import (
|
||||
"crypto/rand"
|
||||
"encoding/hex"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// userIDPattern matches Claude Code format: user_[64-hex]_account__session_[uuid-v4]
|
||||
var userIDPattern = regexp.MustCompile(`^user_[a-fA-F0-9]{64}_account__session_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$`)
|
||||
|
||||
// generateFakeUserID generates a fake user ID in Claude Code format.
|
||||
// Format: user_[64-hex-chars]_account__session_[UUID-v4]
|
||||
func generateFakeUserID() string {
|
||||
hexBytes := make([]byte, 32)
|
||||
_, _ = rand.Read(hexBytes)
|
||||
hexPart := hex.EncodeToString(hexBytes)
|
||||
uuidPart := uuid.New().String()
|
||||
return "user_" + hexPart + "_account__session_" + uuidPart
|
||||
}
|
||||
|
||||
// isValidUserID checks if a user ID matches Claude Code format.
|
||||
func isValidUserID(userID string) bool {
|
||||
return userIDPattern.MatchString(userID)
|
||||
}
|
||||
|
||||
// shouldCloak determines if request should be cloaked based on config and client User-Agent.
|
||||
// Returns true if cloaking should be applied.
|
||||
func shouldCloak(cloakMode string, userAgent string) bool {
|
||||
switch strings.ToLower(cloakMode) {
|
||||
case "always":
|
||||
return true
|
||||
case "never":
|
||||
return false
|
||||
default: // "auto" or empty
|
||||
// If client is Claude Code, don't cloak
|
||||
return !strings.HasPrefix(userAgent, "claude-cli")
|
||||
}
|
||||
}
|
||||
|
||||
// isClaudeCodeClient checks if the User-Agent indicates a Claude Code client.
|
||||
func isClaudeCodeClient(userAgent string) bool {
|
||||
return strings.HasPrefix(userAgent, "claude-cli")
|
||||
}
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
codexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/codex"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||
@@ -72,18 +73,15 @@ func (e *CodexExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth
|
||||
}
|
||||
|
||||
func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
apiKey, baseURL := codexCreds(auth)
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
apiKey, baseURL := codexCreds(auth)
|
||||
if baseURL == "" {
|
||||
baseURL = "https://chatgpt.com/backend-api/codex"
|
||||
}
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("codex")
|
||||
@@ -93,20 +91,25 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalPayload = misc.InjectCodexUserAgent(originalPayload, userAgent)
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
|
||||
body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent)
|
||||
body = sdktranslator.TranslateRequest(from, to, model, body, false)
|
||||
body = sdktranslator.TranslateRequest(from, to, baseModel, body, false)
|
||||
body = misc.StripCodexUserAgent(body)
|
||||
body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
|
||||
body = NormalizeThinkingConfig(body, model, false)
|
||||
if errValidate := ValidateThinkingConfig(body, model); errValidate != nil {
|
||||
return resp, errValidate
|
||||
|
||||
body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
|
||||
body, _ = sjson.SetBytes(body, "model", model)
|
||||
|
||||
body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
|
||||
body, _ = sjson.SetBytes(body, "model", baseModel)
|
||||
body, _ = sjson.SetBytes(body, "stream", true)
|
||||
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
||||
body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
|
||||
body, _ = sjson.DeleteBytes(body, "safety_identifier")
|
||||
if !gjson.GetBytes(body, "instructions").Exists() {
|
||||
body, _ = sjson.SetBytes(body, "instructions", "")
|
||||
}
|
||||
|
||||
url := strings.TrimSuffix(baseURL, "/") + "/responses"
|
||||
httpReq, err := e.cacheHelper(ctx, from, url, req, body)
|
||||
@@ -182,18 +185,15 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
||||
}
|
||||
|
||||
func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
apiKey, baseURL := codexCreds(auth)
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
apiKey, baseURL := codexCreds(auth)
|
||||
if baseURL == "" {
|
||||
baseURL = "https://chatgpt.com/backend-api/codex"
|
||||
}
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("codex")
|
||||
@@ -203,20 +203,24 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalPayload = misc.InjectCodexUserAgent(originalPayload, userAgent)
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
|
||||
body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent)
|
||||
body = sdktranslator.TranslateRequest(from, to, model, body, true)
|
||||
body = sdktranslator.TranslateRequest(from, to, baseModel, body, true)
|
||||
body = misc.StripCodexUserAgent(body)
|
||||
|
||||
body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
|
||||
body = NormalizeThinkingConfig(body, model, false)
|
||||
if errValidate := ValidateThinkingConfig(body, model); errValidate != nil {
|
||||
return nil, errValidate
|
||||
body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
|
||||
|
||||
body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
|
||||
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
||||
body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
|
||||
body, _ = sjson.SetBytes(body, "model", model)
|
||||
body, _ = sjson.DeleteBytes(body, "safety_identifier")
|
||||
body, _ = sjson.SetBytes(body, "model", baseModel)
|
||||
if !gjson.GetBytes(body, "instructions").Exists() {
|
||||
body, _ = sjson.SetBytes(body, "instructions", "")
|
||||
}
|
||||
|
||||
url := strings.TrimSuffix(baseURL, "/") + "/responses"
|
||||
httpReq, err := e.cacheHelper(ctx, from, url, req, body)
|
||||
@@ -303,25 +307,30 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
}
|
||||
|
||||
func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("codex")
|
||||
userAgent := codexUserAgent(ctx)
|
||||
body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent)
|
||||
body = sdktranslator.TranslateRequest(from, to, model, body, false)
|
||||
body = sdktranslator.TranslateRequest(from, to, baseModel, body, false)
|
||||
body = misc.StripCodexUserAgent(body)
|
||||
|
||||
body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
|
||||
body, _ = sjson.SetBytes(body, "model", model)
|
||||
body, err := thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return cliproxyexecutor.Response{}, err
|
||||
}
|
||||
|
||||
body, _ = sjson.SetBytes(body, "model", baseModel)
|
||||
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
||||
body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
|
||||
body, _ = sjson.DeleteBytes(body, "safety_identifier")
|
||||
body, _ = sjson.SetBytes(body, "stream", false)
|
||||
if !gjson.GetBytes(body, "instructions").Exists() {
|
||||
body, _ = sjson.SetBytes(body, "instructions", "")
|
||||
}
|
||||
|
||||
enc, err := tokenizerForCodexModel(model)
|
||||
enc, err := tokenizerForCodexModel(baseModel)
|
||||
if err != nil {
|
||||
return cliproxyexecutor.Response{}, fmt.Errorf("codex executor: tokenizer init failed: %w", err)
|
||||
}
|
||||
@@ -593,51 +602,6 @@ func codexCreds(a *cliproxyauth.Auth) (apiKey, baseURL string) {
|
||||
return
|
||||
}
|
||||
|
||||
func (e *CodexExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
|
||||
trimmed := strings.TrimSpace(alias)
|
||||
if trimmed == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
entry := e.resolveCodexConfig(auth)
|
||||
if entry == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)
|
||||
|
||||
// Candidate names to match against configured aliases/names.
|
||||
candidates := []string{strings.TrimSpace(normalizedModel)}
|
||||
if !strings.EqualFold(normalizedModel, trimmed) {
|
||||
candidates = append(candidates, trimmed)
|
||||
}
|
||||
if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
|
||||
candidates = append(candidates, original)
|
||||
}
|
||||
|
||||
for i := range entry.Models {
|
||||
model := entry.Models[i]
|
||||
name := strings.TrimSpace(model.Name)
|
||||
modelAlias := strings.TrimSpace(model.Alias)
|
||||
|
||||
for _, candidate := range candidates {
|
||||
if candidate == "" {
|
||||
continue
|
||||
}
|
||||
if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
|
||||
if name != "" {
|
||||
return name
|
||||
}
|
||||
return candidate
|
||||
}
|
||||
if name != "" && strings.EqualFold(name, candidate) {
|
||||
return name
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (e *CodexExecutor) resolveCodexConfig(auth *cliproxyauth.Auth) *config.CodexKey {
|
||||
if auth == nil || e.cfg == nil {
|
||||
return nil
|
||||
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||
@@ -102,28 +103,33 @@ func (e *GeminiCLIExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.
|
||||
|
||||
// Execute performs a non-streaming request to the Gemini CLI API.
|
||||
func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini-cli")
|
||||
|
||||
originalPayload := bytes.Clone(req.Payload)
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
|
||||
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
|
||||
basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
|
||||
basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload)
|
||||
basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
|
||||
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
|
||||
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
|
||||
basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated)
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
|
||||
basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
|
||||
|
||||
basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
|
||||
basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload)
|
||||
basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated)
|
||||
|
||||
action := "generateContent"
|
||||
if req.Metadata != nil {
|
||||
@@ -133,9 +139,9 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
|
||||
}
|
||||
|
||||
projectID := resolveGeminiProjectID(auth)
|
||||
models := cliPreviewFallbackOrder(req.Model)
|
||||
if len(models) == 0 || models[0] != req.Model {
|
||||
models = append([]string{req.Model}, models...)
|
||||
models := cliPreviewFallbackOrder(baseModel)
|
||||
if len(models) == 0 || models[0] != baseModel {
|
||||
models = append([]string{baseModel}, models...)
|
||||
}
|
||||
|
||||
httpClient := newHTTPClient(ctx, e.cfg, auth, 0)
|
||||
@@ -246,34 +252,39 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
|
||||
|
||||
// ExecuteStream performs a streaming request to the Gemini CLI API.
|
||||
func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini-cli")
|
||||
|
||||
originalPayload := bytes.Clone(req.Payload)
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
|
||||
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
|
||||
basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
|
||||
basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload)
|
||||
basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
|
||||
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
|
||||
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
|
||||
basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated)
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
|
||||
basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
|
||||
|
||||
basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload)
|
||||
basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated)
|
||||
|
||||
projectID := resolveGeminiProjectID(auth)
|
||||
|
||||
models := cliPreviewFallbackOrder(req.Model)
|
||||
if len(models) == 0 || models[0] != req.Model {
|
||||
models = append([]string{req.Model}, models...)
|
||||
models := cliPreviewFallbackOrder(baseModel)
|
||||
if len(models) == 0 || models[0] != baseModel {
|
||||
models = append([]string{baseModel}, models...)
|
||||
}
|
||||
|
||||
httpClient := newHTTPClient(ctx, e.cfg, auth, 0)
|
||||
@@ -435,6 +446,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
|
||||
|
||||
// CountTokens counts tokens for the given request using the Gemini CLI API.
|
||||
func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
|
||||
if err != nil {
|
||||
return cliproxyexecutor.Response{}, err
|
||||
@@ -443,9 +456,9 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini-cli")
|
||||
|
||||
models := cliPreviewFallbackOrder(req.Model)
|
||||
if len(models) == 0 || models[0] != req.Model {
|
||||
models = append([]string{req.Model}, models...)
|
||||
models := cliPreviewFallbackOrder(baseModel)
|
||||
if len(models) == 0 || models[0] != baseModel {
|
||||
models = append([]string{baseModel}, models...)
|
||||
}
|
||||
|
||||
httpClient := newHTTPClient(ctx, e.cfg, auth, 0)
|
||||
@@ -463,15 +476,18 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
|
||||
|
||||
// The loop variable attemptModel is only used as the concrete model id sent to the upstream
|
||||
// Gemini CLI endpoint when iterating fallback variants.
|
||||
for _, attemptModel := range models {
|
||||
payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false)
|
||||
payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model)
|
||||
payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, payload)
|
||||
for range models {
|
||||
payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
|
||||
|
||||
payload, err = thinking.ApplyThinking(payload, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return cliproxyexecutor.Response{}, err
|
||||
}
|
||||
|
||||
payload = deleteJSONField(payload, "project")
|
||||
payload = deleteJSONField(payload, "model")
|
||||
payload = deleteJSONField(payload, "request.safetySettings")
|
||||
payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
|
||||
payload = fixGeminiCLIImageAspectRatio(req.Model, payload)
|
||||
payload = fixGeminiCLIImageAspectRatio(baseModel, payload)
|
||||
|
||||
tok, errTok := tokenSource.Token()
|
||||
if errTok != nil {
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||
@@ -102,16 +103,13 @@ func (e *GeminiExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut
|
||||
// - cliproxyexecutor.Response: The response from the API
|
||||
// - error: An error if the request fails
|
||||
func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
apiKey, bearer := geminiCreds(auth)
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
|
||||
// Official Gemini API via API key or OAuth bearer
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
@@ -119,15 +117,17 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
|
||||
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
|
||||
body = ApplyThinkingMetadata(body, req.Metadata, model)
|
||||
body = util.ApplyDefaultThinkingIfNeeded(model, body)
|
||||
body = util.NormalizeGeminiThinkingBudget(model, body)
|
||||
body = util.StripThinkingConfigIfUnsupported(model, body)
|
||||
body = fixGeminiImageAspectRatio(model, body)
|
||||
body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
|
||||
body, _ = sjson.SetBytes(body, "model", model)
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
|
||||
body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
|
||||
|
||||
body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
|
||||
body = fixGeminiImageAspectRatio(baseModel, body)
|
||||
body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
|
||||
body, _ = sjson.SetBytes(body, "model", baseModel)
|
||||
|
||||
action := "generateContent"
|
||||
if req.Metadata != nil {
|
||||
@@ -136,7 +136,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
}
|
||||
}
|
||||
baseURL := resolveGeminiBaseURL(auth)
|
||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, action)
|
||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, action)
|
||||
if opts.Alt != "" && action != "countTokens" {
|
||||
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
||||
}
|
||||
@@ -206,34 +206,33 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
|
||||
// ExecuteStream performs a streaming request to the Gemini API.
|
||||
func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
apiKey, bearer := geminiCreds(auth)
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
originalPayload := bytes.Clone(req.Payload)
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
|
||||
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
|
||||
body = ApplyThinkingMetadata(body, req.Metadata, model)
|
||||
body = util.ApplyDefaultThinkingIfNeeded(model, body)
|
||||
body = util.NormalizeGeminiThinkingBudget(model, body)
|
||||
body = util.StripThinkingConfigIfUnsupported(model, body)
|
||||
body = fixGeminiImageAspectRatio(model, body)
|
||||
body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
|
||||
body, _ = sjson.SetBytes(body, "model", model)
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
|
||||
body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
|
||||
|
||||
body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
body = fixGeminiImageAspectRatio(baseModel, body)
|
||||
body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
|
||||
body, _ = sjson.SetBytes(body, "model", baseModel)
|
||||
|
||||
baseURL := resolveGeminiBaseURL(auth)
|
||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, "streamGenerateContent")
|
||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, "streamGenerateContent")
|
||||
if opts.Alt == "" {
|
||||
url = url + "?alt=sse"
|
||||
} else {
|
||||
@@ -331,27 +330,28 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
||||
|
||||
// CountTokens counts tokens for the given request using the Gemini API.
|
||||
func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
apiKey, bearer := geminiCreds(auth)
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
apiKey, bearer := geminiCreds(auth)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
|
||||
translatedReq = ApplyThinkingMetadata(translatedReq, req.Metadata, model)
|
||||
translatedReq = util.StripThinkingConfigIfUnsupported(model, translatedReq)
|
||||
translatedReq = fixGeminiImageAspectRatio(model, translatedReq)
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
|
||||
|
||||
translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return cliproxyexecutor.Response{}, err
|
||||
}
|
||||
|
||||
translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq)
|
||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
|
||||
translatedReq, _ = sjson.SetBytes(translatedReq, "model", model)
|
||||
translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel)
|
||||
|
||||
baseURL := resolveGeminiBaseURL(auth)
|
||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, "countTokens")
|
||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, "countTokens")
|
||||
|
||||
requestBody := bytes.NewReader(translatedReq)
|
||||
|
||||
@@ -450,51 +450,6 @@ func resolveGeminiBaseURL(auth *cliproxyauth.Auth) string {
|
||||
return base
|
||||
}
|
||||
|
||||
func (e *GeminiExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
|
||||
trimmed := strings.TrimSpace(alias)
|
||||
if trimmed == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
entry := e.resolveGeminiConfig(auth)
|
||||
if entry == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)
|
||||
|
||||
// Candidate names to match against configured aliases/names.
|
||||
candidates := []string{strings.TrimSpace(normalizedModel)}
|
||||
if !strings.EqualFold(normalizedModel, trimmed) {
|
||||
candidates = append(candidates, trimmed)
|
||||
}
|
||||
if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
|
||||
candidates = append(candidates, original)
|
||||
}
|
||||
|
||||
for i := range entry.Models {
|
||||
model := entry.Models[i]
|
||||
name := strings.TrimSpace(model.Name)
|
||||
modelAlias := strings.TrimSpace(model.Alias)
|
||||
|
||||
for _, candidate := range candidates {
|
||||
if candidate == "" {
|
||||
continue
|
||||
}
|
||||
if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
|
||||
if name != "" {
|
||||
return name
|
||||
}
|
||||
return candidate
|
||||
}
|
||||
if name != "" && strings.EqualFold(name, candidate) {
|
||||
return name
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (e *GeminiExecutor) resolveGeminiConfig(auth *cliproxyauth.Auth) *config.GeminiKey {
|
||||
if auth == nil || e.cfg == nil {
|
||||
return nil
|
||||
|
||||
@@ -12,10 +12,11 @@ import (
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
vertexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/vertex"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
||||
@@ -31,6 +32,143 @@ const (
|
||||
vertexAPIVersion = "v1"
|
||||
)
|
||||
|
||||
// isImagenModel checks if the model name is an Imagen image generation model.
|
||||
// Imagen models use the :predict action instead of :generateContent.
|
||||
func isImagenModel(model string) bool {
|
||||
lowerModel := strings.ToLower(model)
|
||||
return strings.Contains(lowerModel, "imagen")
|
||||
}
|
||||
|
||||
// getVertexAction returns the appropriate action for the given model.
|
||||
// Imagen models use "predict", while Gemini models use "generateContent".
|
||||
func getVertexAction(model string, isStream bool) string {
|
||||
if isImagenModel(model) {
|
||||
return "predict"
|
||||
}
|
||||
if isStream {
|
||||
return "streamGenerateContent"
|
||||
}
|
||||
return "generateContent"
|
||||
}
|
||||
|
||||
// convertImagenToGeminiResponse converts Imagen API response to Gemini format
|
||||
// so it can be processed by the standard translation pipeline.
|
||||
// This ensures Imagen models return responses in the same format as gemini-3-pro-image-preview.
|
||||
func convertImagenToGeminiResponse(data []byte, model string) []byte {
|
||||
predictions := gjson.GetBytes(data, "predictions")
|
||||
if !predictions.Exists() || !predictions.IsArray() {
|
||||
return data
|
||||
}
|
||||
|
||||
// Build Gemini-compatible response with inlineData
|
||||
parts := make([]map[string]any, 0)
|
||||
for _, pred := range predictions.Array() {
|
||||
imageData := pred.Get("bytesBase64Encoded").String()
|
||||
mimeType := pred.Get("mimeType").String()
|
||||
if mimeType == "" {
|
||||
mimeType = "image/png"
|
||||
}
|
||||
if imageData != "" {
|
||||
parts = append(parts, map[string]any{
|
||||
"inlineData": map[string]any{
|
||||
"mimeType": mimeType,
|
||||
"data": imageData,
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Generate unique response ID using timestamp
|
||||
responseId := fmt.Sprintf("imagen-%d", time.Now().UnixNano())
|
||||
|
||||
response := map[string]any{
|
||||
"candidates": []map[string]any{{
|
||||
"content": map[string]any{
|
||||
"parts": parts,
|
||||
"role": "model",
|
||||
},
|
||||
"finishReason": "STOP",
|
||||
}},
|
||||
"responseId": responseId,
|
||||
"modelVersion": model,
|
||||
// Imagen API doesn't return token counts, set to 0 for tracking purposes
|
||||
"usageMetadata": map[string]any{
|
||||
"promptTokenCount": 0,
|
||||
"candidatesTokenCount": 0,
|
||||
"totalTokenCount": 0,
|
||||
},
|
||||
}
|
||||
|
||||
result, err := json.Marshal(response)
|
||||
if err != nil {
|
||||
return data
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// convertToImagenRequest converts a Gemini-style request to Imagen API format.
|
||||
// Imagen API uses a different structure: instances[].prompt instead of contents[].
|
||||
func convertToImagenRequest(payload []byte) ([]byte, error) {
|
||||
// Extract prompt from Gemini-style contents
|
||||
prompt := ""
|
||||
|
||||
// Try to get prompt from contents[0].parts[0].text
|
||||
contentsText := gjson.GetBytes(payload, "contents.0.parts.0.text")
|
||||
if contentsText.Exists() {
|
||||
prompt = contentsText.String()
|
||||
}
|
||||
|
||||
// If no contents, try messages format (OpenAI-compatible)
|
||||
if prompt == "" {
|
||||
messagesText := gjson.GetBytes(payload, "messages.#.content")
|
||||
if messagesText.Exists() && messagesText.IsArray() {
|
||||
for _, msg := range messagesText.Array() {
|
||||
if msg.String() != "" {
|
||||
prompt = msg.String()
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If still no prompt, try direct prompt field
|
||||
if prompt == "" {
|
||||
directPrompt := gjson.GetBytes(payload, "prompt")
|
||||
if directPrompt.Exists() {
|
||||
prompt = directPrompt.String()
|
||||
}
|
||||
}
|
||||
|
||||
if prompt == "" {
|
||||
return nil, fmt.Errorf("imagen: no prompt found in request")
|
||||
}
|
||||
|
||||
// Build Imagen API request
|
||||
imagenReq := map[string]any{
|
||||
"instances": []map[string]any{
|
||||
{
|
||||
"prompt": prompt,
|
||||
},
|
||||
},
|
||||
"parameters": map[string]any{
|
||||
"sampleCount": 1,
|
||||
},
|
||||
}
|
||||
|
||||
// Extract optional parameters
|
||||
if aspectRatio := gjson.GetBytes(payload, "aspectRatio"); aspectRatio.Exists() {
|
||||
imagenReq["parameters"].(map[string]any)["aspectRatio"] = aspectRatio.String()
|
||||
}
|
||||
if sampleCount := gjson.GetBytes(payload, "sampleCount"); sampleCount.Exists() {
|
||||
imagenReq["parameters"].(map[string]any)["sampleCount"] = int(sampleCount.Int())
|
||||
}
|
||||
if negativePrompt := gjson.GetBytes(payload, "negativePrompt"); negativePrompt.Exists() {
|
||||
imagenReq["instances"].([]map[string]any)[0]["negativePrompt"] = negativePrompt.String()
|
||||
}
|
||||
|
||||
return json.Marshal(imagenReq)
|
||||
}
|
||||
|
||||
// GeminiVertexExecutor sends requests to Vertex AI Gemini endpoints using service account credentials.
|
||||
type GeminiVertexExecutor struct {
|
||||
cfg *config.Config
|
||||
@@ -155,39 +293,50 @@ func (e *GeminiVertexExecutor) Refresh(_ context.Context, auth *cliproxyauth.Aut
|
||||
// executeWithServiceAccount handles authentication using service account credentials.
|
||||
// This method contains the original service account authentication logic.
|
||||
func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (resp cliproxyexecutor.Response, err error) {
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
originalPayload := bytes.Clone(req.Payload)
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
|
||||
}
|
||||
body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
|
||||
body = util.NormalizeGeminiThinkingBudget(req.Model, body)
|
||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||
body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
|
||||
body, _ = sjson.SetBytes(body, "model", req.Model)
|
||||
var body []byte
|
||||
|
||||
action := "generateContent"
|
||||
// Handle Imagen models with special request format
|
||||
if isImagenModel(baseModel) {
|
||||
imagenBody, errImagen := convertToImagenRequest(req.Payload)
|
||||
if errImagen != nil {
|
||||
return resp, errImagen
|
||||
}
|
||||
body = imagenBody
|
||||
} else {
|
||||
// Standard Gemini translation flow
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
|
||||
originalPayload := bytes.Clone(req.Payload)
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
|
||||
body = sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
|
||||
|
||||
body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
|
||||
body = fixGeminiImageAspectRatio(baseModel, body)
|
||||
body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
|
||||
body, _ = sjson.SetBytes(body, "model", baseModel)
|
||||
}
|
||||
|
||||
action := getVertexAction(baseModel, false)
|
||||
if req.Metadata != nil {
|
||||
if a, _ := req.Metadata["action"].(string); a == "countTokens" {
|
||||
action = "countTokens"
|
||||
}
|
||||
}
|
||||
baseURL := vertexBaseURL(location)
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, action)
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, action)
|
||||
if opts.Alt != "" && action != "countTokens" {
|
||||
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
||||
}
|
||||
@@ -250,6 +399,16 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
|
||||
}
|
||||
appendAPIResponseChunk(ctx, e.cfg, data)
|
||||
reporter.publish(ctx, parseGeminiUsage(data))
|
||||
|
||||
// For Imagen models, convert response to Gemini format before translation
|
||||
// This ensures Imagen responses use the same format as gemini-3-pro-image-preview
|
||||
if isImagenModel(baseModel) {
|
||||
data = convertImagenToGeminiResponse(data, baseModel)
|
||||
}
|
||||
|
||||
// Standard Gemini translation (works for both Gemini and converted Imagen responses)
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
var param any
|
||||
out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, ¶m)
|
||||
resp = cliproxyexecutor.Response{Payload: []byte(out)}
|
||||
@@ -258,37 +417,31 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
|
||||
|
||||
// executeWithAPIKey handles authentication using API key credentials.
|
||||
func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (resp cliproxyexecutor.Response, err error) {
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
|
||||
originalPayload := bytes.Clone(req.Payload)
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
|
||||
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
|
||||
}
|
||||
body = util.ApplyDefaultThinkingIfNeeded(model, body)
|
||||
body = util.NormalizeGeminiThinkingBudget(model, body)
|
||||
body = util.StripThinkingConfigIfUnsupported(model, body)
|
||||
body = fixGeminiImageAspectRatio(model, body)
|
||||
body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
|
||||
body, _ = sjson.SetBytes(body, "model", model)
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
|
||||
body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
|
||||
|
||||
action := "generateContent"
|
||||
body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
|
||||
body = fixGeminiImageAspectRatio(baseModel, body)
|
||||
body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
|
||||
body, _ = sjson.SetBytes(body, "model", baseModel)
|
||||
|
||||
action := getVertexAction(baseModel, false)
|
||||
if req.Metadata != nil {
|
||||
if a, _ := req.Metadata["action"].(string); a == "countTokens" {
|
||||
action = "countTokens"
|
||||
@@ -299,7 +452,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
|
||||
if baseURL == "" {
|
||||
baseURL = "https://generativelanguage.googleapis.com"
|
||||
}
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, action)
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, action)
|
||||
if opts.Alt != "" && action != "countTokens" {
|
||||
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
||||
}
|
||||
@@ -367,37 +520,40 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
|
||||
|
||||
// executeStreamWithServiceAccount handles streaming authentication using service account credentials.
|
||||
func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
|
||||
originalPayload := bytes.Clone(req.Payload)
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
|
||||
}
|
||||
body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
|
||||
body = util.NormalizeGeminiThinkingBudget(req.Model, body)
|
||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||
body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
|
||||
body, _ = sjson.SetBytes(body, "model", req.Model)
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
|
||||
body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
|
||||
|
||||
body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
body = fixGeminiImageAspectRatio(baseModel, body)
|
||||
body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
|
||||
body, _ = sjson.SetBytes(body, "model", baseModel)
|
||||
|
||||
action := getVertexAction(baseModel, true)
|
||||
baseURL := vertexBaseURL(location)
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "streamGenerateContent")
|
||||
if opts.Alt == "" {
|
||||
url = url + "?alt=sse"
|
||||
} else {
|
||||
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, action)
|
||||
// Imagen models don't support streaming, skip SSE params
|
||||
if !isImagenModel(baseModel) {
|
||||
if opts.Alt == "" {
|
||||
url = url + "?alt=sse"
|
||||
} else {
|
||||
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
||||
}
|
||||
}
|
||||
body, _ = sjson.DeleteBytes(body, "session_id")
|
||||
|
||||
@@ -487,45 +643,43 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
|
||||
|
||||
// executeStreamWithAPIKey handles streaming authentication using API key credentials.
|
||||
func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
|
||||
originalPayload := bytes.Clone(req.Payload)
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
|
||||
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
|
||||
}
|
||||
body = util.ApplyDefaultThinkingIfNeeded(model, body)
|
||||
body = util.NormalizeGeminiThinkingBudget(model, body)
|
||||
body = util.StripThinkingConfigIfUnsupported(model, body)
|
||||
body = fixGeminiImageAspectRatio(model, body)
|
||||
body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
|
||||
body, _ = sjson.SetBytes(body, "model", model)
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
|
||||
body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
|
||||
|
||||
body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
body = fixGeminiImageAspectRatio(baseModel, body)
|
||||
body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
|
||||
body, _ = sjson.SetBytes(body, "model", baseModel)
|
||||
|
||||
action := getVertexAction(baseModel, true)
|
||||
// For API key auth, use simpler URL format without project/location
|
||||
if baseURL == "" {
|
||||
baseURL = "https://generativelanguage.googleapis.com"
|
||||
}
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, "streamGenerateContent")
|
||||
if opts.Alt == "" {
|
||||
url = url + "?alt=sse"
|
||||
} else {
|
||||
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, action)
|
||||
// Imagen models don't support streaming, skip SSE params
|
||||
if !isImagenModel(baseModel) {
|
||||
if opts.Alt == "" {
|
||||
url = url + "?alt=sse"
|
||||
} else {
|
||||
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
||||
}
|
||||
}
|
||||
body, _ = sjson.DeleteBytes(body, "session_id")
|
||||
|
||||
@@ -612,26 +766,27 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
|
||||
|
||||
// countTokensWithServiceAccount counts tokens using service account credentials.
|
||||
func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) {
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
|
||||
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
|
||||
|
||||
translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return cliproxyexecutor.Response{}, err
|
||||
}
|
||||
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
|
||||
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
|
||||
translatedReq, _ = sjson.SetBytes(translatedReq, "model", req.Model)
|
||||
|
||||
translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq)
|
||||
translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel)
|
||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
|
||||
|
||||
baseURL := vertexBaseURL(location)
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "countTokens")
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, "countTokens")
|
||||
|
||||
httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
|
||||
if errNewReq != nil {
|
||||
@@ -688,10 +843,6 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
|
||||
return cliproxyexecutor.Response{}, errRead
|
||||
}
|
||||
appendAPIResponseChunk(ctx, e.cfg, data)
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
|
||||
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)}
|
||||
}
|
||||
count := gjson.GetBytes(data, "totalTokens").Int()
|
||||
out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
|
||||
return cliproxyexecutor.Response{Payload: []byte(out)}, nil
|
||||
@@ -699,24 +850,20 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
|
||||
|
||||
// countTokensWithAPIKey handles token counting using API key credentials.
|
||||
func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) {
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
|
||||
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
|
||||
|
||||
translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return cliproxyexecutor.Response{}, err
|
||||
}
|
||||
translatedReq = util.StripThinkingConfigIfUnsupported(model, translatedReq)
|
||||
translatedReq = fixGeminiImageAspectRatio(model, translatedReq)
|
||||
translatedReq, _ = sjson.SetBytes(translatedReq, "model", model)
|
||||
|
||||
translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq)
|
||||
translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel)
|
||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
||||
@@ -726,7 +873,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
|
||||
if baseURL == "" {
|
||||
baseURL = "https://generativelanguage.googleapis.com"
|
||||
}
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, "countTokens")
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, "countTokens")
|
||||
|
||||
httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
|
||||
if errNewReq != nil {
|
||||
@@ -780,10 +927,6 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
|
||||
return cliproxyexecutor.Response{}, errRead
|
||||
}
|
||||
appendAPIResponseChunk(ctx, e.cfg, data)
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
|
||||
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)}
|
||||
}
|
||||
count := gjson.GetBytes(data, "totalTokens").Int()
|
||||
out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
|
||||
return cliproxyexecutor.Response{Payload: []byte(out)}, nil
|
||||
@@ -870,53 +1013,6 @@ func vertexAccessToken(ctx context.Context, cfg *config.Config, auth *cliproxyau
|
||||
return tok.AccessToken, nil
|
||||
}
|
||||
|
||||
// resolveUpstreamModel resolves the upstream model name from vertex-api-key configuration.
|
||||
// It matches the requested model alias against configured models and returns the actual upstream name.
|
||||
func (e *GeminiVertexExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
|
||||
trimmed := strings.TrimSpace(alias)
|
||||
if trimmed == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
entry := e.resolveVertexConfig(auth)
|
||||
if entry == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)
|
||||
|
||||
// Candidate names to match against configured aliases/names.
|
||||
candidates := []string{strings.TrimSpace(normalizedModel)}
|
||||
if !strings.EqualFold(normalizedModel, trimmed) {
|
||||
candidates = append(candidates, trimmed)
|
||||
}
|
||||
if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
|
||||
candidates = append(candidates, original)
|
||||
}
|
||||
|
||||
for i := range entry.Models {
|
||||
model := entry.Models[i]
|
||||
name := strings.TrimSpace(model.Name)
|
||||
modelAlias := strings.TrimSpace(model.Alias)
|
||||
|
||||
for _, candidate := range candidates {
|
||||
if candidate == "" {
|
||||
continue
|
||||
}
|
||||
if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
|
||||
if name != "" {
|
||||
return name
|
||||
}
|
||||
return candidate
|
||||
}
|
||||
if name != "" && strings.EqualFold(name, candidate) {
|
||||
return name
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// resolveVertexConfig finds the matching vertex-api-key configuration entry for the given auth.
|
||||
func (e *GeminiVertexExecutor) resolveVertexConfig(auth *cliproxyauth.Auth) *config.VertexCompatKey {
|
||||
if auth == nil || e.cfg == nil {
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
|
||||
iflowauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/iflow"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||
@@ -67,6 +68,8 @@ func (e *IFlowExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth
|
||||
|
||||
// Execute performs a non-streaming chat completion request.
|
||||
func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
apiKey, baseURL := iflowCreds(auth)
|
||||
if strings.TrimSpace(apiKey) == "" {
|
||||
err = fmt.Errorf("iflow executor: missing api key")
|
||||
@@ -76,7 +79,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
||||
baseURL = iflowauth.DefaultAPIBaseURL
|
||||
}
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
from := opts.SourceFormat
|
||||
@@ -85,17 +88,17 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
|
||||
body, _ = sjson.SetBytes(body, "model", req.Model)
|
||||
body = NormalizeThinkingConfig(body, req.Model, false)
|
||||
if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
|
||||
return resp, errValidate
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
|
||||
body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
|
||||
body, _ = sjson.SetBytes(body, "model", baseModel)
|
||||
|
||||
body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow", e.Identifier())
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
body = applyIFlowThinkingConfig(body)
|
||||
|
||||
body = preserveReasoningContentInMessages(body)
|
||||
body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
|
||||
body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
|
||||
|
||||
endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
|
||||
|
||||
@@ -154,6 +157,8 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
||||
reporter.ensurePublished(ctx)
|
||||
|
||||
var param any
|
||||
// Note: TranslateNonStream uses req.Model (original with suffix) to preserve
|
||||
// the original model name in the response for client compatibility.
|
||||
out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, ¶m)
|
||||
resp = cliproxyexecutor.Response{Payload: []byte(out)}
|
||||
return resp, nil
|
||||
@@ -161,6 +166,8 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
||||
|
||||
// ExecuteStream performs a streaming chat completion request.
|
||||
func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
apiKey, baseURL := iflowCreds(auth)
|
||||
if strings.TrimSpace(apiKey) == "" {
|
||||
err = fmt.Errorf("iflow executor: missing api key")
|
||||
@@ -170,7 +177,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
baseURL = iflowauth.DefaultAPIBaseURL
|
||||
}
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
from := opts.SourceFormat
|
||||
@@ -179,23 +186,22 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
|
||||
body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
|
||||
body, _ = sjson.SetBytes(body, "model", baseModel)
|
||||
|
||||
body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
|
||||
body, _ = sjson.SetBytes(body, "model", req.Model)
|
||||
body = NormalizeThinkingConfig(body, req.Model, false)
|
||||
if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
|
||||
return nil, errValidate
|
||||
body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow", e.Identifier())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
body = applyIFlowThinkingConfig(body)
|
||||
|
||||
body = preserveReasoningContentInMessages(body)
|
||||
// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
|
||||
toolsResult := gjson.GetBytes(body, "tools")
|
||||
if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
|
||||
body = ensureToolsArray(body)
|
||||
}
|
||||
body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
|
||||
body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
|
||||
|
||||
endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
|
||||
|
||||
@@ -278,11 +284,13 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
}
|
||||
|
||||
func (e *IFlowExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("openai")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
|
||||
|
||||
enc, err := tokenizerForModel(req.Model)
|
||||
enc, err := tokenizerForModel(baseModel)
|
||||
if err != nil {
|
||||
return cliproxyexecutor.Response{}, fmt.Errorf("iflow executor: tokenizer init failed: %w", err)
|
||||
}
|
||||
@@ -520,41 +528,3 @@ func preserveReasoningContentInMessages(body []byte) []byte {
|
||||
|
||||
return body
|
||||
}
|
||||
|
||||
// applyIFlowThinkingConfig converts normalized reasoning_effort to model-specific thinking configurations.
|
||||
// This should be called after NormalizeThinkingConfig has processed the payload.
|
||||
//
|
||||
// Model-specific handling:
|
||||
// - GLM-4.6/4.7: Uses chat_template_kwargs.enable_thinking (boolean) and chat_template_kwargs.clear_thinking=false
|
||||
// - MiniMax M2/M2.1: Uses reasoning_split=true for OpenAI-style reasoning separation
|
||||
func applyIFlowThinkingConfig(body []byte) []byte {
|
||||
effort := gjson.GetBytes(body, "reasoning_effort")
|
||||
if !effort.Exists() {
|
||||
return body
|
||||
}
|
||||
|
||||
model := strings.ToLower(gjson.GetBytes(body, "model").String())
|
||||
val := strings.ToLower(strings.TrimSpace(effort.String()))
|
||||
enableThinking := val != "none" && val != ""
|
||||
|
||||
// Remove reasoning_effort as we'll convert to model-specific format
|
||||
body, _ = sjson.DeleteBytes(body, "reasoning_effort")
|
||||
body, _ = sjson.DeleteBytes(body, "thinking")
|
||||
|
||||
// GLM-4.6/4.7: Use chat_template_kwargs
|
||||
if strings.HasPrefix(model, "glm-4") {
|
||||
body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
|
||||
if enableThinking {
|
||||
body, _ = sjson.SetBytes(body, "chat_template_kwargs.clear_thinking", false)
|
||||
}
|
||||
return body
|
||||
}
|
||||
|
||||
// MiniMax M2/M2.1: Use reasoning_split
|
||||
if strings.HasPrefix(model, "minimax-m2") {
|
||||
body, _ = sjson.SetBytes(body, "reasoning_split", enableThinking)
|
||||
return body
|
||||
}
|
||||
|
||||
return body
|
||||
}
|
||||
|
||||
67
internal/runtime/executor/iflow_executor_test.go
Normal file
67
internal/runtime/executor/iflow_executor_test.go
Normal file
@@ -0,0 +1,67 @@
|
||||
package executor
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
)
|
||||
|
||||
func TestIFlowExecutorParseSuffix(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
model string
|
||||
wantBase string
|
||||
wantLevel string
|
||||
}{
|
||||
{"no suffix", "glm-4", "glm-4", ""},
|
||||
{"glm with suffix", "glm-4.1-flash(high)", "glm-4.1-flash", "high"},
|
||||
{"minimax no suffix", "minimax-m2", "minimax-m2", ""},
|
||||
{"minimax with suffix", "minimax-m2.1(medium)", "minimax-m2.1", "medium"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := thinking.ParseSuffix(tt.model)
|
||||
if result.ModelName != tt.wantBase {
|
||||
t.Errorf("ParseSuffix(%q).ModelName = %q, want %q", tt.model, result.ModelName, tt.wantBase)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPreserveReasoningContentInMessages(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input []byte
|
||||
want []byte // nil means output should equal input
|
||||
}{
|
||||
{
|
||||
"non-glm model passthrough",
|
||||
[]byte(`{"model":"gpt-4","messages":[]}`),
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"glm model with empty messages",
|
||||
[]byte(`{"model":"glm-4","messages":[]}`),
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"glm model preserves existing reasoning_content",
|
||||
[]byte(`{"model":"glm-4","messages":[{"role":"assistant","content":"hi","reasoning_content":"thinking..."}]}`),
|
||||
nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := preserveReasoningContentInMessages(tt.input)
|
||||
want := tt.want
|
||||
if want == nil {
|
||||
want = tt.input
|
||||
}
|
||||
if string(got) != string(want) {
|
||||
t.Errorf("preserveReasoningContentInMessages() = %s, want %s", got, want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||
@@ -69,7 +70,9 @@ func (e *OpenAICompatExecutor) HttpRequest(ctx context.Context, auth *cliproxyau
|
||||
}
|
||||
|
||||
func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
baseURL, apiKey := e.resolveCredentials(auth)
|
||||
@@ -85,18 +88,13 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, opts.Stream)
|
||||
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), opts.Stream)
|
||||
modelOverride := e.resolveUpstreamModel(req.Model, auth)
|
||||
if modelOverride != "" {
|
||||
translated = e.overrideModel(translated, modelOverride)
|
||||
}
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated)
|
||||
allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
|
||||
translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
|
||||
translated = NormalizeThinkingConfig(translated, req.Model, allowCompat)
|
||||
if errValidate := ValidateThinkingConfig(translated, req.Model); errValidate != nil {
|
||||
return resp, errValidate
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, opts.Stream)
|
||||
translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), opts.Stream)
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated)
|
||||
|
||||
translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
|
||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||
@@ -168,7 +166,9 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
|
||||
}
|
||||
|
||||
func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
baseURL, apiKey := e.resolveCredentials(auth)
|
||||
@@ -176,24 +176,20 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
|
||||
err = statusErr{code: http.StatusUnauthorized, msg: "missing provider baseURL"}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("openai")
|
||||
originalPayload := bytes.Clone(req.Payload)
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
|
||||
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
modelOverride := e.resolveUpstreamModel(req.Model, auth)
|
||||
if modelOverride != "" {
|
||||
translated = e.overrideModel(translated, modelOverride)
|
||||
}
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated)
|
||||
allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
|
||||
translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
|
||||
translated = NormalizeThinkingConfig(translated, req.Model, allowCompat)
|
||||
if errValidate := ValidateThinkingConfig(translated, req.Model); errValidate != nil {
|
||||
return nil, errValidate
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
|
||||
translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated)
|
||||
|
||||
translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||
@@ -293,14 +289,17 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
|
||||
}
|
||||
|
||||
func (e *OpenAICompatExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("openai")
|
||||
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
|
||||
|
||||
modelForCounting := req.Model
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
translated = e.overrideModel(translated, modelOverride)
|
||||
modelForCounting = modelOverride
|
||||
modelForCounting := baseModel
|
||||
|
||||
translated, err := thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return cliproxyexecutor.Response{}, err
|
||||
}
|
||||
|
||||
enc, err := tokenizerForModel(modelForCounting)
|
||||
@@ -336,53 +335,6 @@ func (e *OpenAICompatExecutor) resolveCredentials(auth *cliproxyauth.Auth) (base
|
||||
return
|
||||
}
|
||||
|
||||
func (e *OpenAICompatExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
|
||||
if alias == "" || auth == nil || e.cfg == nil {
|
||||
return ""
|
||||
}
|
||||
compat := e.resolveCompatConfig(auth)
|
||||
if compat == nil {
|
||||
return ""
|
||||
}
|
||||
for i := range compat.Models {
|
||||
model := compat.Models[i]
|
||||
if model.Alias != "" {
|
||||
if strings.EqualFold(model.Alias, alias) {
|
||||
if model.Name != "" {
|
||||
return model.Name
|
||||
}
|
||||
return alias
|
||||
}
|
||||
continue
|
||||
}
|
||||
if strings.EqualFold(model.Name, alias) {
|
||||
return model.Name
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (e *OpenAICompatExecutor) allowCompatReasoningEffort(model string, auth *cliproxyauth.Auth) bool {
|
||||
trimmed := strings.TrimSpace(model)
|
||||
if trimmed == "" || e == nil || e.cfg == nil {
|
||||
return false
|
||||
}
|
||||
compat := e.resolveCompatConfig(auth)
|
||||
if compat == nil || len(compat.Models) == 0 {
|
||||
return false
|
||||
}
|
||||
for i := range compat.Models {
|
||||
entry := compat.Models[i]
|
||||
if strings.EqualFold(strings.TrimSpace(entry.Alias), trimmed) {
|
||||
return true
|
||||
}
|
||||
if strings.EqualFold(strings.TrimSpace(entry.Name), trimmed) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (e *OpenAICompatExecutor) resolveCompatConfig(auth *cliproxyauth.Auth) *config.OpenAICompatibility {
|
||||
if auth == nil || e.cfg == nil {
|
||||
return nil
|
||||
|
||||
@@ -1,109 +1,14 @@
|
||||
package executor
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"encoding/json"
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// ApplyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192))
|
||||
// for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
|
||||
func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
|
||||
// Use the alias from metadata if available, as it's registered in the global registry
|
||||
// with thinking metadata; the upstream model name may not be registered.
|
||||
lookupModel := util.ResolveOriginalModel(model, metadata)
|
||||
|
||||
// Determine which model to use for thinking support check.
|
||||
// If the alias (lookupModel) is not in the registry, fall back to the upstream model.
|
||||
thinkingModel := lookupModel
|
||||
if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) {
|
||||
thinkingModel = model
|
||||
}
|
||||
|
||||
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata)
|
||||
if !ok || (budgetOverride == nil && includeOverride == nil) {
|
||||
return payload
|
||||
}
|
||||
if !util.ModelSupportsThinking(thinkingModel) {
|
||||
return payload
|
||||
}
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
|
||||
}
|
||||
|
||||
// ApplyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192))
|
||||
// for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
|
||||
func ApplyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
|
||||
// Use the alias from metadata if available, as it's registered in the global registry
|
||||
// with thinking metadata; the upstream model name may not be registered.
|
||||
lookupModel := util.ResolveOriginalModel(model, metadata)
|
||||
|
||||
// Determine which model to use for thinking support check.
|
||||
// If the alias (lookupModel) is not in the registry, fall back to the upstream model.
|
||||
thinkingModel := lookupModel
|
||||
if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) {
|
||||
thinkingModel = model
|
||||
}
|
||||
|
||||
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata)
|
||||
if !ok || (budgetOverride == nil && includeOverride == nil) {
|
||||
return payload
|
||||
}
|
||||
if !util.ModelSupportsThinking(thinkingModel) {
|
||||
return payload
|
||||
}
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
|
||||
}
|
||||
|
||||
// ApplyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
|
||||
// Metadata values take precedence over any existing field when the model supports thinking, intentionally
|
||||
// overwriting caller-provided values to honor suffix/default metadata priority.
|
||||
func ApplyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string, allowCompat bool) []byte {
|
||||
if len(metadata) == 0 {
|
||||
return payload
|
||||
}
|
||||
if field == "" {
|
||||
return payload
|
||||
}
|
||||
baseModel := util.ResolveOriginalModel(model, metadata)
|
||||
if baseModel == "" {
|
||||
baseModel = model
|
||||
}
|
||||
if !util.ModelSupportsThinking(baseModel) && !allowCompat {
|
||||
return payload
|
||||
}
|
||||
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
|
||||
if util.ModelUsesThinkingLevels(baseModel) || allowCompat {
|
||||
if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
|
||||
return updated
|
||||
}
|
||||
}
|
||||
}
|
||||
// Fallback: numeric thinking_budget suffix for level-based (OpenAI-style) models.
|
||||
if util.ModelUsesThinkingLevels(baseModel) || allowCompat {
|
||||
if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
|
||||
if effort, ok := util.ThinkingBudgetToEffort(baseModel, *budget); ok && effort != "" {
|
||||
if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
|
||||
return updated
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return payload
|
||||
}
|
||||
|
||||
// applyPayloadConfigWithRoot behaves like applyPayloadConfig but treats all parameter
|
||||
// paths as relative to the provided root path (for example, "request" for Gemini CLI)
|
||||
// and restricts matches to the given protocol when supplied. Defaults are checked
|
||||
@@ -113,13 +18,14 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
|
||||
return payload
|
||||
}
|
||||
rules := cfg.Payload
|
||||
if len(rules.Default) == 0 && len(rules.Override) == 0 {
|
||||
if len(rules.Default) == 0 && len(rules.DefaultRaw) == 0 && len(rules.Override) == 0 && len(rules.OverrideRaw) == 0 {
|
||||
return payload
|
||||
}
|
||||
model = strings.TrimSpace(model)
|
||||
if model == "" {
|
||||
return payload
|
||||
}
|
||||
candidates := payloadModelCandidates(cfg, model, protocol)
|
||||
out := payload
|
||||
source := original
|
||||
if len(source) == 0 {
|
||||
@@ -129,7 +35,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
|
||||
// Apply default rules: first write wins per field across all matching rules.
|
||||
for i := range rules.Default {
|
||||
rule := &rules.Default[i]
|
||||
if !payloadRuleMatchesModel(rule, model, protocol) {
|
||||
if !payloadRuleMatchesModels(rule, protocol, candidates) {
|
||||
continue
|
||||
}
|
||||
for path, value := range rule.Params {
|
||||
@@ -151,10 +57,39 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
|
||||
appliedDefaults[fullPath] = struct{}{}
|
||||
}
|
||||
}
|
||||
// Apply default raw rules: first write wins per field across all matching rules.
|
||||
for i := range rules.DefaultRaw {
|
||||
rule := &rules.DefaultRaw[i]
|
||||
if !payloadRuleMatchesModels(rule, protocol, candidates) {
|
||||
continue
|
||||
}
|
||||
for path, value := range rule.Params {
|
||||
fullPath := buildPayloadPath(root, path)
|
||||
if fullPath == "" {
|
||||
continue
|
||||
}
|
||||
if gjson.GetBytes(source, fullPath).Exists() {
|
||||
continue
|
||||
}
|
||||
if _, ok := appliedDefaults[fullPath]; ok {
|
||||
continue
|
||||
}
|
||||
rawValue, ok := payloadRawValue(value)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
updated, errSet := sjson.SetRawBytes(out, fullPath, rawValue)
|
||||
if errSet != nil {
|
||||
continue
|
||||
}
|
||||
out = updated
|
||||
appliedDefaults[fullPath] = struct{}{}
|
||||
}
|
||||
}
|
||||
// Apply override rules: last write wins per field across all matching rules.
|
||||
for i := range rules.Override {
|
||||
rule := &rules.Override[i]
|
||||
if !payloadRuleMatchesModel(rule, model, protocol) {
|
||||
if !payloadRuleMatchesModels(rule, protocol, candidates) {
|
||||
continue
|
||||
}
|
||||
for path, value := range rule.Params {
|
||||
@@ -169,9 +104,43 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
|
||||
out = updated
|
||||
}
|
||||
}
|
||||
// Apply override raw rules: last write wins per field across all matching rules.
|
||||
for i := range rules.OverrideRaw {
|
||||
rule := &rules.OverrideRaw[i]
|
||||
if !payloadRuleMatchesModels(rule, protocol, candidates) {
|
||||
continue
|
||||
}
|
||||
for path, value := range rule.Params {
|
||||
fullPath := buildPayloadPath(root, path)
|
||||
if fullPath == "" {
|
||||
continue
|
||||
}
|
||||
rawValue, ok := payloadRawValue(value)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
updated, errSet := sjson.SetRawBytes(out, fullPath, rawValue)
|
||||
if errSet != nil {
|
||||
continue
|
||||
}
|
||||
out = updated
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func payloadRuleMatchesModels(rule *config.PayloadRule, protocol string, models []string) bool {
|
||||
if rule == nil || len(models) == 0 {
|
||||
return false
|
||||
}
|
||||
for _, model := range models {
|
||||
if payloadRuleMatchesModel(rule, model, protocol) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func payloadRuleMatchesModel(rule *config.PayloadRule, model, protocol string) bool {
|
||||
if rule == nil {
|
||||
return false
|
||||
@@ -194,6 +163,65 @@ func payloadRuleMatchesModel(rule *config.PayloadRule, model, protocol string) b
|
||||
return false
|
||||
}
|
||||
|
||||
func payloadModelCandidates(cfg *config.Config, model, protocol string) []string {
|
||||
model = strings.TrimSpace(model)
|
||||
if model == "" {
|
||||
return nil
|
||||
}
|
||||
candidates := []string{model}
|
||||
if cfg == nil {
|
||||
return candidates
|
||||
}
|
||||
aliases := payloadModelAliases(cfg, model, protocol)
|
||||
if len(aliases) == 0 {
|
||||
return candidates
|
||||
}
|
||||
seen := map[string]struct{}{strings.ToLower(model): struct{}{}}
|
||||
for _, alias := range aliases {
|
||||
alias = strings.TrimSpace(alias)
|
||||
if alias == "" {
|
||||
continue
|
||||
}
|
||||
key := strings.ToLower(alias)
|
||||
if _, ok := seen[key]; ok {
|
||||
continue
|
||||
}
|
||||
seen[key] = struct{}{}
|
||||
candidates = append(candidates, alias)
|
||||
}
|
||||
return candidates
|
||||
}
|
||||
|
||||
func payloadModelAliases(cfg *config.Config, model, protocol string) []string {
|
||||
if cfg == nil {
|
||||
return nil
|
||||
}
|
||||
model = strings.TrimSpace(model)
|
||||
if model == "" {
|
||||
return nil
|
||||
}
|
||||
channel := strings.ToLower(strings.TrimSpace(protocol))
|
||||
if channel == "" {
|
||||
return nil
|
||||
}
|
||||
entries := cfg.OAuthModelAlias[channel]
|
||||
if len(entries) == 0 {
|
||||
return nil
|
||||
}
|
||||
aliases := make([]string, 0, 2)
|
||||
for _, entry := range entries {
|
||||
if !strings.EqualFold(strings.TrimSpace(entry.Name), model) {
|
||||
continue
|
||||
}
|
||||
alias := strings.TrimSpace(entry.Alias)
|
||||
if alias == "" {
|
||||
continue
|
||||
}
|
||||
aliases = append(aliases, alias)
|
||||
}
|
||||
return aliases
|
||||
}
|
||||
|
||||
// buildPayloadPath combines an optional root path with a relative parameter path.
|
||||
// When root is empty, the parameter path is used as-is. When root is non-empty,
|
||||
// the parameter path is treated as relative to root.
|
||||
@@ -212,6 +240,24 @@ func buildPayloadPath(root, path string) string {
|
||||
return r + "." + p
|
||||
}
|
||||
|
||||
func payloadRawValue(value any) ([]byte, bool) {
|
||||
if value == nil {
|
||||
return nil, false
|
||||
}
|
||||
switch typed := value.(type) {
|
||||
case string:
|
||||
return []byte(typed), true
|
||||
case []byte:
|
||||
return typed, true
|
||||
default:
|
||||
raw, errMarshal := json.Marshal(typed)
|
||||
if errMarshal != nil {
|
||||
return nil, false
|
||||
}
|
||||
return raw, true
|
||||
}
|
||||
}
|
||||
|
||||
// matchModelPattern performs simple wildcard matching where '*' matches zero or more characters.
|
||||
// Examples:
|
||||
//
|
||||
@@ -256,102 +302,3 @@ func matchModelPattern(pattern, model string) bool {
|
||||
}
|
||||
return pi == len(pattern)
|
||||
}
|
||||
|
||||
// NormalizeThinkingConfig normalizes thinking-related fields in the payload
|
||||
// based on model capabilities. For models without thinking support, it strips
|
||||
// reasoning fields. For models with level-based thinking, it validates and
|
||||
// normalizes the reasoning effort level. For models with numeric budget thinking,
|
||||
// it strips the effort string fields.
|
||||
func NormalizeThinkingConfig(payload []byte, model string, allowCompat bool) []byte {
|
||||
if len(payload) == 0 || model == "" {
|
||||
return payload
|
||||
}
|
||||
|
||||
if !util.ModelSupportsThinking(model) {
|
||||
if allowCompat {
|
||||
return payload
|
||||
}
|
||||
return StripThinkingFields(payload, false)
|
||||
}
|
||||
|
||||
if util.ModelUsesThinkingLevels(model) {
|
||||
return NormalizeReasoningEffortLevel(payload, model)
|
||||
}
|
||||
|
||||
// Model supports thinking but uses numeric budgets, not levels.
|
||||
// Strip effort string fields since they are not applicable.
|
||||
return StripThinkingFields(payload, true)
|
||||
}
|
||||
|
||||
// StripThinkingFields removes thinking-related fields from the payload for
|
||||
// models that do not support thinking. If effortOnly is true, only removes
|
||||
// effort string fields (for models using numeric budgets).
|
||||
func StripThinkingFields(payload []byte, effortOnly bool) []byte {
|
||||
fieldsToRemove := []string{
|
||||
"reasoning_effort",
|
||||
"reasoning.effort",
|
||||
}
|
||||
if !effortOnly {
|
||||
fieldsToRemove = append([]string{"reasoning", "thinking"}, fieldsToRemove...)
|
||||
}
|
||||
out := payload
|
||||
for _, field := range fieldsToRemove {
|
||||
if gjson.GetBytes(out, field).Exists() {
|
||||
out, _ = sjson.DeleteBytes(out, field)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// NormalizeReasoningEffortLevel validates and normalizes the reasoning_effort
|
||||
// or reasoning.effort field for level-based thinking models.
|
||||
func NormalizeReasoningEffortLevel(payload []byte, model string) []byte {
|
||||
out := payload
|
||||
|
||||
if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() {
|
||||
if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
|
||||
out, _ = sjson.SetBytes(out, "reasoning_effort", normalized)
|
||||
}
|
||||
}
|
||||
|
||||
if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() {
|
||||
if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
|
||||
out, _ = sjson.SetBytes(out, "reasoning.effort", normalized)
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// ValidateThinkingConfig checks for unsupported reasoning levels on level-based models.
|
||||
// Returns a statusErr with 400 when an unsupported level is supplied to avoid silently
|
||||
// downgrading requests.
|
||||
func ValidateThinkingConfig(payload []byte, model string) error {
|
||||
if len(payload) == 0 || model == "" {
|
||||
return nil
|
||||
}
|
||||
if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) {
|
||||
return nil
|
||||
}
|
||||
|
||||
levels := util.GetModelThinkingLevels(model)
|
||||
checkField := func(path string) error {
|
||||
if effort := gjson.GetBytes(payload, path); effort.Exists() {
|
||||
if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok {
|
||||
return statusErr{
|
||||
code: http.StatusBadRequest,
|
||||
msg: fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")),
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := checkField("reasoning_effort"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := checkField("reasoning.effort"); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
|
||||
qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
||||
@@ -65,12 +66,14 @@ func (e *QwenExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth,
|
||||
}
|
||||
|
||||
func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
token, baseURL := qwenCreds(auth)
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
token, baseURL := qwenCreds(auth)
|
||||
if baseURL == "" {
|
||||
baseURL = "https://portal.qwen.ai/v1"
|
||||
}
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
from := opts.SourceFormat
|
||||
@@ -79,15 +82,16 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
|
||||
body, _ = sjson.SetBytes(body, "model", req.Model)
|
||||
body = NormalizeThinkingConfig(body, req.Model, false)
|
||||
if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
|
||||
return resp, errValidate
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
|
||||
body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
|
||||
body, _ = sjson.SetBytes(body, "model", baseModel)
|
||||
|
||||
body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
|
||||
|
||||
body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
|
||||
|
||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
|
||||
@@ -140,18 +144,22 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
||||
appendAPIResponseChunk(ctx, e.cfg, data)
|
||||
reporter.publish(ctx, parseOpenAIUsage(data))
|
||||
var param any
|
||||
// Note: TranslateNonStream uses req.Model (original with suffix) to preserve
|
||||
// the original model name in the response for client compatibility.
|
||||
out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, ¶m)
|
||||
resp = cliproxyexecutor.Response{Payload: []byte(out)}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
token, baseURL := qwenCreds(auth)
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
token, baseURL := qwenCreds(auth)
|
||||
if baseURL == "" {
|
||||
baseURL = "https://portal.qwen.ai/v1"
|
||||
}
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
from := opts.SourceFormat
|
||||
@@ -160,15 +168,15 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
||||
if len(opts.OriginalRequest) > 0 {
|
||||
originalPayload = bytes.Clone(opts.OriginalRequest)
|
||||
}
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
|
||||
body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
|
||||
body, _ = sjson.SetBytes(body, "model", baseModel)
|
||||
|
||||
body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
|
||||
body, _ = sjson.SetBytes(body, "model", req.Model)
|
||||
body = NormalizeThinkingConfig(body, req.Model, false)
|
||||
if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
|
||||
return nil, errValidate
|
||||
body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
toolsResult := gjson.GetBytes(body, "tools")
|
||||
// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
|
||||
// This will have no real consequences. It's just to scare Qwen3.
|
||||
@@ -176,7 +184,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
||||
body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
|
||||
}
|
||||
body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
|
||||
body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
|
||||
body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
|
||||
|
||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
|
||||
@@ -256,13 +264,15 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
||||
}
|
||||
|
||||
func (e *QwenExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("openai")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
|
||||
|
||||
modelName := gjson.GetBytes(body, "model").String()
|
||||
if strings.TrimSpace(modelName) == "" {
|
||||
modelName = req.Model
|
||||
modelName = baseModel
|
||||
}
|
||||
|
||||
enc, err := tokenizerForModel(modelName)
|
||||
|
||||
30
internal/runtime/executor/qwen_executor_test.go
Normal file
30
internal/runtime/executor/qwen_executor_test.go
Normal file
@@ -0,0 +1,30 @@
|
||||
package executor
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
)
|
||||
|
||||
func TestQwenExecutorParseSuffix(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
model string
|
||||
wantBase string
|
||||
wantLevel string
|
||||
}{
|
||||
{"no suffix", "qwen-max", "qwen-max", ""},
|
||||
{"with level suffix", "qwen-max(high)", "qwen-max", "high"},
|
||||
{"with budget suffix", "qwen-max(16384)", "qwen-max", "16384"},
|
||||
{"complex model name", "qwen-plus-latest(medium)", "qwen-plus-latest", "medium"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := thinking.ParseSuffix(tt.model)
|
||||
if result.ModelName != tt.wantBase {
|
||||
t.Errorf("ParseSuffix(%q).ModelName = %q, want %q", tt.model, result.ModelName, tt.wantBase)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
11
internal/runtime/executor/thinking_providers.go
Normal file
11
internal/runtime/executor/thinking_providers.go
Normal file
@@ -0,0 +1,11 @@
|
||||
package executor
|
||||
|
||||
import (
|
||||
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/antigravity"
|
||||
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/claude"
|
||||
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/codex"
|
||||
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini"
|
||||
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli"
|
||||
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow"
|
||||
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai"
|
||||
)
|
||||
487
internal/thinking/apply.go
Normal file
487
internal/thinking/apply.go
Normal file
@@ -0,0 +1,487 @@
|
||||
// Package thinking provides unified thinking configuration processing.
|
||||
package thinking
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
// providerAppliers maps provider names to their ProviderApplier implementations.
|
||||
var providerAppliers = map[string]ProviderApplier{
|
||||
"gemini": nil,
|
||||
"gemini-cli": nil,
|
||||
"claude": nil,
|
||||
"openai": nil,
|
||||
"codex": nil,
|
||||
"iflow": nil,
|
||||
"antigravity": nil,
|
||||
}
|
||||
|
||||
// GetProviderApplier returns the ProviderApplier for the given provider name.
|
||||
// Returns nil if the provider is not registered.
|
||||
func GetProviderApplier(provider string) ProviderApplier {
|
||||
return providerAppliers[provider]
|
||||
}
|
||||
|
||||
// RegisterProvider registers a provider applier by name.
|
||||
func RegisterProvider(name string, applier ProviderApplier) {
|
||||
providerAppliers[name] = applier
|
||||
}
|
||||
|
||||
// IsUserDefinedModel reports whether the model is a user-defined model that should
|
||||
// have thinking configuration passed through without validation.
|
||||
//
|
||||
// User-defined models are configured via config file's models[] array
|
||||
// (e.g., openai-compatibility.*.models[], *-api-key.models[]). These models
|
||||
// are marked with UserDefined=true at registration time.
|
||||
//
|
||||
// User-defined models should have their thinking configuration applied directly,
|
||||
// letting the upstream service validate the configuration.
|
||||
func IsUserDefinedModel(modelInfo *registry.ModelInfo) bool {
|
||||
if modelInfo == nil {
|
||||
return true
|
||||
}
|
||||
return modelInfo.UserDefined
|
||||
}
|
||||
|
||||
// ApplyThinking applies thinking configuration to a request body.
|
||||
//
|
||||
// This is the unified entry point for all providers. It follows the processing
|
||||
// order defined in FR25: route check → model capability query → config extraction
|
||||
// → validation → application.
|
||||
//
|
||||
// Suffix Priority: When the model name includes a thinking suffix (e.g., "gemini-2.5-pro(8192)"),
|
||||
// the suffix configuration takes priority over any thinking parameters in the request body.
|
||||
// This enables users to override thinking settings via the model name without modifying their
|
||||
// request payload.
|
||||
//
|
||||
// Parameters:
|
||||
// - body: Original request body JSON
|
||||
// - model: Model name, optionally with thinking suffix (e.g., "claude-sonnet-4-5(16384)")
|
||||
// - fromFormat: Source request format (e.g., openai, codex, gemini)
|
||||
// - toFormat: Target provider format for the request body (gemini, gemini-cli, antigravity, claude, openai, codex, iflow)
|
||||
// - providerKey: Provider identifier used for registry model lookups (may differ from toFormat, e.g., openrouter -> openai)
|
||||
//
|
||||
// Returns:
|
||||
// - Modified request body JSON with thinking configuration applied
|
||||
// - Error if validation fails (ThinkingError). On error, the original body
|
||||
// is returned (not nil) to enable defensive programming patterns.
|
||||
//
|
||||
// Passthrough behavior (returns original body without error):
|
||||
// - Unknown provider (not in providerAppliers map)
|
||||
// - modelInfo.Thinking is nil (model doesn't support thinking)
|
||||
//
|
||||
// Note: Unknown models (modelInfo is nil) are treated as user-defined models: we skip
|
||||
// validation and still apply the thinking config so the upstream can validate it.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// // With suffix - suffix config takes priority
|
||||
// result, err := thinking.ApplyThinking(body, "gemini-2.5-pro(8192)", "gemini", "gemini", "gemini")
|
||||
//
|
||||
// // Without suffix - uses body config
|
||||
// result, err := thinking.ApplyThinking(body, "gemini-2.5-pro", "gemini", "gemini", "gemini")
|
||||
func ApplyThinking(body []byte, model string, fromFormat string, toFormat string, providerKey string) ([]byte, error) {
|
||||
providerFormat := strings.ToLower(strings.TrimSpace(toFormat))
|
||||
providerKey = strings.ToLower(strings.TrimSpace(providerKey))
|
||||
if providerKey == "" {
|
||||
providerKey = providerFormat
|
||||
}
|
||||
fromFormat = strings.ToLower(strings.TrimSpace(fromFormat))
|
||||
if fromFormat == "" {
|
||||
fromFormat = providerFormat
|
||||
}
|
||||
// 1. Route check: Get provider applier
|
||||
applier := GetProviderApplier(providerFormat)
|
||||
if applier == nil {
|
||||
log.WithFields(log.Fields{
|
||||
"provider": providerFormat,
|
||||
"model": model,
|
||||
}).Debug("thinking: unknown provider, passthrough |")
|
||||
return body, nil
|
||||
}
|
||||
|
||||
// 2. Parse suffix and get modelInfo
|
||||
suffixResult := ParseSuffix(model)
|
||||
baseModel := suffixResult.ModelName
|
||||
// Use provider-specific lookup to handle capability differences across providers.
|
||||
modelInfo := registry.LookupModelInfo(baseModel, providerKey)
|
||||
|
||||
// 3. Model capability check
|
||||
// Unknown models are treated as user-defined so thinking config can still be applied.
|
||||
// The upstream service is responsible for validating the configuration.
|
||||
if IsUserDefinedModel(modelInfo) {
|
||||
return applyUserDefinedModel(body, modelInfo, fromFormat, providerFormat, suffixResult)
|
||||
}
|
||||
if modelInfo.Thinking == nil {
|
||||
config := extractThinkingConfig(body, providerFormat)
|
||||
if hasThinkingConfig(config) {
|
||||
log.WithFields(log.Fields{
|
||||
"model": baseModel,
|
||||
"provider": providerFormat,
|
||||
}).Debug("thinking: model does not support thinking, stripping config |")
|
||||
return StripThinkingConfig(body, providerFormat), nil
|
||||
}
|
||||
log.WithFields(log.Fields{
|
||||
"provider": providerFormat,
|
||||
"model": baseModel,
|
||||
}).Debug("thinking: model does not support thinking, passthrough |")
|
||||
return body, nil
|
||||
}
|
||||
|
||||
// 4. Get config: suffix priority over body
|
||||
var config ThinkingConfig
|
||||
if suffixResult.HasSuffix {
|
||||
config = parseSuffixToConfig(suffixResult.RawSuffix, providerFormat, model)
|
||||
log.WithFields(log.Fields{
|
||||
"provider": providerFormat,
|
||||
"model": model,
|
||||
"mode": config.Mode,
|
||||
"budget": config.Budget,
|
||||
"level": config.Level,
|
||||
}).Debug("thinking: config from model suffix |")
|
||||
} else {
|
||||
config = extractThinkingConfig(body, providerFormat)
|
||||
if hasThinkingConfig(config) {
|
||||
log.WithFields(log.Fields{
|
||||
"provider": providerFormat,
|
||||
"model": modelInfo.ID,
|
||||
"mode": config.Mode,
|
||||
"budget": config.Budget,
|
||||
"level": config.Level,
|
||||
}).Debug("thinking: original config from request |")
|
||||
}
|
||||
}
|
||||
|
||||
if !hasThinkingConfig(config) {
|
||||
log.WithFields(log.Fields{
|
||||
"provider": providerFormat,
|
||||
"model": modelInfo.ID,
|
||||
}).Debug("thinking: no config found, passthrough |")
|
||||
return body, nil
|
||||
}
|
||||
|
||||
// 5. Validate and normalize configuration
|
||||
validated, err := ValidateConfig(config, modelInfo, fromFormat, providerFormat, suffixResult.HasSuffix)
|
||||
if err != nil {
|
||||
log.WithFields(log.Fields{
|
||||
"provider": providerFormat,
|
||||
"model": modelInfo.ID,
|
||||
"error": err.Error(),
|
||||
}).Warn("thinking: validation failed |")
|
||||
// Return original body on validation failure (defensive programming).
|
||||
// This ensures callers who ignore the error won't receive nil body.
|
||||
// The upstream service will decide how to handle the unmodified request.
|
||||
return body, err
|
||||
}
|
||||
|
||||
// Defensive check: ValidateConfig should never return (nil, nil)
|
||||
if validated == nil {
|
||||
log.WithFields(log.Fields{
|
||||
"provider": providerFormat,
|
||||
"model": modelInfo.ID,
|
||||
}).Warn("thinking: ValidateConfig returned nil config without error, passthrough |")
|
||||
return body, nil
|
||||
}
|
||||
|
||||
log.WithFields(log.Fields{
|
||||
"provider": providerFormat,
|
||||
"model": modelInfo.ID,
|
||||
"mode": validated.Mode,
|
||||
"budget": validated.Budget,
|
||||
"level": validated.Level,
|
||||
}).Debug("thinking: processed config to apply |")
|
||||
|
||||
// 6. Apply configuration using provider-specific applier
|
||||
return applier.Apply(body, *validated, modelInfo)
|
||||
}
|
||||
|
||||
// parseSuffixToConfig converts a raw suffix string to ThinkingConfig.
|
||||
//
|
||||
// Parsing priority:
|
||||
// 1. Special values: "none" → ModeNone, "auto"/"-1" → ModeAuto
|
||||
// 2. Level names: "minimal", "low", "medium", "high", "xhigh" → ModeLevel
|
||||
// 3. Numeric values: positive integers → ModeBudget, 0 → ModeNone
|
||||
//
|
||||
// If none of the above match, returns empty ThinkingConfig (treated as no config).
|
||||
func parseSuffixToConfig(rawSuffix, provider, model string) ThinkingConfig {
|
||||
// 1. Try special values first (none, auto, -1)
|
||||
if mode, ok := ParseSpecialSuffix(rawSuffix); ok {
|
||||
switch mode {
|
||||
case ModeNone:
|
||||
return ThinkingConfig{Mode: ModeNone, Budget: 0}
|
||||
case ModeAuto:
|
||||
return ThinkingConfig{Mode: ModeAuto, Budget: -1}
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Try level parsing (minimal, low, medium, high, xhigh)
|
||||
if level, ok := ParseLevelSuffix(rawSuffix); ok {
|
||||
return ThinkingConfig{Mode: ModeLevel, Level: level}
|
||||
}
|
||||
|
||||
// 3. Try numeric parsing
|
||||
if budget, ok := ParseNumericSuffix(rawSuffix); ok {
|
||||
if budget == 0 {
|
||||
return ThinkingConfig{Mode: ModeNone, Budget: 0}
|
||||
}
|
||||
return ThinkingConfig{Mode: ModeBudget, Budget: budget}
|
||||
}
|
||||
|
||||
// Unknown suffix format - return empty config
|
||||
log.WithFields(log.Fields{
|
||||
"provider": provider,
|
||||
"model": model,
|
||||
"raw_suffix": rawSuffix,
|
||||
}).Debug("thinking: unknown suffix format, treating as no config |")
|
||||
return ThinkingConfig{}
|
||||
}
|
||||
|
||||
// applyUserDefinedModel applies thinking configuration for user-defined models
|
||||
// without ThinkingSupport validation.
|
||||
func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, fromFormat, toFormat string, suffixResult SuffixResult) ([]byte, error) {
|
||||
// Get model ID for logging
|
||||
modelID := ""
|
||||
if modelInfo != nil {
|
||||
modelID = modelInfo.ID
|
||||
} else {
|
||||
modelID = suffixResult.ModelName
|
||||
}
|
||||
|
||||
// Get config: suffix priority over body
|
||||
var config ThinkingConfig
|
||||
if suffixResult.HasSuffix {
|
||||
config = parseSuffixToConfig(suffixResult.RawSuffix, toFormat, modelID)
|
||||
} else {
|
||||
config = extractThinkingConfig(body, toFormat)
|
||||
}
|
||||
|
||||
if !hasThinkingConfig(config) {
|
||||
log.WithFields(log.Fields{
|
||||
"model": modelID,
|
||||
"provider": toFormat,
|
||||
}).Debug("thinking: user-defined model, passthrough (no config) |")
|
||||
return body, nil
|
||||
}
|
||||
|
||||
applier := GetProviderApplier(toFormat)
|
||||
if applier == nil {
|
||||
log.WithFields(log.Fields{
|
||||
"model": modelID,
|
||||
"provider": toFormat,
|
||||
}).Debug("thinking: user-defined model, passthrough (unknown provider) |")
|
||||
return body, nil
|
||||
}
|
||||
|
||||
log.WithFields(log.Fields{
|
||||
"provider": toFormat,
|
||||
"model": modelID,
|
||||
"mode": config.Mode,
|
||||
"budget": config.Budget,
|
||||
"level": config.Level,
|
||||
}).Debug("thinking: applying config for user-defined model (skip validation)")
|
||||
|
||||
config = normalizeUserDefinedConfig(config, fromFormat, toFormat)
|
||||
return applier.Apply(body, config, modelInfo)
|
||||
}
|
||||
|
||||
func normalizeUserDefinedConfig(config ThinkingConfig, fromFormat, toFormat string) ThinkingConfig {
|
||||
if config.Mode != ModeLevel {
|
||||
return config
|
||||
}
|
||||
if !isBudgetBasedProvider(toFormat) || !isLevelBasedProvider(fromFormat) {
|
||||
return config
|
||||
}
|
||||
budget, ok := ConvertLevelToBudget(string(config.Level))
|
||||
if !ok {
|
||||
return config
|
||||
}
|
||||
config.Mode = ModeBudget
|
||||
config.Budget = budget
|
||||
config.Level = ""
|
||||
return config
|
||||
}
|
||||
|
||||
// extractThinkingConfig extracts provider-specific thinking config from request body.
|
||||
func extractThinkingConfig(body []byte, provider string) ThinkingConfig {
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
return ThinkingConfig{}
|
||||
}
|
||||
|
||||
switch provider {
|
||||
case "claude":
|
||||
return extractClaudeConfig(body)
|
||||
case "gemini", "gemini-cli", "antigravity":
|
||||
return extractGeminiConfig(body, provider)
|
||||
case "openai":
|
||||
return extractOpenAIConfig(body)
|
||||
case "codex":
|
||||
return extractCodexConfig(body)
|
||||
case "iflow":
|
||||
config := extractIFlowConfig(body)
|
||||
if hasThinkingConfig(config) {
|
||||
return config
|
||||
}
|
||||
return extractOpenAIConfig(body)
|
||||
default:
|
||||
return ThinkingConfig{}
|
||||
}
|
||||
}
|
||||
|
||||
func hasThinkingConfig(config ThinkingConfig) bool {
|
||||
return config.Mode != ModeBudget || config.Budget != 0 || config.Level != ""
|
||||
}
|
||||
|
||||
// extractClaudeConfig extracts thinking configuration from Claude format request body.
|
||||
//
|
||||
// Claude API format:
|
||||
// - thinking.type: "enabled" or "disabled"
|
||||
// - thinking.budget_tokens: integer (-1=auto, 0=disabled, >0=budget)
|
||||
//
|
||||
// Priority: thinking.type="disabled" takes precedence over budget_tokens.
|
||||
// When type="enabled" without budget_tokens, returns ModeAuto to indicate
|
||||
// the user wants thinking enabled but didn't specify a budget.
|
||||
func extractClaudeConfig(body []byte) ThinkingConfig {
|
||||
thinkingType := gjson.GetBytes(body, "thinking.type").String()
|
||||
if thinkingType == "disabled" {
|
||||
return ThinkingConfig{Mode: ModeNone, Budget: 0}
|
||||
}
|
||||
|
||||
// Check budget_tokens
|
||||
if budget := gjson.GetBytes(body, "thinking.budget_tokens"); budget.Exists() {
|
||||
value := int(budget.Int())
|
||||
switch value {
|
||||
case 0:
|
||||
return ThinkingConfig{Mode: ModeNone, Budget: 0}
|
||||
case -1:
|
||||
return ThinkingConfig{Mode: ModeAuto, Budget: -1}
|
||||
default:
|
||||
return ThinkingConfig{Mode: ModeBudget, Budget: value}
|
||||
}
|
||||
}
|
||||
|
||||
// If type="enabled" but no budget_tokens, treat as auto (user wants thinking but no budget specified)
|
||||
if thinkingType == "enabled" {
|
||||
return ThinkingConfig{Mode: ModeAuto, Budget: -1}
|
||||
}
|
||||
|
||||
return ThinkingConfig{}
|
||||
}
|
||||
|
||||
// extractGeminiConfig extracts thinking configuration from Gemini format request body.
|
||||
//
|
||||
// Gemini API format:
|
||||
// - generationConfig.thinkingConfig.thinkingLevel: "none", "auto", or level name (Gemini 3)
|
||||
// - generationConfig.thinkingConfig.thinkingBudget: integer (Gemini 2.5)
|
||||
//
|
||||
// For gemini-cli and antigravity providers, the path is prefixed with "request.".
|
||||
//
|
||||
// Priority: thinkingLevel is checked first (Gemini 3 format), then thinkingBudget (Gemini 2.5 format).
|
||||
// This allows newer Gemini 3 level-based configs to take precedence.
|
||||
func extractGeminiConfig(body []byte, provider string) ThinkingConfig {
|
||||
prefix := "generationConfig.thinkingConfig"
|
||||
if provider == "gemini-cli" || provider == "antigravity" {
|
||||
prefix = "request.generationConfig.thinkingConfig"
|
||||
}
|
||||
|
||||
// Check thinkingLevel first (Gemini 3 format takes precedence)
|
||||
if level := gjson.GetBytes(body, prefix+".thinkingLevel"); level.Exists() {
|
||||
value := level.String()
|
||||
switch value {
|
||||
case "none":
|
||||
return ThinkingConfig{Mode: ModeNone, Budget: 0}
|
||||
case "auto":
|
||||
return ThinkingConfig{Mode: ModeAuto, Budget: -1}
|
||||
default:
|
||||
return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(value)}
|
||||
}
|
||||
}
|
||||
|
||||
// Check thinkingBudget (Gemini 2.5 format)
|
||||
if budget := gjson.GetBytes(body, prefix+".thinkingBudget"); budget.Exists() {
|
||||
value := int(budget.Int())
|
||||
switch value {
|
||||
case 0:
|
||||
return ThinkingConfig{Mode: ModeNone, Budget: 0}
|
||||
case -1:
|
||||
return ThinkingConfig{Mode: ModeAuto, Budget: -1}
|
||||
default:
|
||||
return ThinkingConfig{Mode: ModeBudget, Budget: value}
|
||||
}
|
||||
}
|
||||
|
||||
return ThinkingConfig{}
|
||||
}
|
||||
|
||||
// extractOpenAIConfig extracts thinking configuration from OpenAI format request body.
|
||||
//
|
||||
// OpenAI API format:
|
||||
// - reasoning_effort: "none", "low", "medium", "high" (discrete levels)
|
||||
//
|
||||
// OpenAI uses level-based thinking configuration only, no numeric budget support.
|
||||
// The "none" value is treated specially to return ModeNone.
|
||||
func extractOpenAIConfig(body []byte) ThinkingConfig {
|
||||
// Check reasoning_effort (OpenAI Chat Completions format)
|
||||
if effort := gjson.GetBytes(body, "reasoning_effort"); effort.Exists() {
|
||||
value := effort.String()
|
||||
if value == "none" {
|
||||
return ThinkingConfig{Mode: ModeNone, Budget: 0}
|
||||
}
|
||||
return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(value)}
|
||||
}
|
||||
|
||||
return ThinkingConfig{}
|
||||
}
|
||||
|
||||
// extractCodexConfig extracts thinking configuration from Codex format request body.
|
||||
//
|
||||
// Codex API format (OpenAI Responses API):
|
||||
// - reasoning.effort: "none", "low", "medium", "high"
|
||||
//
|
||||
// This is similar to OpenAI but uses nested field "reasoning.effort" instead of "reasoning_effort".
|
||||
func extractCodexConfig(body []byte) ThinkingConfig {
|
||||
// Check reasoning.effort (Codex / OpenAI Responses API format)
|
||||
if effort := gjson.GetBytes(body, "reasoning.effort"); effort.Exists() {
|
||||
value := effort.String()
|
||||
if value == "none" {
|
||||
return ThinkingConfig{Mode: ModeNone, Budget: 0}
|
||||
}
|
||||
return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(value)}
|
||||
}
|
||||
|
||||
return ThinkingConfig{}
|
||||
}
|
||||
|
||||
// extractIFlowConfig extracts thinking configuration from iFlow format request body.
|
||||
//
|
||||
// iFlow API format (supports multiple model families):
|
||||
// - GLM format: chat_template_kwargs.enable_thinking (boolean)
|
||||
// - MiniMax format: reasoning_split (boolean)
|
||||
//
|
||||
// Returns ModeBudget with Budget=1 as a sentinel value indicating "enabled".
|
||||
// The actual budget/configuration is determined by the iFlow applier based on model capabilities.
|
||||
// Budget=1 is used because iFlow models don't use numeric budgets; they only support on/off.
|
||||
func extractIFlowConfig(body []byte) ThinkingConfig {
|
||||
// GLM format: chat_template_kwargs.enable_thinking
|
||||
if enabled := gjson.GetBytes(body, "chat_template_kwargs.enable_thinking"); enabled.Exists() {
|
||||
if enabled.Bool() {
|
||||
// Budget=1 is a sentinel meaning "enabled" (iFlow doesn't use numeric budgets)
|
||||
return ThinkingConfig{Mode: ModeBudget, Budget: 1}
|
||||
}
|
||||
return ThinkingConfig{Mode: ModeNone, Budget: 0}
|
||||
}
|
||||
|
||||
// MiniMax format: reasoning_split
|
||||
if split := gjson.GetBytes(body, "reasoning_split"); split.Exists() {
|
||||
if split.Bool() {
|
||||
// Budget=1 is a sentinel meaning "enabled" (iFlow doesn't use numeric budgets)
|
||||
return ThinkingConfig{Mode: ModeBudget, Budget: 1}
|
||||
}
|
||||
return ThinkingConfig{Mode: ModeNone, Budget: 0}
|
||||
}
|
||||
|
||||
return ThinkingConfig{}
|
||||
}
|
||||
142
internal/thinking/convert.go
Normal file
142
internal/thinking/convert.go
Normal file
@@ -0,0 +1,142 @@
|
||||
package thinking
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
)
|
||||
|
||||
// levelToBudgetMap defines the standard Level → Budget mapping.
|
||||
// All keys are lowercase; lookups should use strings.ToLower.
|
||||
var levelToBudgetMap = map[string]int{
|
||||
"none": 0,
|
||||
"auto": -1,
|
||||
"minimal": 512,
|
||||
"low": 1024,
|
||||
"medium": 8192,
|
||||
"high": 24576,
|
||||
"xhigh": 32768,
|
||||
}
|
||||
|
||||
// ConvertLevelToBudget converts a thinking level to a budget value.
|
||||
//
|
||||
// This is a semantic conversion that maps discrete levels to numeric budgets.
|
||||
// Level matching is case-insensitive.
|
||||
//
|
||||
// Level → Budget mapping:
|
||||
// - none → 0
|
||||
// - auto → -1
|
||||
// - minimal → 512
|
||||
// - low → 1024
|
||||
// - medium → 8192
|
||||
// - high → 24576
|
||||
// - xhigh → 32768
|
||||
//
|
||||
// Returns:
|
||||
// - budget: The converted budget value
|
||||
// - ok: true if level is valid, false otherwise
|
||||
func ConvertLevelToBudget(level string) (int, bool) {
|
||||
budget, ok := levelToBudgetMap[strings.ToLower(level)]
|
||||
return budget, ok
|
||||
}
|
||||
|
||||
// BudgetThreshold constants define the upper bounds for each thinking level.
|
||||
// These are used by ConvertBudgetToLevel for range-based mapping.
|
||||
const (
|
||||
// ThresholdMinimal is the upper bound for "minimal" level (1-512)
|
||||
ThresholdMinimal = 512
|
||||
// ThresholdLow is the upper bound for "low" level (513-1024)
|
||||
ThresholdLow = 1024
|
||||
// ThresholdMedium is the upper bound for "medium" level (1025-8192)
|
||||
ThresholdMedium = 8192
|
||||
// ThresholdHigh is the upper bound for "high" level (8193-24576)
|
||||
ThresholdHigh = 24576
|
||||
)
|
||||
|
||||
// ConvertBudgetToLevel converts a budget value to the nearest thinking level.
|
||||
//
|
||||
// This is a semantic conversion that maps numeric budgets to discrete levels.
|
||||
// Uses threshold-based mapping for range conversion.
|
||||
//
|
||||
// Budget → Level thresholds:
|
||||
// - -1 → auto
|
||||
// - 0 → none
|
||||
// - 1-512 → minimal
|
||||
// - 513-1024 → low
|
||||
// - 1025-8192 → medium
|
||||
// - 8193-24576 → high
|
||||
// - 24577+ → xhigh
|
||||
//
|
||||
// Returns:
|
||||
// - level: The converted thinking level string
|
||||
// - ok: true if budget is valid, false for invalid negatives (< -1)
|
||||
func ConvertBudgetToLevel(budget int) (string, bool) {
|
||||
switch {
|
||||
case budget < -1:
|
||||
// Invalid negative values
|
||||
return "", false
|
||||
case budget == -1:
|
||||
return string(LevelAuto), true
|
||||
case budget == 0:
|
||||
return string(LevelNone), true
|
||||
case budget <= ThresholdMinimal:
|
||||
return string(LevelMinimal), true
|
||||
case budget <= ThresholdLow:
|
||||
return string(LevelLow), true
|
||||
case budget <= ThresholdMedium:
|
||||
return string(LevelMedium), true
|
||||
case budget <= ThresholdHigh:
|
||||
return string(LevelHigh), true
|
||||
default:
|
||||
return string(LevelXHigh), true
|
||||
}
|
||||
}
|
||||
|
||||
// ModelCapability describes the thinking format support of a model.
|
||||
type ModelCapability int
|
||||
|
||||
const (
|
||||
// CapabilityUnknown indicates modelInfo is nil (passthrough behavior, internal use).
|
||||
CapabilityUnknown ModelCapability = iota - 1
|
||||
// CapabilityNone indicates model doesn't support thinking (Thinking is nil).
|
||||
CapabilityNone
|
||||
// CapabilityBudgetOnly indicates the model supports numeric budgets only.
|
||||
CapabilityBudgetOnly
|
||||
// CapabilityLevelOnly indicates the model supports discrete levels only.
|
||||
CapabilityLevelOnly
|
||||
// CapabilityHybrid indicates the model supports both budgets and levels.
|
||||
CapabilityHybrid
|
||||
)
|
||||
|
||||
// detectModelCapability determines the thinking format capability of a model.
|
||||
//
|
||||
// This is an internal function used by validation and conversion helpers.
|
||||
// It analyzes the model's ThinkingSupport configuration to classify the model:
|
||||
// - CapabilityNone: modelInfo.Thinking is nil (model doesn't support thinking)
|
||||
// - CapabilityBudgetOnly: Has Min/Max but no Levels (Claude, Gemini 2.5)
|
||||
// - CapabilityLevelOnly: Has Levels but no Min/Max (OpenAI, iFlow)
|
||||
// - CapabilityHybrid: Has both Min/Max and Levels (Gemini 3)
|
||||
//
|
||||
// Note: Returns a special sentinel value when modelInfo itself is nil (unknown model).
|
||||
func detectModelCapability(modelInfo *registry.ModelInfo) ModelCapability {
|
||||
if modelInfo == nil {
|
||||
return CapabilityUnknown // sentinel for "passthrough" behavior
|
||||
}
|
||||
if modelInfo.Thinking == nil {
|
||||
return CapabilityNone
|
||||
}
|
||||
support := modelInfo.Thinking
|
||||
hasBudget := support.Min > 0 || support.Max > 0
|
||||
hasLevels := len(support.Levels) > 0
|
||||
|
||||
switch {
|
||||
case hasBudget && hasLevels:
|
||||
return CapabilityHybrid
|
||||
case hasBudget:
|
||||
return CapabilityBudgetOnly
|
||||
case hasLevels:
|
||||
return CapabilityLevelOnly
|
||||
default:
|
||||
return CapabilityNone
|
||||
}
|
||||
}
|
||||
82
internal/thinking/errors.go
Normal file
82
internal/thinking/errors.go
Normal file
@@ -0,0 +1,82 @@
|
||||
// Package thinking provides unified thinking configuration processing logic.
|
||||
package thinking
|
||||
|
||||
import "net/http"
|
||||
|
||||
// ErrorCode represents the type of thinking configuration error.
|
||||
type ErrorCode string
|
||||
|
||||
// Error codes for thinking configuration processing.
|
||||
const (
|
||||
// ErrInvalidSuffix indicates the suffix format cannot be parsed.
|
||||
// Example: "model(abc" (missing closing parenthesis)
|
||||
ErrInvalidSuffix ErrorCode = "INVALID_SUFFIX"
|
||||
|
||||
// ErrUnknownLevel indicates the level value is not in the valid list.
|
||||
// Example: "model(ultra)" where "ultra" is not a valid level
|
||||
ErrUnknownLevel ErrorCode = "UNKNOWN_LEVEL"
|
||||
|
||||
// ErrThinkingNotSupported indicates the model does not support thinking.
|
||||
// Example: claude-haiku-4-5 does not have thinking capability
|
||||
ErrThinkingNotSupported ErrorCode = "THINKING_NOT_SUPPORTED"
|
||||
|
||||
// ErrLevelNotSupported indicates the model does not support level mode.
|
||||
// Example: using level with a budget-only model
|
||||
ErrLevelNotSupported ErrorCode = "LEVEL_NOT_SUPPORTED"
|
||||
|
||||
// ErrBudgetOutOfRange indicates the budget value is outside model range.
|
||||
// Example: budget 64000 exceeds max 20000
|
||||
ErrBudgetOutOfRange ErrorCode = "BUDGET_OUT_OF_RANGE"
|
||||
|
||||
// ErrProviderMismatch indicates the provider does not match the model.
|
||||
// Example: applying Claude format to a Gemini model
|
||||
ErrProviderMismatch ErrorCode = "PROVIDER_MISMATCH"
|
||||
)
|
||||
|
||||
// ThinkingError represents an error that occurred during thinking configuration processing.
|
||||
//
|
||||
// This error type provides structured information about the error, including:
|
||||
// - Code: A machine-readable error code for programmatic handling
|
||||
// - Message: A human-readable description of the error
|
||||
// - Model: The model name related to the error (optional)
|
||||
// - Details: Additional context information (optional)
|
||||
type ThinkingError struct {
|
||||
// Code is the machine-readable error code
|
||||
Code ErrorCode
|
||||
// Message is the human-readable error description.
|
||||
// Should be lowercase, no trailing period, with context if applicable.
|
||||
Message string
|
||||
// Model is the model name related to this error (optional)
|
||||
Model string
|
||||
// Details contains additional context information (optional)
|
||||
Details map[string]interface{}
|
||||
}
|
||||
|
||||
// Error implements the error interface.
|
||||
// Returns the message directly without code prefix.
|
||||
// Use Code field for programmatic error handling.
|
||||
func (e *ThinkingError) Error() string {
|
||||
return e.Message
|
||||
}
|
||||
|
||||
// NewThinkingError creates a new ThinkingError with the given code and message.
|
||||
func NewThinkingError(code ErrorCode, message string) *ThinkingError {
|
||||
return &ThinkingError{
|
||||
Code: code,
|
||||
Message: message,
|
||||
}
|
||||
}
|
||||
|
||||
// NewThinkingErrorWithModel creates a new ThinkingError with model context.
|
||||
func NewThinkingErrorWithModel(code ErrorCode, message, model string) *ThinkingError {
|
||||
return &ThinkingError{
|
||||
Code: code,
|
||||
Message: message,
|
||||
Model: model,
|
||||
}
|
||||
}
|
||||
|
||||
// StatusCode implements a portable status code interface for HTTP handlers.
|
||||
func (e *ThinkingError) StatusCode() int {
|
||||
return http.StatusBadRequest
|
||||
}
|
||||
201
internal/thinking/provider/antigravity/apply.go
Normal file
201
internal/thinking/provider/antigravity/apply.go
Normal file
@@ -0,0 +1,201 @@
|
||||
// Package antigravity implements thinking configuration for Antigravity API format.
|
||||
//
|
||||
// Antigravity uses request.generationConfig.thinkingConfig.* path (same as gemini-cli)
|
||||
// but requires additional normalization for Claude models:
|
||||
// - Ensure thinking budget < max_tokens
|
||||
// - Remove thinkingConfig if budget < minimum allowed
|
||||
package antigravity
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// Applier applies thinking configuration for Antigravity API format.
|
||||
type Applier struct{}
|
||||
|
||||
var _ thinking.ProviderApplier = (*Applier)(nil)
|
||||
|
||||
// NewApplier creates a new Antigravity thinking applier.
|
||||
func NewApplier() *Applier {
|
||||
return &Applier{}
|
||||
}
|
||||
|
||||
func init() {
|
||||
thinking.RegisterProvider("antigravity", NewApplier())
|
||||
}
|
||||
|
||||
// Apply applies thinking configuration to Antigravity request body.
|
||||
//
|
||||
// For Claude models, additional constraints are applied:
|
||||
// - Ensure thinking budget < max_tokens
|
||||
// - Remove thinkingConfig if budget < minimum allowed
|
||||
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
|
||||
if thinking.IsUserDefinedModel(modelInfo) {
|
||||
return a.applyCompatible(body, config, modelInfo)
|
||||
}
|
||||
if modelInfo.Thinking == nil {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
isClaude := strings.Contains(strings.ToLower(modelInfo.ID), "claude")
|
||||
|
||||
// ModeAuto: Always use Budget format with thinkingBudget=-1
|
||||
if config.Mode == thinking.ModeAuto {
|
||||
return a.applyBudgetFormat(body, config, modelInfo, isClaude)
|
||||
}
|
||||
if config.Mode == thinking.ModeBudget {
|
||||
return a.applyBudgetFormat(body, config, modelInfo, isClaude)
|
||||
}
|
||||
|
||||
// For non-auto modes, choose format based on model capabilities
|
||||
support := modelInfo.Thinking
|
||||
if len(support.Levels) > 0 {
|
||||
return a.applyLevelFormat(body, config)
|
||||
}
|
||||
return a.applyBudgetFormat(body, config, modelInfo, isClaude)
|
||||
}
|
||||
|
||||
func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
|
||||
if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
isClaude := false
|
||||
if modelInfo != nil {
|
||||
isClaude = strings.Contains(strings.ToLower(modelInfo.ID), "claude")
|
||||
}
|
||||
|
||||
if config.Mode == thinking.ModeAuto {
|
||||
return a.applyBudgetFormat(body, config, modelInfo, isClaude)
|
||||
}
|
||||
|
||||
if config.Mode == thinking.ModeLevel || (config.Mode == thinking.ModeNone && config.Level != "") {
|
||||
return a.applyLevelFormat(body, config)
|
||||
}
|
||||
|
||||
return a.applyBudgetFormat(body, config, modelInfo, isClaude)
|
||||
}
|
||||
|
||||
func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
||||
// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
|
||||
result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget")
|
||||
// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
|
||||
result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts")
|
||||
|
||||
if config.Mode == thinking.ModeNone {
|
||||
result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", false)
|
||||
if config.Level != "" {
|
||||
result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", string(config.Level))
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// Only handle ModeLevel - budget conversion should be done by upper layer
|
||||
if config.Mode != thinking.ModeLevel {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
level := string(config.Level)
|
||||
result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", level)
|
||||
result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", true)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo, isClaude bool) ([]byte, error) {
|
||||
// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
|
||||
result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingLevel")
|
||||
// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
|
||||
result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts")
|
||||
|
||||
budget := config.Budget
|
||||
includeThoughts := false
|
||||
switch config.Mode {
|
||||
case thinking.ModeNone:
|
||||
includeThoughts = false
|
||||
case thinking.ModeAuto:
|
||||
includeThoughts = true
|
||||
default:
|
||||
includeThoughts = budget > 0
|
||||
}
|
||||
|
||||
// Apply Claude-specific constraints
|
||||
if isClaude && modelInfo != nil {
|
||||
budget, result = a.normalizeClaudeBudget(budget, result, modelInfo)
|
||||
// Check if budget was removed entirely
|
||||
if budget == -2 {
|
||||
return result, nil
|
||||
}
|
||||
}
|
||||
|
||||
result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", includeThoughts)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// normalizeClaudeBudget applies Claude-specific constraints to thinking budget.
|
||||
//
|
||||
// It handles:
|
||||
// - Ensuring thinking budget < max_tokens
|
||||
// - Removing thinkingConfig if budget < minimum allowed
|
||||
//
|
||||
// Returns the normalized budget and updated payload.
|
||||
// Returns budget=-2 as a sentinel indicating thinkingConfig was removed entirely.
|
||||
func (a *Applier) normalizeClaudeBudget(budget int, payload []byte, modelInfo *registry.ModelInfo) (int, []byte) {
|
||||
if modelInfo == nil {
|
||||
return budget, payload
|
||||
}
|
||||
|
||||
// Get effective max tokens
|
||||
effectiveMax, setDefaultMax := a.effectiveMaxTokens(payload, modelInfo)
|
||||
if effectiveMax > 0 && budget >= effectiveMax {
|
||||
budget = effectiveMax - 1
|
||||
}
|
||||
|
||||
// Check minimum budget
|
||||
minBudget := 0
|
||||
if modelInfo.Thinking != nil {
|
||||
minBudget = modelInfo.Thinking.Min
|
||||
}
|
||||
if minBudget > 0 && budget >= 0 && budget < minBudget {
|
||||
// Budget is below minimum, remove thinking config entirely
|
||||
payload, _ = sjson.DeleteBytes(payload, "request.generationConfig.thinkingConfig")
|
||||
return -2, payload
|
||||
}
|
||||
|
||||
// Set default max tokens if needed
|
||||
if setDefaultMax && effectiveMax > 0 {
|
||||
payload, _ = sjson.SetBytes(payload, "request.generationConfig.maxOutputTokens", effectiveMax)
|
||||
}
|
||||
|
||||
return budget, payload
|
||||
}
|
||||
|
||||
// effectiveMaxTokens returns the max tokens to cap thinking:
|
||||
// prefer request-provided maxOutputTokens; otherwise fall back to model default.
|
||||
// The boolean indicates whether the value came from the model default (and thus should be written back).
|
||||
func (a *Applier) effectiveMaxTokens(payload []byte, modelInfo *registry.ModelInfo) (max int, fromModel bool) {
|
||||
if maxTok := gjson.GetBytes(payload, "request.generationConfig.maxOutputTokens"); maxTok.Exists() && maxTok.Int() > 0 {
|
||||
return int(maxTok.Int()), false
|
||||
}
|
||||
if modelInfo != nil && modelInfo.MaxCompletionTokens > 0 {
|
||||
return modelInfo.MaxCompletionTokens, true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
166
internal/thinking/provider/claude/apply.go
Normal file
166
internal/thinking/provider/claude/apply.go
Normal file
@@ -0,0 +1,166 @@
|
||||
// Package claude implements thinking configuration scaffolding for Claude models.
|
||||
//
|
||||
// Claude models use the thinking.budget_tokens format with values in the range
|
||||
// 1024-128000. Some Claude models support ZeroAllowed (sonnet-4-5, opus-4-5),
|
||||
// while older models do not.
|
||||
// See: _bmad-output/planning-artifacts/architecture.md#Epic-6
|
||||
package claude
|
||||
|
||||
import (
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// Applier implements thinking.ProviderApplier for Claude models.
|
||||
// This applier is stateless and holds no configuration.
|
||||
type Applier struct{}
|
||||
|
||||
// NewApplier creates a new Claude thinking applier.
|
||||
func NewApplier() *Applier {
|
||||
return &Applier{}
|
||||
}
|
||||
|
||||
func init() {
|
||||
thinking.RegisterProvider("claude", NewApplier())
|
||||
}
|
||||
|
||||
// Apply applies thinking configuration to Claude request body.
|
||||
//
|
||||
// IMPORTANT: This method expects config to be pre-validated by thinking.ValidateConfig.
|
||||
// ValidateConfig handles:
|
||||
// - Mode conversion (Level→Budget, Auto→Budget)
|
||||
// - Budget clamping to model range
|
||||
// - ZeroAllowed constraint enforcement
|
||||
//
|
||||
// Apply only processes ModeBudget and ModeNone; other modes are passed through unchanged.
|
||||
//
|
||||
// Expected output format when enabled:
|
||||
//
|
||||
// {
|
||||
// "thinking": {
|
||||
// "type": "enabled",
|
||||
// "budget_tokens": 16384
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// Expected output format when disabled:
|
||||
//
|
||||
// {
|
||||
// "thinking": {
|
||||
// "type": "disabled"
|
||||
// }
|
||||
// }
|
||||
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
|
||||
if thinking.IsUserDefinedModel(modelInfo) {
|
||||
return applyCompatibleClaude(body, config)
|
||||
}
|
||||
if modelInfo.Thinking == nil {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
// Only process ModeBudget and ModeNone; other modes pass through
|
||||
// (caller should use ValidateConfig first to normalize modes)
|
||||
if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
// Budget is expected to be pre-validated by ValidateConfig (clamped, ZeroAllowed enforced)
|
||||
// Decide enabled/disabled based on budget value
|
||||
if config.Budget == 0 {
|
||||
result, _ := sjson.SetBytes(body, "thinking.type", "disabled")
|
||||
result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens")
|
||||
return result, nil
|
||||
}
|
||||
|
||||
result, _ := sjson.SetBytes(body, "thinking.type", "enabled")
|
||||
result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget)
|
||||
|
||||
// Ensure max_tokens > thinking.budget_tokens (Anthropic API constraint)
|
||||
result = a.normalizeClaudeBudget(result, config.Budget, modelInfo)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// normalizeClaudeBudget applies Claude-specific constraints to ensure max_tokens > budget_tokens.
|
||||
// Anthropic API requires this constraint; violating it returns a 400 error.
|
||||
func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo *registry.ModelInfo) []byte {
|
||||
if budgetTokens <= 0 {
|
||||
return body
|
||||
}
|
||||
|
||||
// Ensure the request satisfies Claude constraints:
|
||||
// 1) Determine effective max_tokens (request overrides model default)
|
||||
// 2) If budget_tokens >= max_tokens, reduce budget_tokens to max_tokens-1
|
||||
// 3) If the adjusted budget falls below the model minimum, leave the request unchanged
|
||||
// 4) If max_tokens came from model default, write it back into the request
|
||||
|
||||
effectiveMax, setDefaultMax := a.effectiveMaxTokens(body, modelInfo)
|
||||
if setDefaultMax && effectiveMax > 0 {
|
||||
body, _ = sjson.SetBytes(body, "max_tokens", effectiveMax)
|
||||
}
|
||||
|
||||
// Compute the budget we would apply after enforcing budget_tokens < max_tokens.
|
||||
adjustedBudget := budgetTokens
|
||||
if effectiveMax > 0 && adjustedBudget >= effectiveMax {
|
||||
adjustedBudget = effectiveMax - 1
|
||||
}
|
||||
|
||||
minBudget := 0
|
||||
if modelInfo != nil && modelInfo.Thinking != nil {
|
||||
minBudget = modelInfo.Thinking.Min
|
||||
}
|
||||
if minBudget > 0 && adjustedBudget > 0 && adjustedBudget < minBudget {
|
||||
// If enforcing the max_tokens constraint would push the budget below the model minimum,
|
||||
// leave the request unchanged.
|
||||
return body
|
||||
}
|
||||
|
||||
if adjustedBudget != budgetTokens {
|
||||
body, _ = sjson.SetBytes(body, "thinking.budget_tokens", adjustedBudget)
|
||||
}
|
||||
|
||||
return body
|
||||
}
|
||||
|
||||
// effectiveMaxTokens returns the max tokens to cap thinking:
|
||||
// prefer request-provided max_tokens; otherwise fall back to model default.
|
||||
// The boolean indicates whether the value came from the model default (and thus should be written back).
|
||||
func (a *Applier) effectiveMaxTokens(body []byte, modelInfo *registry.ModelInfo) (max int, fromModel bool) {
|
||||
if maxTok := gjson.GetBytes(body, "max_tokens"); maxTok.Exists() && maxTok.Int() > 0 {
|
||||
return int(maxTok.Int()), false
|
||||
}
|
||||
if modelInfo != nil && modelInfo.MaxCompletionTokens > 0 {
|
||||
return modelInfo.MaxCompletionTokens, true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
func applyCompatibleClaude(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
||||
if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
switch config.Mode {
|
||||
case thinking.ModeNone:
|
||||
result, _ := sjson.SetBytes(body, "thinking.type", "disabled")
|
||||
result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens")
|
||||
return result, nil
|
||||
case thinking.ModeAuto:
|
||||
result, _ := sjson.SetBytes(body, "thinking.type", "enabled")
|
||||
result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens")
|
||||
return result, nil
|
||||
default:
|
||||
result, _ := sjson.SetBytes(body, "thinking.type", "enabled")
|
||||
result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget)
|
||||
return result, nil
|
||||
}
|
||||
}
|
||||
131
internal/thinking/provider/codex/apply.go
Normal file
131
internal/thinking/provider/codex/apply.go
Normal file
@@ -0,0 +1,131 @@
|
||||
// Package codex implements thinking configuration for Codex (OpenAI Responses API) models.
|
||||
//
|
||||
// Codex models use the reasoning.effort format with discrete levels
|
||||
// (low/medium/high). This is similar to OpenAI but uses nested field
|
||||
// "reasoning.effort" instead of "reasoning_effort".
|
||||
// See: _bmad-output/planning-artifacts/architecture.md#Epic-8
|
||||
package codex
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// Applier implements thinking.ProviderApplier for Codex models.
|
||||
//
|
||||
// Codex-specific behavior:
|
||||
// - Output format: reasoning.effort (string: low/medium/high/xhigh)
|
||||
// - Level-only mode: no numeric budget support
|
||||
// - Some models support ZeroAllowed (gpt-5.1, gpt-5.2)
|
||||
type Applier struct{}
|
||||
|
||||
var _ thinking.ProviderApplier = (*Applier)(nil)
|
||||
|
||||
// NewApplier creates a new Codex thinking applier.
|
||||
func NewApplier() *Applier {
|
||||
return &Applier{}
|
||||
}
|
||||
|
||||
func init() {
|
||||
thinking.RegisterProvider("codex", NewApplier())
|
||||
}
|
||||
|
||||
// Apply applies thinking configuration to Codex request body.
|
||||
//
|
||||
// Expected output format:
|
||||
//
|
||||
// {
|
||||
// "reasoning": {
|
||||
// "effort": "high"
|
||||
// }
|
||||
// }
|
||||
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
|
||||
if thinking.IsUserDefinedModel(modelInfo) {
|
||||
return applyCompatibleCodex(body, config)
|
||||
}
|
||||
if modelInfo.Thinking == nil {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
// Only handle ModeLevel and ModeNone; other modes pass through unchanged.
|
||||
if config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
if config.Mode == thinking.ModeLevel {
|
||||
result, _ := sjson.SetBytes(body, "reasoning.effort", string(config.Level))
|
||||
return result, nil
|
||||
}
|
||||
|
||||
effort := ""
|
||||
support := modelInfo.Thinking
|
||||
if config.Budget == 0 {
|
||||
if support.ZeroAllowed || hasLevel(support.Levels, string(thinking.LevelNone)) {
|
||||
effort = string(thinking.LevelNone)
|
||||
}
|
||||
}
|
||||
if effort == "" && config.Level != "" {
|
||||
effort = string(config.Level)
|
||||
}
|
||||
if effort == "" && len(support.Levels) > 0 {
|
||||
effort = support.Levels[0]
|
||||
}
|
||||
if effort == "" {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
result, _ := sjson.SetBytes(body, "reasoning.effort", effort)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func applyCompatibleCodex(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
var effort string
|
||||
switch config.Mode {
|
||||
case thinking.ModeLevel:
|
||||
if config.Level == "" {
|
||||
return body, nil
|
||||
}
|
||||
effort = string(config.Level)
|
||||
case thinking.ModeNone:
|
||||
effort = string(thinking.LevelNone)
|
||||
if config.Level != "" {
|
||||
effort = string(config.Level)
|
||||
}
|
||||
case thinking.ModeAuto:
|
||||
// Auto mode for user-defined models: pass through as "auto"
|
||||
effort = string(thinking.LevelAuto)
|
||||
case thinking.ModeBudget:
|
||||
// Budget mode: convert budget to level using threshold mapping
|
||||
level, ok := thinking.ConvertBudgetToLevel(config.Budget)
|
||||
if !ok {
|
||||
return body, nil
|
||||
}
|
||||
effort = level
|
||||
default:
|
||||
return body, nil
|
||||
}
|
||||
|
||||
result, _ := sjson.SetBytes(body, "reasoning.effort", effort)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func hasLevel(levels []string, target string) bool {
|
||||
for _, level := range levels {
|
||||
if strings.EqualFold(strings.TrimSpace(level), target) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
169
internal/thinking/provider/gemini/apply.go
Normal file
169
internal/thinking/provider/gemini/apply.go
Normal file
@@ -0,0 +1,169 @@
|
||||
// Package gemini implements thinking configuration for Gemini models.
|
||||
//
|
||||
// Gemini models have two formats:
|
||||
// - Gemini 2.5: Uses thinkingBudget (numeric)
|
||||
// - Gemini 3.x: Uses thinkingLevel (string: minimal/low/medium/high)
|
||||
// or thinkingBudget=-1 for auto/dynamic mode
|
||||
//
|
||||
// Output format is determined by ThinkingConfig.Mode and ThinkingSupport.Levels:
|
||||
// - ModeAuto: Always uses thinkingBudget=-1 (both Gemini 2.5 and 3.x)
|
||||
// - len(Levels) > 0: Uses thinkingLevel (Gemini 3.x discrete levels)
|
||||
// - len(Levels) == 0: Uses thinkingBudget (Gemini 2.5)
|
||||
package gemini
|
||||
|
||||
import (
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// Applier applies thinking configuration for Gemini models.
|
||||
//
|
||||
// Gemini-specific behavior:
|
||||
// - Gemini 2.5: thinkingBudget format, flash series supports ZeroAllowed
|
||||
// - Gemini 3.x: thinkingLevel format, cannot be disabled
|
||||
// - Use ThinkingSupport.Levels to decide output format
|
||||
type Applier struct{}
|
||||
|
||||
// NewApplier creates a new Gemini thinking applier.
|
||||
func NewApplier() *Applier {
|
||||
return &Applier{}
|
||||
}
|
||||
|
||||
func init() {
|
||||
thinking.RegisterProvider("gemini", NewApplier())
|
||||
}
|
||||
|
||||
// Apply applies thinking configuration to Gemini request body.
|
||||
//
|
||||
// Expected output format (Gemini 2.5):
|
||||
//
|
||||
// {
|
||||
// "generationConfig": {
|
||||
// "thinkingConfig": {
|
||||
// "thinkingBudget": 8192,
|
||||
// "includeThoughts": true
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// Expected output format (Gemini 3.x):
|
||||
//
|
||||
// {
|
||||
// "generationConfig": {
|
||||
// "thinkingConfig": {
|
||||
// "thinkingLevel": "high",
|
||||
// "includeThoughts": true
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
|
||||
if thinking.IsUserDefinedModel(modelInfo) {
|
||||
return a.applyCompatible(body, config)
|
||||
}
|
||||
if modelInfo.Thinking == nil {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
// Choose format based on config.Mode and model capabilities:
|
||||
// - ModeLevel: use Level format (validation will reject unsupported levels)
|
||||
// - ModeNone: use Level format if model has Levels, else Budget format
|
||||
// - ModeBudget/ModeAuto: use Budget format
|
||||
switch config.Mode {
|
||||
case thinking.ModeLevel:
|
||||
return a.applyLevelFormat(body, config)
|
||||
case thinking.ModeNone:
|
||||
// ModeNone: route based on model capability (has Levels or not)
|
||||
if len(modelInfo.Thinking.Levels) > 0 {
|
||||
return a.applyLevelFormat(body, config)
|
||||
}
|
||||
return a.applyBudgetFormat(body, config)
|
||||
default:
|
||||
return a.applyBudgetFormat(body, config)
|
||||
}
|
||||
}
|
||||
|
||||
func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
||||
if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
if config.Mode == thinking.ModeAuto {
|
||||
return a.applyBudgetFormat(body, config)
|
||||
}
|
||||
|
||||
if config.Mode == thinking.ModeLevel || (config.Mode == thinking.ModeNone && config.Level != "") {
|
||||
return a.applyLevelFormat(body, config)
|
||||
}
|
||||
|
||||
return a.applyBudgetFormat(body, config)
|
||||
}
|
||||
|
||||
func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
||||
// ModeNone semantics:
|
||||
// - ModeNone + Budget=0: completely disable thinking (not possible for Level-only models)
|
||||
// - ModeNone + Budget>0: forced to think but hide output (includeThoughts=false)
|
||||
// ValidateConfig sets config.Level to the lowest level when ModeNone + Budget > 0.
|
||||
|
||||
// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
|
||||
result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingBudget")
|
||||
// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
|
||||
result, _ = sjson.DeleteBytes(result, "generationConfig.thinkingConfig.include_thoughts")
|
||||
|
||||
if config.Mode == thinking.ModeNone {
|
||||
result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", false)
|
||||
if config.Level != "" {
|
||||
result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingLevel", string(config.Level))
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// Only handle ModeLevel - budget conversion should be done by upper layer
|
||||
if config.Mode != thinking.ModeLevel {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
level := string(config.Level)
|
||||
result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingLevel", level)
|
||||
result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", true)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
||||
// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
|
||||
result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingLevel")
|
||||
// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
|
||||
result, _ = sjson.DeleteBytes(result, "generationConfig.thinkingConfig.include_thoughts")
|
||||
|
||||
budget := config.Budget
|
||||
// ModeNone semantics:
|
||||
// - ModeNone + Budget=0: completely disable thinking
|
||||
// - ModeNone + Budget>0: forced to think but hide output (includeThoughts=false)
|
||||
// When ZeroAllowed=false, ValidateConfig clamps Budget to Min while preserving ModeNone.
|
||||
includeThoughts := false
|
||||
switch config.Mode {
|
||||
case thinking.ModeNone:
|
||||
includeThoughts = false
|
||||
case thinking.ModeAuto:
|
||||
includeThoughts = true
|
||||
default:
|
||||
includeThoughts = budget > 0
|
||||
}
|
||||
|
||||
result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", includeThoughts)
|
||||
return result, nil
|
||||
}
|
||||
126
internal/thinking/provider/geminicli/apply.go
Normal file
126
internal/thinking/provider/geminicli/apply.go
Normal file
@@ -0,0 +1,126 @@
|
||||
// Package geminicli implements thinking configuration for Gemini CLI API format.
|
||||
//
|
||||
// Gemini CLI uses request.generationConfig.thinkingConfig.* path instead of
|
||||
// generationConfig.thinkingConfig.* used by standard Gemini API.
|
||||
package geminicli
|
||||
|
||||
import (
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// Applier applies thinking configuration for Gemini CLI API format.
|
||||
type Applier struct{}
|
||||
|
||||
var _ thinking.ProviderApplier = (*Applier)(nil)
|
||||
|
||||
// NewApplier creates a new Gemini CLI thinking applier.
|
||||
func NewApplier() *Applier {
|
||||
return &Applier{}
|
||||
}
|
||||
|
||||
func init() {
|
||||
thinking.RegisterProvider("gemini-cli", NewApplier())
|
||||
}
|
||||
|
||||
// Apply applies thinking configuration to Gemini CLI request body.
|
||||
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
|
||||
if thinking.IsUserDefinedModel(modelInfo) {
|
||||
return a.applyCompatible(body, config)
|
||||
}
|
||||
if modelInfo.Thinking == nil {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
// ModeAuto: Always use Budget format with thinkingBudget=-1
|
||||
if config.Mode == thinking.ModeAuto {
|
||||
return a.applyBudgetFormat(body, config)
|
||||
}
|
||||
if config.Mode == thinking.ModeBudget {
|
||||
return a.applyBudgetFormat(body, config)
|
||||
}
|
||||
|
||||
// For non-auto modes, choose format based on model capabilities
|
||||
support := modelInfo.Thinking
|
||||
if len(support.Levels) > 0 {
|
||||
return a.applyLevelFormat(body, config)
|
||||
}
|
||||
return a.applyBudgetFormat(body, config)
|
||||
}
|
||||
|
||||
func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
||||
if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
if config.Mode == thinking.ModeAuto {
|
||||
return a.applyBudgetFormat(body, config)
|
||||
}
|
||||
|
||||
if config.Mode == thinking.ModeLevel || (config.Mode == thinking.ModeNone && config.Level != "") {
|
||||
return a.applyLevelFormat(body, config)
|
||||
}
|
||||
|
||||
return a.applyBudgetFormat(body, config)
|
||||
}
|
||||
|
||||
func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
||||
// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
|
||||
result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget")
|
||||
// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
|
||||
result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts")
|
||||
|
||||
if config.Mode == thinking.ModeNone {
|
||||
result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", false)
|
||||
if config.Level != "" {
|
||||
result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", string(config.Level))
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// Only handle ModeLevel - budget conversion should be done by upper layer
|
||||
if config.Mode != thinking.ModeLevel {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
level := string(config.Level)
|
||||
result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", level)
|
||||
result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", true)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
||||
// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
|
||||
result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingLevel")
|
||||
// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
|
||||
result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts")
|
||||
|
||||
budget := config.Budget
|
||||
includeThoughts := false
|
||||
switch config.Mode {
|
||||
case thinking.ModeNone:
|
||||
includeThoughts = false
|
||||
case thinking.ModeAuto:
|
||||
includeThoughts = true
|
||||
default:
|
||||
includeThoughts = budget > 0
|
||||
}
|
||||
|
||||
result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", includeThoughts)
|
||||
return result, nil
|
||||
}
|
||||
156
internal/thinking/provider/iflow/apply.go
Normal file
156
internal/thinking/provider/iflow/apply.go
Normal file
@@ -0,0 +1,156 @@
|
||||
// Package iflow implements thinking configuration for iFlow models (GLM, MiniMax).
|
||||
//
|
||||
// iFlow models use boolean toggle semantics:
|
||||
// - GLM models: chat_template_kwargs.enable_thinking (boolean)
|
||||
// - MiniMax models: reasoning_split (boolean)
|
||||
//
|
||||
// Level values are converted to boolean: none=false, all others=true
|
||||
// See: _bmad-output/planning-artifacts/architecture.md#Epic-9
|
||||
package iflow
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// Applier implements thinking.ProviderApplier for iFlow models.
|
||||
//
|
||||
// iFlow-specific behavior:
|
||||
// - GLM models: enable_thinking boolean + clear_thinking=false
|
||||
// - MiniMax models: reasoning_split boolean
|
||||
// - Level to boolean: none=false, others=true
|
||||
// - No quantized support (only on/off)
|
||||
type Applier struct{}
|
||||
|
||||
var _ thinking.ProviderApplier = (*Applier)(nil)
|
||||
|
||||
// NewApplier creates a new iFlow thinking applier.
|
||||
func NewApplier() *Applier {
|
||||
return &Applier{}
|
||||
}
|
||||
|
||||
func init() {
|
||||
thinking.RegisterProvider("iflow", NewApplier())
|
||||
}
|
||||
|
||||
// Apply applies thinking configuration to iFlow request body.
|
||||
//
|
||||
// Expected output format (GLM):
|
||||
//
|
||||
// {
|
||||
// "chat_template_kwargs": {
|
||||
// "enable_thinking": true,
|
||||
// "clear_thinking": false
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// Expected output format (MiniMax):
|
||||
//
|
||||
// {
|
||||
// "reasoning_split": true
|
||||
// }
|
||||
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
|
||||
if thinking.IsUserDefinedModel(modelInfo) {
|
||||
return body, nil
|
||||
}
|
||||
if modelInfo.Thinking == nil {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if isGLMModel(modelInfo.ID) {
|
||||
return applyGLM(body, config), nil
|
||||
}
|
||||
|
||||
if isMiniMaxModel(modelInfo.ID) {
|
||||
return applyMiniMax(body, config), nil
|
||||
}
|
||||
|
||||
return body, nil
|
||||
}
|
||||
|
||||
// configToBoolean converts ThinkingConfig to boolean for iFlow models.
|
||||
//
|
||||
// Conversion rules:
|
||||
// - ModeNone: false
|
||||
// - ModeAuto: true
|
||||
// - ModeBudget + Budget=0: false
|
||||
// - ModeBudget + Budget>0: true
|
||||
// - ModeLevel + Level="none": false
|
||||
// - ModeLevel + any other level: true
|
||||
// - Default (unknown mode): true
|
||||
func configToBoolean(config thinking.ThinkingConfig) bool {
|
||||
switch config.Mode {
|
||||
case thinking.ModeNone:
|
||||
return false
|
||||
case thinking.ModeAuto:
|
||||
return true
|
||||
case thinking.ModeBudget:
|
||||
return config.Budget > 0
|
||||
case thinking.ModeLevel:
|
||||
return config.Level != thinking.LevelNone
|
||||
default:
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// applyGLM applies thinking configuration for GLM models.
|
||||
//
|
||||
// Output format when enabled:
|
||||
//
|
||||
// {"chat_template_kwargs": {"enable_thinking": true, "clear_thinking": false}}
|
||||
//
|
||||
// Output format when disabled:
|
||||
//
|
||||
// {"chat_template_kwargs": {"enable_thinking": false}}
|
||||
//
|
||||
// Note: clear_thinking is only set when thinking is enabled, to preserve
|
||||
// thinking output in the response.
|
||||
func applyGLM(body []byte, config thinking.ThinkingConfig) []byte {
|
||||
enableThinking := configToBoolean(config)
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
result, _ := sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
|
||||
|
||||
// clear_thinking only needed when thinking is enabled
|
||||
if enableThinking {
|
||||
result, _ = sjson.SetBytes(result, "chat_template_kwargs.clear_thinking", false)
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// applyMiniMax applies thinking configuration for MiniMax models.
|
||||
//
|
||||
// Output format:
|
||||
//
|
||||
// {"reasoning_split": true/false}
|
||||
func applyMiniMax(body []byte, config thinking.ThinkingConfig) []byte {
|
||||
reasoningSplit := configToBoolean(config)
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
result, _ := sjson.SetBytes(body, "reasoning_split", reasoningSplit)
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// isGLMModel determines if the model is a GLM series model.
|
||||
// GLM models use chat_template_kwargs.enable_thinking format.
|
||||
func isGLMModel(modelID string) bool {
|
||||
return strings.HasPrefix(strings.ToLower(modelID), "glm")
|
||||
}
|
||||
|
||||
// isMiniMaxModel determines if the model is a MiniMax series model.
|
||||
// MiniMax models use reasoning_split format.
|
||||
func isMiniMaxModel(modelID string) bool {
|
||||
return strings.HasPrefix(strings.ToLower(modelID), "minimax")
|
||||
}
|
||||
128
internal/thinking/provider/openai/apply.go
Normal file
128
internal/thinking/provider/openai/apply.go
Normal file
@@ -0,0 +1,128 @@
|
||||
// Package openai implements thinking configuration for OpenAI/Codex models.
|
||||
//
|
||||
// OpenAI models use the reasoning_effort format with discrete levels
|
||||
// (low/medium/high). Some models support xhigh and none levels.
|
||||
// See: _bmad-output/planning-artifacts/architecture.md#Epic-8
|
||||
package openai
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// Applier implements thinking.ProviderApplier for OpenAI models.
|
||||
//
|
||||
// OpenAI-specific behavior:
|
||||
// - Output format: reasoning_effort (string: low/medium/high/xhigh)
|
||||
// - Level-only mode: no numeric budget support
|
||||
// - Some models support ZeroAllowed (gpt-5.1, gpt-5.2)
|
||||
type Applier struct{}
|
||||
|
||||
var _ thinking.ProviderApplier = (*Applier)(nil)
|
||||
|
||||
// NewApplier creates a new OpenAI thinking applier.
|
||||
func NewApplier() *Applier {
|
||||
return &Applier{}
|
||||
}
|
||||
|
||||
func init() {
|
||||
thinking.RegisterProvider("openai", NewApplier())
|
||||
}
|
||||
|
||||
// Apply applies thinking configuration to OpenAI request body.
|
||||
//
|
||||
// Expected output format:
|
||||
//
|
||||
// {
|
||||
// "reasoning_effort": "high"
|
||||
// }
|
||||
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
|
||||
if thinking.IsUserDefinedModel(modelInfo) {
|
||||
return applyCompatibleOpenAI(body, config)
|
||||
}
|
||||
if modelInfo.Thinking == nil {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
// Only handle ModeLevel and ModeNone; other modes pass through unchanged.
|
||||
if config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
if config.Mode == thinking.ModeLevel {
|
||||
result, _ := sjson.SetBytes(body, "reasoning_effort", string(config.Level))
|
||||
return result, nil
|
||||
}
|
||||
|
||||
effort := ""
|
||||
support := modelInfo.Thinking
|
||||
if config.Budget == 0 {
|
||||
if support.ZeroAllowed || hasLevel(support.Levels, string(thinking.LevelNone)) {
|
||||
effort = string(thinking.LevelNone)
|
||||
}
|
||||
}
|
||||
if effort == "" && config.Level != "" {
|
||||
effort = string(config.Level)
|
||||
}
|
||||
if effort == "" && len(support.Levels) > 0 {
|
||||
effort = support.Levels[0]
|
||||
}
|
||||
if effort == "" {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
result, _ := sjson.SetBytes(body, "reasoning_effort", effort)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func applyCompatibleOpenAI(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
var effort string
|
||||
switch config.Mode {
|
||||
case thinking.ModeLevel:
|
||||
if config.Level == "" {
|
||||
return body, nil
|
||||
}
|
||||
effort = string(config.Level)
|
||||
case thinking.ModeNone:
|
||||
effort = string(thinking.LevelNone)
|
||||
if config.Level != "" {
|
||||
effort = string(config.Level)
|
||||
}
|
||||
case thinking.ModeAuto:
|
||||
// Auto mode for user-defined models: pass through as "auto"
|
||||
effort = string(thinking.LevelAuto)
|
||||
case thinking.ModeBudget:
|
||||
// Budget mode: convert budget to level using threshold mapping
|
||||
level, ok := thinking.ConvertBudgetToLevel(config.Budget)
|
||||
if !ok {
|
||||
return body, nil
|
||||
}
|
||||
effort = level
|
||||
default:
|
||||
return body, nil
|
||||
}
|
||||
|
||||
result, _ := sjson.SetBytes(body, "reasoning_effort", effort)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func hasLevel(levels []string, target string) bool {
|
||||
for _, level := range levels {
|
||||
if strings.EqualFold(strings.TrimSpace(level), target) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
58
internal/thinking/strip.go
Normal file
58
internal/thinking/strip.go
Normal file
@@ -0,0 +1,58 @@
|
||||
// Package thinking provides unified thinking configuration processing.
|
||||
package thinking
|
||||
|
||||
import (
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// StripThinkingConfig removes thinking configuration fields from request body.
|
||||
//
|
||||
// This function is used when a model doesn't support thinking but the request
|
||||
// contains thinking configuration. The configuration is silently removed to
|
||||
// prevent upstream API errors.
|
||||
//
|
||||
// Parameters:
|
||||
// - body: Original request body JSON
|
||||
// - provider: Provider name (determines which fields to strip)
|
||||
//
|
||||
// Returns:
|
||||
// - Modified request body JSON with thinking configuration removed
|
||||
// - Original body is returned unchanged if:
|
||||
// - body is empty or invalid JSON
|
||||
// - provider is unknown
|
||||
// - no thinking configuration found
|
||||
func StripThinkingConfig(body []byte, provider string) []byte {
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
return body
|
||||
}
|
||||
|
||||
var paths []string
|
||||
switch provider {
|
||||
case "claude":
|
||||
paths = []string{"thinking"}
|
||||
case "gemini":
|
||||
paths = []string{"generationConfig.thinkingConfig"}
|
||||
case "gemini-cli", "antigravity":
|
||||
paths = []string{"request.generationConfig.thinkingConfig"}
|
||||
case "openai":
|
||||
paths = []string{"reasoning_effort"}
|
||||
case "codex":
|
||||
paths = []string{"reasoning.effort"}
|
||||
case "iflow":
|
||||
paths = []string{
|
||||
"chat_template_kwargs.enable_thinking",
|
||||
"chat_template_kwargs.clear_thinking",
|
||||
"reasoning_split",
|
||||
"reasoning_effort",
|
||||
}
|
||||
default:
|
||||
return body
|
||||
}
|
||||
|
||||
result := body
|
||||
for _, path := range paths {
|
||||
result, _ = sjson.DeleteBytes(result, path)
|
||||
}
|
||||
return result
|
||||
}
|
||||
146
internal/thinking/suffix.go
Normal file
146
internal/thinking/suffix.go
Normal file
@@ -0,0 +1,146 @@
|
||||
// Package thinking provides unified thinking configuration processing.
|
||||
//
|
||||
// This file implements suffix parsing functionality for extracting
|
||||
// thinking configuration from model names in the format model(value).
|
||||
package thinking
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ParseSuffix extracts thinking suffix from a model name.
|
||||
//
|
||||
// The suffix format is: model-name(value)
|
||||
// Examples:
|
||||
// - "claude-sonnet-4-5(16384)" -> ModelName="claude-sonnet-4-5", RawSuffix="16384"
|
||||
// - "gpt-5.2(high)" -> ModelName="gpt-5.2", RawSuffix="high"
|
||||
// - "gemini-2.5-pro" -> ModelName="gemini-2.5-pro", HasSuffix=false
|
||||
//
|
||||
// This function only extracts the suffix; it does not validate or interpret
|
||||
// the suffix content. Use ParseNumericSuffix, ParseLevelSuffix, etc. for
|
||||
// content interpretation.
|
||||
func ParseSuffix(model string) SuffixResult {
|
||||
// Find the last opening parenthesis
|
||||
lastOpen := strings.LastIndex(model, "(")
|
||||
if lastOpen == -1 {
|
||||
return SuffixResult{ModelName: model, HasSuffix: false}
|
||||
}
|
||||
|
||||
// Check if the string ends with a closing parenthesis
|
||||
if !strings.HasSuffix(model, ")") {
|
||||
return SuffixResult{ModelName: model, HasSuffix: false}
|
||||
}
|
||||
|
||||
// Extract components
|
||||
modelName := model[:lastOpen]
|
||||
rawSuffix := model[lastOpen+1 : len(model)-1]
|
||||
|
||||
return SuffixResult{
|
||||
ModelName: modelName,
|
||||
HasSuffix: true,
|
||||
RawSuffix: rawSuffix,
|
||||
}
|
||||
}
|
||||
|
||||
// ParseNumericSuffix attempts to parse a raw suffix as a numeric budget value.
|
||||
//
|
||||
// This function parses the raw suffix content (from ParseSuffix.RawSuffix) as an integer.
|
||||
// Only non-negative integers are considered valid numeric suffixes.
|
||||
//
|
||||
// Platform note: The budget value uses Go's int type, which is 32-bit on 32-bit
|
||||
// systems and 64-bit on 64-bit systems. Values exceeding the platform's int range
|
||||
// will return ok=false.
|
||||
//
|
||||
// Leading zeros are accepted: "08192" parses as 8192.
|
||||
//
|
||||
// Examples:
|
||||
// - "8192" -> budget=8192, ok=true
|
||||
// - "0" -> budget=0, ok=true (represents ModeNone)
|
||||
// - "08192" -> budget=8192, ok=true (leading zeros accepted)
|
||||
// - "-1" -> budget=0, ok=false (negative numbers are not valid numeric suffixes)
|
||||
// - "high" -> budget=0, ok=false (not a number)
|
||||
// - "9223372036854775808" -> budget=0, ok=false (overflow on 64-bit systems)
|
||||
//
|
||||
// For special handling of -1 as auto mode, use ParseSpecialSuffix instead.
|
||||
func ParseNumericSuffix(rawSuffix string) (budget int, ok bool) {
|
||||
if rawSuffix == "" {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
value, err := strconv.Atoi(rawSuffix)
|
||||
if err != nil {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
// Negative numbers are not valid numeric suffixes
|
||||
// -1 should be handled by special value parsing as "auto"
|
||||
if value < 0 {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
return value, true
|
||||
}
|
||||
|
||||
// ParseSpecialSuffix attempts to parse a raw suffix as a special thinking mode value.
|
||||
//
|
||||
// This function handles special strings that represent a change in thinking mode:
|
||||
// - "none" -> ModeNone (disables thinking)
|
||||
// - "auto" -> ModeAuto (automatic/dynamic thinking)
|
||||
// - "-1" -> ModeAuto (numeric representation of auto mode)
|
||||
//
|
||||
// String values are case-insensitive.
|
||||
func ParseSpecialSuffix(rawSuffix string) (mode ThinkingMode, ok bool) {
|
||||
if rawSuffix == "" {
|
||||
return ModeBudget, false
|
||||
}
|
||||
|
||||
// Case-insensitive matching
|
||||
switch strings.ToLower(rawSuffix) {
|
||||
case "none":
|
||||
return ModeNone, true
|
||||
case "auto", "-1":
|
||||
return ModeAuto, true
|
||||
default:
|
||||
return ModeBudget, false
|
||||
}
|
||||
}
|
||||
|
||||
// ParseLevelSuffix attempts to parse a raw suffix as a discrete thinking level.
|
||||
//
|
||||
// This function parses the raw suffix content (from ParseSuffix.RawSuffix) as a level.
|
||||
// Only discrete effort levels are valid: minimal, low, medium, high, xhigh.
|
||||
// Level matching is case-insensitive.
|
||||
//
|
||||
// Special values (none, auto) are NOT handled by this function; use ParseSpecialSuffix
|
||||
// instead. This separation allows callers to prioritize special value handling.
|
||||
//
|
||||
// Examples:
|
||||
// - "high" -> level=LevelHigh, ok=true
|
||||
// - "HIGH" -> level=LevelHigh, ok=true (case insensitive)
|
||||
// - "medium" -> level=LevelMedium, ok=true
|
||||
// - "none" -> level="", ok=false (special value, use ParseSpecialSuffix)
|
||||
// - "auto" -> level="", ok=false (special value, use ParseSpecialSuffix)
|
||||
// - "8192" -> level="", ok=false (numeric, use ParseNumericSuffix)
|
||||
// - "ultra" -> level="", ok=false (unknown level)
|
||||
func ParseLevelSuffix(rawSuffix string) (level ThinkingLevel, ok bool) {
|
||||
if rawSuffix == "" {
|
||||
return "", false
|
||||
}
|
||||
|
||||
// Case-insensitive matching
|
||||
switch strings.ToLower(rawSuffix) {
|
||||
case "minimal":
|
||||
return LevelMinimal, true
|
||||
case "low":
|
||||
return LevelLow, true
|
||||
case "medium":
|
||||
return LevelMedium, true
|
||||
case "high":
|
||||
return LevelHigh, true
|
||||
case "xhigh":
|
||||
return LevelXHigh, true
|
||||
default:
|
||||
return "", false
|
||||
}
|
||||
}
|
||||
41
internal/thinking/text.go
Normal file
41
internal/thinking/text.go
Normal file
@@ -0,0 +1,41 @@
|
||||
package thinking
|
||||
|
||||
import (
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
// GetThinkingText extracts the thinking text from a content part.
|
||||
// Handles various formats:
|
||||
// - Simple string: { "thinking": "text" } or { "text": "text" }
|
||||
// - Wrapped object: { "thinking": { "text": "text", "cache_control": {...} } }
|
||||
// - Gemini-style: { "thought": true, "text": "text" }
|
||||
// Returns the extracted text string.
|
||||
func GetThinkingText(part gjson.Result) string {
|
||||
// Try direct text field first (Gemini-style)
|
||||
if text := part.Get("text"); text.Exists() && text.Type == gjson.String {
|
||||
return text.String()
|
||||
}
|
||||
|
||||
// Try thinking field
|
||||
thinkingField := part.Get("thinking")
|
||||
if !thinkingField.Exists() {
|
||||
return ""
|
||||
}
|
||||
|
||||
// thinking is a string
|
||||
if thinkingField.Type == gjson.String {
|
||||
return thinkingField.String()
|
||||
}
|
||||
|
||||
// thinking is an object with inner text/thinking
|
||||
if thinkingField.IsObject() {
|
||||
if inner := thinkingField.Get("text"); inner.Exists() && inner.Type == gjson.String {
|
||||
return inner.String()
|
||||
}
|
||||
if inner := thinkingField.Get("thinking"); inner.Exists() && inner.Type == gjson.String {
|
||||
return inner.String()
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
116
internal/thinking/types.go
Normal file
116
internal/thinking/types.go
Normal file
@@ -0,0 +1,116 @@
|
||||
// Package thinking provides unified thinking configuration processing.
|
||||
//
|
||||
// This package offers a unified interface for parsing, validating, and applying
|
||||
// thinking configurations across various AI providers (Claude, Gemini, OpenAI, iFlow).
|
||||
package thinking
|
||||
|
||||
import "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
|
||||
// ThinkingMode represents the type of thinking configuration mode.
|
||||
type ThinkingMode int
|
||||
|
||||
const (
|
||||
// ModeBudget indicates using a numeric budget (corresponds to suffix "(1000)" etc.)
|
||||
ModeBudget ThinkingMode = iota
|
||||
// ModeLevel indicates using a discrete level (corresponds to suffix "(high)" etc.)
|
||||
ModeLevel
|
||||
// ModeNone indicates thinking is disabled (corresponds to suffix "(none)" or budget=0)
|
||||
ModeNone
|
||||
// ModeAuto indicates automatic/dynamic thinking (corresponds to suffix "(auto)" or budget=-1)
|
||||
ModeAuto
|
||||
)
|
||||
|
||||
// String returns the string representation of ThinkingMode.
|
||||
func (m ThinkingMode) String() string {
|
||||
switch m {
|
||||
case ModeBudget:
|
||||
return "budget"
|
||||
case ModeLevel:
|
||||
return "level"
|
||||
case ModeNone:
|
||||
return "none"
|
||||
case ModeAuto:
|
||||
return "auto"
|
||||
default:
|
||||
return "unknown"
|
||||
}
|
||||
}
|
||||
|
||||
// ThinkingLevel represents a discrete thinking level.
|
||||
type ThinkingLevel string
|
||||
|
||||
const (
|
||||
// LevelNone disables thinking
|
||||
LevelNone ThinkingLevel = "none"
|
||||
// LevelAuto enables automatic/dynamic thinking
|
||||
LevelAuto ThinkingLevel = "auto"
|
||||
// LevelMinimal sets minimal thinking effort
|
||||
LevelMinimal ThinkingLevel = "minimal"
|
||||
// LevelLow sets low thinking effort
|
||||
LevelLow ThinkingLevel = "low"
|
||||
// LevelMedium sets medium thinking effort
|
||||
LevelMedium ThinkingLevel = "medium"
|
||||
// LevelHigh sets high thinking effort
|
||||
LevelHigh ThinkingLevel = "high"
|
||||
// LevelXHigh sets extra-high thinking effort
|
||||
LevelXHigh ThinkingLevel = "xhigh"
|
||||
)
|
||||
|
||||
// ThinkingConfig represents a unified thinking configuration.
|
||||
//
|
||||
// This struct is used to pass thinking configuration information between components.
|
||||
// Depending on Mode, either Budget or Level field is effective:
|
||||
// - ModeNone: Budget=0, Level is ignored
|
||||
// - ModeAuto: Budget=-1, Level is ignored
|
||||
// - ModeBudget: Budget is a positive integer, Level is ignored
|
||||
// - ModeLevel: Budget is ignored, Level is a valid level
|
||||
type ThinkingConfig struct {
|
||||
// Mode specifies the configuration mode
|
||||
Mode ThinkingMode
|
||||
// Budget is the thinking budget (token count), only effective when Mode is ModeBudget.
|
||||
// Special values: 0 means disabled, -1 means automatic
|
||||
Budget int
|
||||
// Level is the thinking level, only effective when Mode is ModeLevel
|
||||
Level ThinkingLevel
|
||||
}
|
||||
|
||||
// SuffixResult represents the result of parsing a model name for thinking suffix.
|
||||
//
|
||||
// A thinking suffix is specified in the format model-name(value), where value
|
||||
// can be a numeric budget (e.g., "16384") or a level name (e.g., "high").
|
||||
type SuffixResult struct {
|
||||
// ModelName is the model name with the suffix removed.
|
||||
// If no suffix was found, this equals the original input.
|
||||
ModelName string
|
||||
|
||||
// HasSuffix indicates whether a valid suffix was found.
|
||||
HasSuffix bool
|
||||
|
||||
// RawSuffix is the content inside the parentheses, without the parentheses.
|
||||
// Empty string if HasSuffix is false.
|
||||
RawSuffix string
|
||||
}
|
||||
|
||||
// ProviderApplier defines the interface for provider-specific thinking configuration application.
|
||||
//
|
||||
// Types implementing this interface are responsible for converting a unified ThinkingConfig
|
||||
// into provider-specific format and applying it to the request body.
|
||||
//
|
||||
// Implementation requirements:
|
||||
// - Apply method must be idempotent
|
||||
// - Must not modify the input config or modelInfo
|
||||
// - Returns a modified copy of the request body
|
||||
// - Returns appropriate ThinkingError for unsupported configurations
|
||||
type ProviderApplier interface {
|
||||
// Apply applies the thinking configuration to the request body.
|
||||
//
|
||||
// Parameters:
|
||||
// - body: Original request body JSON
|
||||
// - config: Unified thinking configuration
|
||||
// - modelInfo: Model registry information containing ThinkingSupport properties
|
||||
//
|
||||
// Returns:
|
||||
// - Modified request body JSON
|
||||
// - ThinkingError if the configuration is invalid or unsupported
|
||||
Apply(body []byte, config ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error)
|
||||
}
|
||||
378
internal/thinking/validate.go
Normal file
378
internal/thinking/validate.go
Normal file
@@ -0,0 +1,378 @@
|
||||
// Package thinking provides unified thinking configuration processing logic.
|
||||
package thinking
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// ValidateConfig validates a thinking configuration against model capabilities.
|
||||
//
|
||||
// This function performs comprehensive validation:
|
||||
// - Checks if the model supports thinking
|
||||
// - Auto-converts between Budget and Level formats based on model capability
|
||||
// - Validates that requested level is in the model's supported levels list
|
||||
// - Clamps budget values to model's allowed range
|
||||
// - When converting Budget -> Level for level-only models, clamps the derived standard level to the nearest supported level
|
||||
// (special values none/auto are preserved)
|
||||
// - When config comes from a model suffix, strict budget validation is disabled (we clamp instead of error)
|
||||
//
|
||||
// Parameters:
|
||||
// - config: The thinking configuration to validate
|
||||
// - support: Model's ThinkingSupport properties (nil means no thinking support)
|
||||
// - fromFormat: Source provider format (used to determine strict validation rules)
|
||||
// - toFormat: Target provider format
|
||||
// - fromSuffix: Whether config was sourced from model suffix
|
||||
//
|
||||
// Returns:
|
||||
// - Normalized ThinkingConfig with clamped values
|
||||
// - ThinkingError if validation fails (ErrThinkingNotSupported, ErrLevelNotSupported, etc.)
|
||||
//
|
||||
// Auto-conversion behavior:
|
||||
// - Budget-only model + Level config → Level converted to Budget
|
||||
// - Level-only model + Budget config → Budget converted to Level
|
||||
// - Hybrid model → preserve original format
|
||||
func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, fromFormat, toFormat string, fromSuffix bool) (*ThinkingConfig, error) {
|
||||
fromFormat, toFormat = strings.ToLower(strings.TrimSpace(fromFormat)), strings.ToLower(strings.TrimSpace(toFormat))
|
||||
model := "unknown"
|
||||
support := (*registry.ThinkingSupport)(nil)
|
||||
if modelInfo != nil {
|
||||
if modelInfo.ID != "" {
|
||||
model = modelInfo.ID
|
||||
}
|
||||
support = modelInfo.Thinking
|
||||
}
|
||||
|
||||
if support == nil {
|
||||
if config.Mode != ModeNone {
|
||||
return nil, NewThinkingErrorWithModel(ErrThinkingNotSupported, "thinking not supported for this model", model)
|
||||
}
|
||||
return &config, nil
|
||||
}
|
||||
|
||||
allowClampUnsupported := isBudgetBasedProvider(fromFormat) && isLevelBasedProvider(toFormat)
|
||||
strictBudget := !fromSuffix && fromFormat != "" && isSameProviderFamily(fromFormat, toFormat)
|
||||
budgetDerivedFromLevel := false
|
||||
|
||||
capability := detectModelCapability(modelInfo)
|
||||
switch capability {
|
||||
case CapabilityBudgetOnly:
|
||||
if config.Mode == ModeLevel {
|
||||
if config.Level == LevelAuto {
|
||||
break
|
||||
}
|
||||
budget, ok := ConvertLevelToBudget(string(config.Level))
|
||||
if !ok {
|
||||
return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("unknown level: %s", config.Level))
|
||||
}
|
||||
config.Mode = ModeBudget
|
||||
config.Budget = budget
|
||||
config.Level = ""
|
||||
budgetDerivedFromLevel = true
|
||||
}
|
||||
case CapabilityLevelOnly:
|
||||
if config.Mode == ModeBudget {
|
||||
level, ok := ConvertBudgetToLevel(config.Budget)
|
||||
if !ok {
|
||||
return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("budget %d cannot be converted to a valid level", config.Budget))
|
||||
}
|
||||
// When converting Budget -> Level for level-only models, clamp the derived standard level
|
||||
// to the nearest supported level. Special values (none/auto) are preserved.
|
||||
config.Mode = ModeLevel
|
||||
config.Level = clampLevel(ThinkingLevel(level), modelInfo, toFormat)
|
||||
config.Budget = 0
|
||||
}
|
||||
case CapabilityHybrid:
|
||||
}
|
||||
|
||||
if config.Mode == ModeLevel && config.Level == LevelNone {
|
||||
config.Mode = ModeNone
|
||||
config.Budget = 0
|
||||
config.Level = ""
|
||||
}
|
||||
if config.Mode == ModeLevel && config.Level == LevelAuto {
|
||||
config.Mode = ModeAuto
|
||||
config.Budget = -1
|
||||
config.Level = ""
|
||||
}
|
||||
if config.Mode == ModeBudget && config.Budget == 0 {
|
||||
config.Mode = ModeNone
|
||||
config.Level = ""
|
||||
}
|
||||
|
||||
if len(support.Levels) > 0 && config.Mode == ModeLevel {
|
||||
if !isLevelSupported(string(config.Level), support.Levels) {
|
||||
if allowClampUnsupported {
|
||||
config.Level = clampLevel(config.Level, modelInfo, toFormat)
|
||||
}
|
||||
if !isLevelSupported(string(config.Level), support.Levels) {
|
||||
// User explicitly specified an unsupported level - return error
|
||||
// (budget-derived levels may be clamped based on source format)
|
||||
validLevels := normalizeLevels(support.Levels)
|
||||
message := fmt.Sprintf("level %q not supported, valid levels: %s", strings.ToLower(string(config.Level)), strings.Join(validLevels, ", "))
|
||||
return nil, NewThinkingError(ErrLevelNotSupported, message)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if strictBudget && config.Mode == ModeBudget && !budgetDerivedFromLevel {
|
||||
min, max := support.Min, support.Max
|
||||
if min != 0 || max != 0 {
|
||||
if config.Budget < min || config.Budget > max || (config.Budget == 0 && !support.ZeroAllowed) {
|
||||
message := fmt.Sprintf("budget %d out of range [%d,%d]", config.Budget, min, max)
|
||||
return nil, NewThinkingError(ErrBudgetOutOfRange, message)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convert ModeAuto to mid-range if dynamic not allowed
|
||||
if config.Mode == ModeAuto && !support.DynamicAllowed {
|
||||
config = convertAutoToMidRange(config, support, toFormat, model)
|
||||
}
|
||||
|
||||
if config.Mode == ModeNone && toFormat == "claude" {
|
||||
// Claude supports explicit disable via thinking.type="disabled".
|
||||
// Keep Budget=0 so applier can omit budget_tokens.
|
||||
config.Budget = 0
|
||||
config.Level = ""
|
||||
} else {
|
||||
switch config.Mode {
|
||||
case ModeBudget, ModeAuto, ModeNone:
|
||||
config.Budget = clampBudget(config.Budget, modelInfo, toFormat)
|
||||
}
|
||||
|
||||
// ModeNone with clamped Budget > 0: set Level to lowest for Level-only/Hybrid models
|
||||
// This ensures Apply layer doesn't need to access support.Levels
|
||||
if config.Mode == ModeNone && config.Budget > 0 && len(support.Levels) > 0 {
|
||||
config.Level = ThinkingLevel(support.Levels[0])
|
||||
}
|
||||
}
|
||||
|
||||
return &config, nil
|
||||
}
|
||||
|
||||
// convertAutoToMidRange converts ModeAuto to a mid-range value when dynamic is not allowed.
|
||||
//
|
||||
// This function handles the case where a model does not support dynamic/auto thinking.
|
||||
// The auto mode is silently converted to a fixed value based on model capability:
|
||||
// - Level-only models: convert to ModeLevel with LevelMedium
|
||||
// - Budget models: convert to ModeBudget with mid = (Min + Max) / 2
|
||||
//
|
||||
// Logging:
|
||||
// - Debug level when conversion occurs
|
||||
// - Fields: original_mode, clamped_to, reason
|
||||
func convertAutoToMidRange(config ThinkingConfig, support *registry.ThinkingSupport, provider, model string) ThinkingConfig {
|
||||
// For level-only models (has Levels but no Min/Max range), use ModeLevel with medium
|
||||
if len(support.Levels) > 0 && support.Min == 0 && support.Max == 0 {
|
||||
config.Mode = ModeLevel
|
||||
config.Level = LevelMedium
|
||||
config.Budget = 0
|
||||
log.WithFields(log.Fields{
|
||||
"provider": provider,
|
||||
"model": model,
|
||||
"original_mode": "auto",
|
||||
"clamped_to": string(LevelMedium),
|
||||
}).Debug("thinking: mode converted, dynamic not allowed, using medium level |")
|
||||
return config
|
||||
}
|
||||
|
||||
// For budget models, use mid-range budget
|
||||
mid := (support.Min + support.Max) / 2
|
||||
if mid <= 0 && support.ZeroAllowed {
|
||||
config.Mode = ModeNone
|
||||
config.Budget = 0
|
||||
} else if mid <= 0 {
|
||||
config.Mode = ModeBudget
|
||||
config.Budget = support.Min
|
||||
} else {
|
||||
config.Mode = ModeBudget
|
||||
config.Budget = mid
|
||||
}
|
||||
log.WithFields(log.Fields{
|
||||
"provider": provider,
|
||||
"model": model,
|
||||
"original_mode": "auto",
|
||||
"clamped_to": config.Budget,
|
||||
}).Debug("thinking: mode converted, dynamic not allowed |")
|
||||
return config
|
||||
}
|
||||
|
||||
// standardLevelOrder defines the canonical ordering of thinking levels from lowest to highest.
|
||||
var standardLevelOrder = []ThinkingLevel{LevelMinimal, LevelLow, LevelMedium, LevelHigh, LevelXHigh}
|
||||
|
||||
// clampLevel clamps the given level to the nearest supported level.
|
||||
// On tie, prefers the lower level.
|
||||
func clampLevel(level ThinkingLevel, modelInfo *registry.ModelInfo, provider string) ThinkingLevel {
|
||||
model := "unknown"
|
||||
var supported []string
|
||||
if modelInfo != nil {
|
||||
if modelInfo.ID != "" {
|
||||
model = modelInfo.ID
|
||||
}
|
||||
if modelInfo.Thinking != nil {
|
||||
supported = modelInfo.Thinking.Levels
|
||||
}
|
||||
}
|
||||
|
||||
if len(supported) == 0 || isLevelSupported(string(level), supported) {
|
||||
return level
|
||||
}
|
||||
|
||||
pos := levelIndex(string(level))
|
||||
if pos == -1 {
|
||||
return level
|
||||
}
|
||||
bestIdx, bestDist := -1, len(standardLevelOrder)+1
|
||||
|
||||
for _, s := range supported {
|
||||
if idx := levelIndex(strings.TrimSpace(s)); idx != -1 {
|
||||
if dist := abs(pos - idx); dist < bestDist || (dist == bestDist && idx < bestIdx) {
|
||||
bestIdx, bestDist = idx, dist
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if bestIdx >= 0 {
|
||||
clamped := standardLevelOrder[bestIdx]
|
||||
log.WithFields(log.Fields{
|
||||
"provider": provider,
|
||||
"model": model,
|
||||
"original_value": string(level),
|
||||
"clamped_to": string(clamped),
|
||||
}).Debug("thinking: level clamped |")
|
||||
return clamped
|
||||
}
|
||||
return level
|
||||
}
|
||||
|
||||
// clampBudget clamps a budget value to the model's supported range.
|
||||
func clampBudget(value int, modelInfo *registry.ModelInfo, provider string) int {
|
||||
model := "unknown"
|
||||
support := (*registry.ThinkingSupport)(nil)
|
||||
if modelInfo != nil {
|
||||
if modelInfo.ID != "" {
|
||||
model = modelInfo.ID
|
||||
}
|
||||
support = modelInfo.Thinking
|
||||
}
|
||||
if support == nil {
|
||||
return value
|
||||
}
|
||||
|
||||
// Auto value (-1) passes through without clamping.
|
||||
if value == -1 {
|
||||
return value
|
||||
}
|
||||
|
||||
min, max := support.Min, support.Max
|
||||
if value == 0 && !support.ZeroAllowed {
|
||||
log.WithFields(log.Fields{
|
||||
"provider": provider,
|
||||
"model": model,
|
||||
"original_value": value,
|
||||
"clamped_to": min,
|
||||
"min": min,
|
||||
"max": max,
|
||||
}).Warn("thinking: budget zero not allowed |")
|
||||
return min
|
||||
}
|
||||
|
||||
// Some models are level-only and do not define numeric budget ranges.
|
||||
if min == 0 && max == 0 {
|
||||
return value
|
||||
}
|
||||
|
||||
if value < min {
|
||||
if value == 0 && support.ZeroAllowed {
|
||||
return 0
|
||||
}
|
||||
logClamp(provider, model, value, min, min, max)
|
||||
return min
|
||||
}
|
||||
if value > max {
|
||||
logClamp(provider, model, value, max, min, max)
|
||||
return max
|
||||
}
|
||||
return value
|
||||
}
|
||||
|
||||
func isLevelSupported(level string, supported []string) bool {
|
||||
for _, s := range supported {
|
||||
if strings.EqualFold(level, strings.TrimSpace(s)) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func levelIndex(level string) int {
|
||||
for i, l := range standardLevelOrder {
|
||||
if strings.EqualFold(level, string(l)) {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func normalizeLevels(levels []string) []string {
|
||||
out := make([]string, len(levels))
|
||||
for i, l := range levels {
|
||||
out[i] = strings.ToLower(strings.TrimSpace(l))
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func isBudgetBasedProvider(provider string) bool {
|
||||
switch provider {
|
||||
case "gemini", "gemini-cli", "antigravity", "claude":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func isLevelBasedProvider(provider string) bool {
|
||||
switch provider {
|
||||
case "openai", "openai-response", "codex":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func isGeminiFamily(provider string) bool {
|
||||
switch provider {
|
||||
case "gemini", "gemini-cli", "antigravity":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func isSameProviderFamily(from, to string) bool {
|
||||
if from == to {
|
||||
return true
|
||||
}
|
||||
return isGeminiFamily(from) && isGeminiFamily(to)
|
||||
}
|
||||
|
||||
func abs(x int) int {
|
||||
if x < 0 {
|
||||
return -x
|
||||
}
|
||||
return x
|
||||
}
|
||||
|
||||
func logClamp(provider, model string, original, clampedTo, min, max int) {
|
||||
log.WithFields(log.Fields{
|
||||
"provider": provider,
|
||||
"model": model,
|
||||
"original_value": original,
|
||||
"min": min,
|
||||
"max": max,
|
||||
"clamped_to": clampedTo,
|
||||
}).Debug("thinking: budget clamped |")
|
||||
}
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/cache"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
"github.com/tidwall/gjson"
|
||||
@@ -21,6 +22,14 @@ import (
|
||||
// deriveSessionID generates a stable session ID from the request.
|
||||
// Uses the hash of the first user message to identify the conversation.
|
||||
func deriveSessionID(rawJSON []byte) string {
|
||||
userIDResult := gjson.GetBytes(rawJSON, "metadata.user_id")
|
||||
if userIDResult.Exists() {
|
||||
userID := userIDResult.String()
|
||||
idx := strings.Index(userID, "session_")
|
||||
if idx != -1 {
|
||||
return userID[idx+8:]
|
||||
}
|
||||
}
|
||||
messages := gjson.GetBytes(rawJSON, "messages")
|
||||
if !messages.IsArray() {
|
||||
return ""
|
||||
@@ -60,6 +69,7 @@ func deriveSessionID(rawJSON []byte) string {
|
||||
// Returns:
|
||||
// - []byte: The transformed request data in Gemini CLI API format
|
||||
func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _ bool) []byte {
|
||||
enableThoughtTranslate := true
|
||||
rawJSON := bytes.Clone(inputRawJSON)
|
||||
|
||||
// Derive session ID for signature caching
|
||||
@@ -122,26 +132,33 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
contentTypeResult := contentResult.Get("type")
|
||||
if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "thinking" {
|
||||
// Use GetThinkingText to handle wrapped thinking objects
|
||||
thinkingText := util.GetThinkingText(contentResult)
|
||||
signatureResult := contentResult.Get("signature")
|
||||
clientSignature := ""
|
||||
if signatureResult.Exists() && signatureResult.String() != "" {
|
||||
clientSignature = signatureResult.String()
|
||||
}
|
||||
thinkingText := thinking.GetThinkingText(contentResult)
|
||||
|
||||
// Always try cached signature first (more reliable than client-provided)
|
||||
// Client may send stale or invalid signatures from different sessions
|
||||
signature := ""
|
||||
if sessionID != "" && thinkingText != "" {
|
||||
if cachedSig := cache.GetCachedSignature(sessionID, thinkingText); cachedSig != "" {
|
||||
if cachedSig := cache.GetCachedSignature(modelName, sessionID, thinkingText); cachedSig != "" {
|
||||
signature = cachedSig
|
||||
// log.Debugf("Using cached signature for thinking block")
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to client signature only if cache miss and client signature is valid
|
||||
if signature == "" && cache.HasValidSignature(clientSignature) {
|
||||
signature = clientSignature
|
||||
if signature == "" {
|
||||
signatureResult := contentResult.Get("signature")
|
||||
clientSignature := ""
|
||||
if signatureResult.Exists() && signatureResult.String() != "" {
|
||||
arrayClientSignatures := strings.SplitN(signatureResult.String(), "#", 2)
|
||||
if len(arrayClientSignatures) == 2 {
|
||||
if modelName == arrayClientSignatures[0] {
|
||||
clientSignature = arrayClientSignatures[1]
|
||||
}
|
||||
}
|
||||
}
|
||||
if cache.HasValidSignature(clientSignature) {
|
||||
signature = clientSignature
|
||||
}
|
||||
// log.Debugf("Using client-provided signature for thinking block")
|
||||
}
|
||||
|
||||
@@ -158,6 +175,7 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
// Converting to text would break this requirement
|
||||
if isUnsigned {
|
||||
// log.Debugf("Dropping unsigned thinking block (no valid signature)")
|
||||
enableThoughtTranslate = false
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -385,12 +403,12 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
}
|
||||
|
||||
// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
|
||||
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
|
||||
if t := gjson.GetBytes(rawJSON, "thinking"); enableThoughtTranslate && t.Exists() && t.IsObject() {
|
||||
if t.Get("type").String() == "enabled" {
|
||||
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
|
||||
budget := int(b.Int())
|
||||
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/cache"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
@@ -75,28 +76,42 @@ func TestConvertClaudeRequestToAntigravity_RoleMapping(t *testing.T) {
|
||||
func TestConvertClaudeRequestToAntigravity_ThinkingBlocks(t *testing.T) {
|
||||
// Valid signature must be at least 50 characters
|
||||
validSignature := "abc123validSignature1234567890123456789012345678901234567890"
|
||||
thinkingText := "Let me think..."
|
||||
|
||||
// Pre-cache the signature (simulating a response from the same session)
|
||||
// The session ID is derived from the first user message hash
|
||||
// Since there's no user message in this test, we need to add one
|
||||
inputJSON := []byte(`{
|
||||
"model": "claude-sonnet-4-5-thinking",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [{"type": "text", "text": "Test user message"}]
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "thinking", "thinking": "Let me think...", "signature": "` + validSignature + `"},
|
||||
{"type": "thinking", "thinking": "` + thinkingText + `", "signature": "` + validSignature + `"},
|
||||
{"type": "text", "text": "Answer"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`)
|
||||
|
||||
// Derive session ID and cache the signature
|
||||
sessionID := deriveSessionID(inputJSON)
|
||||
cache.CacheSignature(sessionID, thinkingText, validSignature)
|
||||
defer cache.ClearSignatureCache(sessionID)
|
||||
|
||||
output := ConvertClaudeRequestToAntigravity("claude-sonnet-4-5-thinking", inputJSON, false)
|
||||
outputStr := string(output)
|
||||
|
||||
// Check thinking block conversion
|
||||
firstPart := gjson.Get(outputStr, "request.contents.0.parts.0")
|
||||
// Check thinking block conversion (now in contents.1 due to user message)
|
||||
firstPart := gjson.Get(outputStr, "request.contents.1.parts.0")
|
||||
if !firstPart.Get("thought").Bool() {
|
||||
t.Error("thinking block should have thought: true")
|
||||
}
|
||||
if firstPart.Get("text").String() != "Let me think..." {
|
||||
if firstPart.Get("text").String() != thinkingText {
|
||||
t.Error("thinking text mismatch")
|
||||
}
|
||||
if firstPart.Get("thoughtSignature").String() != validSignature {
|
||||
@@ -227,13 +242,19 @@ func TestConvertClaudeRequestToAntigravity_ToolUse(t *testing.T) {
|
||||
|
||||
func TestConvertClaudeRequestToAntigravity_ToolUse_WithSignature(t *testing.T) {
|
||||
validSignature := "abc123validSignature1234567890123456789012345678901234567890"
|
||||
thinkingText := "Let me think..."
|
||||
|
||||
inputJSON := []byte(`{
|
||||
"model": "claude-sonnet-4-5-thinking",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [{"type": "text", "text": "Test user message"}]
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "thinking", "thinking": "Let me think...", "signature": "` + validSignature + `"},
|
||||
{"type": "thinking", "thinking": "` + thinkingText + `", "signature": "` + validSignature + `"},
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": "call_123",
|
||||
@@ -245,11 +266,16 @@ func TestConvertClaudeRequestToAntigravity_ToolUse_WithSignature(t *testing.T) {
|
||||
]
|
||||
}`)
|
||||
|
||||
// Derive session ID and cache the signature
|
||||
sessionID := deriveSessionID(inputJSON)
|
||||
cache.CacheSignature(sessionID, thinkingText, validSignature)
|
||||
defer cache.ClearSignatureCache(sessionID)
|
||||
|
||||
output := ConvertClaudeRequestToAntigravity("claude-sonnet-4-5-thinking", inputJSON, false)
|
||||
outputStr := string(output)
|
||||
|
||||
// Check function call has the signature from the preceding thinking block
|
||||
part := gjson.Get(outputStr, "request.contents.0.parts.1")
|
||||
// Check function call has the signature from the preceding thinking block (now in contents.1)
|
||||
part := gjson.Get(outputStr, "request.contents.1.parts.1")
|
||||
if part.Get("functionCall.name").String() != "get_weather" {
|
||||
t.Errorf("Expected functionCall, got %s", part.Raw)
|
||||
}
|
||||
@@ -261,24 +287,35 @@ func TestConvertClaudeRequestToAntigravity_ToolUse_WithSignature(t *testing.T) {
|
||||
func TestConvertClaudeRequestToAntigravity_ReorderThinking(t *testing.T) {
|
||||
// Case: text block followed by thinking block -> should be reordered to thinking first
|
||||
validSignature := "abc123validSignature1234567890123456789012345678901234567890"
|
||||
thinkingText := "Planning..."
|
||||
|
||||
inputJSON := []byte(`{
|
||||
"model": "claude-sonnet-4-5-thinking",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [{"type": "text", "text": "Test user message"}]
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "text", "text": "Here is the plan."},
|
||||
{"type": "thinking", "thinking": "Planning...", "signature": "` + validSignature + `"}
|
||||
{"type": "thinking", "thinking": "` + thinkingText + `", "signature": "` + validSignature + `"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`)
|
||||
|
||||
// Derive session ID and cache the signature
|
||||
sessionID := deriveSessionID(inputJSON)
|
||||
cache.CacheSignature(sessionID, thinkingText, validSignature)
|
||||
defer cache.ClearSignatureCache(sessionID)
|
||||
|
||||
output := ConvertClaudeRequestToAntigravity("claude-sonnet-4-5-thinking", inputJSON, false)
|
||||
outputStr := string(output)
|
||||
|
||||
// Verify order: Thinking block MUST be first
|
||||
parts := gjson.Get(outputStr, "request.contents.0.parts").Array()
|
||||
// Verify order: Thinking block MUST be first (now in contents.1 due to user message)
|
||||
parts := gjson.Get(outputStr, "request.contents.1.parts").Array()
|
||||
if len(parts) != 2 {
|
||||
t.Fatalf("Expected 2 parts, got %d", len(parts))
|
||||
}
|
||||
@@ -343,8 +380,8 @@ func TestConvertClaudeRequestToAntigravity_ThinkingConfig(t *testing.T) {
|
||||
if thinkingConfig.Get("thinkingBudget").Int() != 8000 {
|
||||
t.Errorf("Expected thinkingBudget 8000, got %d", thinkingConfig.Get("thinkingBudget").Int())
|
||||
}
|
||||
if !thinkingConfig.Get("include_thoughts").Bool() {
|
||||
t.Error("include_thoughts should be true")
|
||||
if !thinkingConfig.Get("includeThoughts").Bool() {
|
||||
t.Error("includeThoughts should be true")
|
||||
}
|
||||
} else {
|
||||
t.Log("thinkingConfig not present - model may not be registered in test registry")
|
||||
@@ -460,6 +497,9 @@ func TestConvertClaudeRequestToAntigravity_TrailingUnsignedThinking_Removed(t *t
|
||||
|
||||
func TestConvertClaudeRequestToAntigravity_TrailingSignedThinking_Kept(t *testing.T) {
|
||||
// Last assistant message ends with signed thinking block - should be kept
|
||||
validSignature := "abc123validSignature1234567890123456789012345678901234567890"
|
||||
thinkingText := "Valid thinking..."
|
||||
|
||||
inputJSON := []byte(`{
|
||||
"model": "claude-sonnet-4-5-thinking",
|
||||
"messages": [
|
||||
@@ -471,12 +511,17 @@ func TestConvertClaudeRequestToAntigravity_TrailingSignedThinking_Kept(t *testin
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "text", "text": "Here is my answer"},
|
||||
{"type": "thinking", "thinking": "Valid thinking...", "signature": "abc123validSignature1234567890123456789012345678901234567890"}
|
||||
{"type": "thinking", "thinking": "` + thinkingText + `", "signature": "` + validSignature + `"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`)
|
||||
|
||||
// Derive session ID and cache the signature
|
||||
sessionID := deriveSessionID(inputJSON)
|
||||
cache.CacheSignature(sessionID, thinkingText, validSignature)
|
||||
defer cache.ClearSignatureCache(sessionID)
|
||||
|
||||
output := ConvertClaudeRequestToAntigravity("claude-sonnet-4-5-thinking", inputJSON, false)
|
||||
outputStr := string(output)
|
||||
|
||||
|
||||
@@ -73,6 +73,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
||||
SessionID: deriveSessionID(originalRequestRawJSON),
|
||||
}
|
||||
}
|
||||
modelName := gjson.GetBytes(requestRawJSON, "model").String()
|
||||
|
||||
params := (*param).(*Params)
|
||||
|
||||
@@ -139,13 +140,13 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
||||
// log.Debug("Branch: signature_delta")
|
||||
|
||||
if params.SessionID != "" && params.CurrentThinkingText.Len() > 0 {
|
||||
cache.CacheSignature(params.SessionID, params.CurrentThinkingText.String(), thoughtSignature.String())
|
||||
cache.CacheSignature(modelName, params.SessionID, params.CurrentThinkingText.String(), thoughtSignature.String())
|
||||
// log.Debugf("Cached signature for thinking block (sessionID=%s, textLen=%d)", params.SessionID, params.CurrentThinkingText.Len())
|
||||
params.CurrentThinkingText.Reset()
|
||||
}
|
||||
|
||||
output = output + "event: content_block_delta\n"
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":""}}`, params.ResponseIndex), "delta.signature", thoughtSignature.String())
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":""}}`, params.ResponseIndex), "delta.signature", fmt.Sprintf("%s#%s", modelName, thoughtSignature.String()))
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
params.HasContent = true
|
||||
} else if params.ResponseType == 2 { // Continue existing thinking block if already in thinking state
|
||||
@@ -372,7 +373,7 @@ func resolveStopReason(params *Params) string {
|
||||
// - string: A Claude-compatible JSON response.
|
||||
func ConvertAntigravityResponseToClaudeNonStream(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, _ *any) string {
|
||||
_ = originalRequestRawJSON
|
||||
_ = requestRawJSON
|
||||
modelName := gjson.GetBytes(requestRawJSON, "model").String()
|
||||
|
||||
root := gjson.ParseBytes(rawJSON)
|
||||
promptTokens := root.Get("response.usageMetadata.promptTokenCount").Int()
|
||||
@@ -437,7 +438,7 @@ func ConvertAntigravityResponseToClaudeNonStream(_ context.Context, _ string, or
|
||||
block := `{"type":"thinking","thinking":""}`
|
||||
block, _ = sjson.Set(block, "thinking", thinkingBuilder.String())
|
||||
if thinkingSignature != "" {
|
||||
block, _ = sjson.Set(block, "signature", thinkingSignature)
|
||||
block, _ = sjson.Set(block, "signature", fmt.Sprintf("%s#%s", modelName, thinkingSignature))
|
||||
}
|
||||
responseJSON, _ = sjson.SetRaw(responseJSON, "content.-1", block)
|
||||
thinkingBuilder.Reset()
|
||||
|
||||
@@ -35,66 +35,19 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
// Model
|
||||
out, _ = sjson.SetBytes(out, "model", modelName)
|
||||
|
||||
// Reasoning effort -> thinkingBudget/include_thoughts
|
||||
// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
|
||||
// Apply thinking configuration: convert OpenAI reasoning_effort to Gemini CLI thinkingConfig.
|
||||
// Inline translation-only mapping; capability checks happen later in ApplyThinking.
|
||||
re := gjson.GetBytes(rawJSON, "reasoning_effort")
|
||||
hasOfficialThinking := re.Exists()
|
||||
if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
|
||||
if re.Exists() {
|
||||
effort := strings.ToLower(strings.TrimSpace(re.String()))
|
||||
if util.IsGemini3Model(modelName) {
|
||||
switch effort {
|
||||
case "none":
|
||||
out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig")
|
||||
case "auto":
|
||||
includeThoughts := true
|
||||
out = util.ApplyGeminiCLIThinkingLevel(out, "", &includeThoughts)
|
||||
default:
|
||||
if level, ok := util.ValidateGemini3ThinkingLevel(modelName, effort); ok {
|
||||
out = util.ApplyGeminiCLIThinkingLevel(out, level, nil)
|
||||
}
|
||||
}
|
||||
} else if !util.ModelUsesThinkingLevels(modelName) {
|
||||
out = util.ApplyReasoningEffortToGeminiCLI(out, effort)
|
||||
}
|
||||
}
|
||||
|
||||
// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
|
||||
// Only apply for models that use numeric budgets, not discrete levels.
|
||||
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
|
||||
if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
|
||||
var setBudget bool
|
||||
var budget int
|
||||
|
||||
if v := tc.Get("thinkingBudget"); v.Exists() {
|
||||
budget = int(v.Int())
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
setBudget = true
|
||||
} else if v := tc.Get("thinking_budget"); v.Exists() {
|
||||
budget = int(v.Int())
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
setBudget = true
|
||||
}
|
||||
|
||||
if v := tc.Get("includeThoughts"); v.Exists() {
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
|
||||
} else if v := tc.Get("include_thoughts"); v.Exists() {
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
|
||||
} else if setBudget && budget != 0 {
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Claude/Anthropic API format: thinking.type == "enabled" with budget_tokens
|
||||
// This allows Claude Code and other Claude API clients to pass thinking configuration
|
||||
if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && util.ModelSupportsThinking(modelName) {
|
||||
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
|
||||
if t.Get("type").String() == "enabled" {
|
||||
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
|
||||
budget := int(b.Int())
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
}
|
||||
if effort != "" {
|
||||
thinkingPath := "request.generationConfig.thinkingConfig"
|
||||
if effort == "auto" {
|
||||
out, _ = sjson.SetBytes(out, thinkingPath+".thinkingBudget", -1)
|
||||
out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", true)
|
||||
} else {
|
||||
out, _ = sjson.SetBytes(out, thinkingPath+".thinkingLevel", effort)
|
||||
out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", effort != "none")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -113,6 +66,13 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.maxOutputTokens", maxTok.Num)
|
||||
}
|
||||
|
||||
// Candidate count (OpenAI 'n' parameter)
|
||||
if n := gjson.GetBytes(rawJSON, "n"); n.Exists() && n.Type == gjson.Number {
|
||||
if val := n.Int(); val > 1 {
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.candidateCount", val)
|
||||
}
|
||||
}
|
||||
|
||||
// Map OpenAI modalities -> Gemini CLI request.generationConfig.responseModalities
|
||||
// e.g. "modalities": ["image", "text"] -> ["IMAGE", "TEXT"]
|
||||
if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() {
|
||||
@@ -179,6 +139,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
}
|
||||
}
|
||||
|
||||
systemPartIndex := 0
|
||||
for i := 0; i < len(arr); i++ {
|
||||
m := arr[i]
|
||||
role := m.Get("role").String()
|
||||
@@ -188,16 +149,19 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
// system -> request.systemInstruction as a user message style
|
||||
if content.Type == gjson.String {
|
||||
out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
|
||||
out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.String())
|
||||
out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.String())
|
||||
systemPartIndex++
|
||||
} else if content.IsObject() && content.Get("type").String() == "text" {
|
||||
out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
|
||||
out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.Get("text").String())
|
||||
out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.Get("text").String())
|
||||
systemPartIndex++
|
||||
} else if content.IsArray() {
|
||||
contents := content.Array()
|
||||
if len(contents) > 0 {
|
||||
out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
|
||||
for j := 0; j < len(contents); j++ {
|
||||
out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", j), contents[j].Get("text").String())
|
||||
out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), contents[j].Get("text").String())
|
||||
systemPartIndex++
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -212,7 +176,10 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
for _, item := range items {
|
||||
switch item.Get("type").String() {
|
||||
case "text":
|
||||
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String())
|
||||
text := item.Get("text").String()
|
||||
if text != "" {
|
||||
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text)
|
||||
}
|
||||
p++
|
||||
case "image_url":
|
||||
imageURL := item.Get("image_url.url").String()
|
||||
@@ -256,6 +223,10 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
for _, item := range content.Array() {
|
||||
switch item.Get("type").String() {
|
||||
case "text":
|
||||
text := item.Get("text").String()
|
||||
if text != "" {
|
||||
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text)
|
||||
}
|
||||
p++
|
||||
case "image_url":
|
||||
// If the assistant returned an inline data URL, preserve it for history fidelity.
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
@@ -114,15 +115,40 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream
|
||||
}
|
||||
}
|
||||
// Include thoughts configuration for reasoning process visibility
|
||||
// Only apply for models that support thinking and use numeric budgets, not discrete levels.
|
||||
if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
|
||||
// Check for thinkingBudget first - if present, enable thinking with budget
|
||||
if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() && thinkingBudget.Int() > 0 {
|
||||
// Translator only does format conversion, ApplyThinking handles model capability validation.
|
||||
if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
|
||||
if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() {
|
||||
level := strings.ToLower(strings.TrimSpace(thinkingLevel.String()))
|
||||
switch level {
|
||||
case "":
|
||||
case "none":
|
||||
out, _ = sjson.Set(out, "thinking.type", "disabled")
|
||||
out, _ = sjson.Delete(out, "thinking.budget_tokens")
|
||||
case "auto":
|
||||
out, _ = sjson.Set(out, "thinking.type", "enabled")
|
||||
out, _ = sjson.Delete(out, "thinking.budget_tokens")
|
||||
default:
|
||||
if budget, ok := thinking.ConvertLevelToBudget(level); ok {
|
||||
out, _ = sjson.Set(out, "thinking.type", "enabled")
|
||||
out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
|
||||
}
|
||||
}
|
||||
} else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
|
||||
budget := int(thinkingBudget.Int())
|
||||
switch budget {
|
||||
case 0:
|
||||
out, _ = sjson.Set(out, "thinking.type", "disabled")
|
||||
out, _ = sjson.Delete(out, "thinking.budget_tokens")
|
||||
case -1:
|
||||
out, _ = sjson.Set(out, "thinking.type", "enabled")
|
||||
out, _ = sjson.Delete(out, "thinking.budget_tokens")
|
||||
default:
|
||||
out, _ = sjson.Set(out, "thinking.type", "enabled")
|
||||
out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
|
||||
}
|
||||
} else if includeThoughts := thinkingConfig.Get("includeThoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
|
||||
out, _ = sjson.Set(out, "thinking.type", "enabled")
|
||||
normalizedBudget := util.NormalizeThinkingBudget(modelName, int(thinkingBudget.Int()))
|
||||
out, _ = sjson.Set(out, "thinking.budget_tokens", normalizedBudget)
|
||||
} else if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
|
||||
// Fallback to include_thoughts if no budget specified
|
||||
out, _ = sjson.Set(out, "thinking.type", "enabled")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
@@ -65,10 +65,11 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream
|
||||
|
||||
root := gjson.ParseBytes(rawJSON)
|
||||
|
||||
if v := root.Get("reasoning_effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
|
||||
// Convert OpenAI reasoning_effort to Claude thinking config.
|
||||
if v := root.Get("reasoning_effort"); v.Exists() {
|
||||
effort := strings.ToLower(strings.TrimSpace(v.String()))
|
||||
if effort != "" {
|
||||
budget, ok := util.ThinkingEffortToBudget(modelName, effort)
|
||||
budget, ok := thinking.ConvertLevelToBudget(effort)
|
||||
if ok {
|
||||
switch budget {
|
||||
case 0:
|
||||
@@ -137,17 +138,35 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream
|
||||
|
||||
// Process messages and transform them to Claude Code format
|
||||
if messages := root.Get("messages"); messages.Exists() && messages.IsArray() {
|
||||
messageIndex := 0
|
||||
systemMessageIndex := -1
|
||||
messages.ForEach(func(_, message gjson.Result) bool {
|
||||
role := message.Get("role").String()
|
||||
contentResult := message.Get("content")
|
||||
|
||||
switch role {
|
||||
case "system", "user", "assistant":
|
||||
// Create Claude Code message with appropriate role mapping
|
||||
if role == "system" {
|
||||
role = "user"
|
||||
case "system":
|
||||
if systemMessageIndex == -1 {
|
||||
systemMsg := `{"role":"user","content":[]}`
|
||||
out, _ = sjson.SetRaw(out, "messages.-1", systemMsg)
|
||||
systemMessageIndex = messageIndex
|
||||
messageIndex++
|
||||
}
|
||||
|
||||
if contentResult.Exists() && contentResult.Type == gjson.String && contentResult.String() != "" {
|
||||
textPart := `{"type":"text","text":""}`
|
||||
textPart, _ = sjson.Set(textPart, "text", contentResult.String())
|
||||
out, _ = sjson.SetRaw(out, fmt.Sprintf("messages.%d.content.-1", systemMessageIndex), textPart)
|
||||
} else if contentResult.Exists() && contentResult.IsArray() {
|
||||
contentResult.ForEach(func(_, part gjson.Result) bool {
|
||||
if part.Get("type").String() == "text" {
|
||||
textPart := `{"type":"text","text":""}`
|
||||
textPart, _ = sjson.Set(textPart, "text", part.Get("text").String())
|
||||
out, _ = sjson.SetRaw(out, fmt.Sprintf("messages.%d.content.-1", systemMessageIndex), textPart)
|
||||
}
|
||||
return true
|
||||
})
|
||||
}
|
||||
case "user", "assistant":
|
||||
msg := `{"role":"","content":[]}`
|
||||
msg, _ = sjson.Set(msg, "role", role)
|
||||
|
||||
@@ -226,6 +245,7 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream
|
||||
}
|
||||
|
||||
out, _ = sjson.SetRaw(out, "messages.-1", msg)
|
||||
messageIndex++
|
||||
|
||||
case "tool":
|
||||
// Handle tool result messages conversion
|
||||
@@ -236,6 +256,7 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream
|
||||
msg, _ = sjson.Set(msg, "content.0.tool_use_id", toolCallID)
|
||||
msg, _ = sjson.Set(msg, "content.0.content", content)
|
||||
out, _ = sjson.SetRaw(out, "messages.-1", msg)
|
||||
messageIndex++
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
@@ -53,10 +53,11 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte
|
||||
|
||||
root := gjson.ParseBytes(rawJSON)
|
||||
|
||||
if v := root.Get("reasoning.effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
|
||||
// Convert OpenAI Responses reasoning.effort to Claude thinking config.
|
||||
if v := root.Get("reasoning.effort"); v.Exists() {
|
||||
effort := strings.ToLower(strings.TrimSpace(v.String()))
|
||||
if effort != "" {
|
||||
budget, ok := util.ThinkingEffortToBudget(modelName, effort)
|
||||
budget, ok := thinking.ConvertLevelToBudget(effort)
|
||||
if ok {
|
||||
switch budget {
|
||||
case 0:
|
||||
|
||||
@@ -251,6 +251,7 @@ func ConvertClaudeResponseToOpenAIResponses(ctx context.Context, modelName strin
|
||||
itemDone, _ = sjson.Set(itemDone, "item.id", fmt.Sprintf("fc_%s", st.CurrentFCID))
|
||||
itemDone, _ = sjson.Set(itemDone, "item.arguments", args)
|
||||
itemDone, _ = sjson.Set(itemDone, "item.call_id", st.CurrentFCID)
|
||||
itemDone, _ = sjson.Set(itemDone, "item.name", st.FuncNames[idx])
|
||||
out = append(out, emitEvent("response.output_item.done", itemDone))
|
||||
st.InFuncBlock = false
|
||||
} else if st.ReasoningActive {
|
||||
|
||||
@@ -12,7 +12,7 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
@@ -51,7 +51,7 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
|
||||
systemsResult := rootResult.Get("system")
|
||||
if systemsResult.IsArray() {
|
||||
systemResults := systemsResult.Array()
|
||||
message := `{"type":"message","role":"user","content":[]}`
|
||||
message := `{"type":"message","role":"developer","content":[]}`
|
||||
for i := 0; i < len(systemResults); i++ {
|
||||
systemResult := systemResults[i]
|
||||
systemTypeResult := systemResult.Get("type")
|
||||
@@ -217,21 +217,19 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
|
||||
// Add additional configuration parameters for the Codex API.
|
||||
template, _ = sjson.Set(template, "parallel_tool_calls", true)
|
||||
|
||||
// Convert thinking.budget_tokens to reasoning.effort for level-based models
|
||||
reasoningEffort := "medium" // default
|
||||
if thinking := rootResult.Get("thinking"); thinking.Exists() && thinking.IsObject() {
|
||||
switch thinking.Get("type").String() {
|
||||
// Convert thinking.budget_tokens to reasoning.effort.
|
||||
reasoningEffort := "medium"
|
||||
if thinkingConfig := rootResult.Get("thinking"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
|
||||
switch thinkingConfig.Get("type").String() {
|
||||
case "enabled":
|
||||
if util.ModelUsesThinkingLevels(modelName) {
|
||||
if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() {
|
||||
budget := int(budgetTokens.Int())
|
||||
if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
|
||||
reasoningEffort = effort
|
||||
}
|
||||
if budgetTokens := thinkingConfig.Get("budget_tokens"); budgetTokens.Exists() {
|
||||
budget := int(budgetTokens.Int())
|
||||
if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" {
|
||||
reasoningEffort = effort
|
||||
}
|
||||
}
|
||||
case "disabled":
|
||||
if effort, ok := util.ThinkingBudgetToEffort(modelName, 0); ok && effort != "" {
|
||||
if effort, ok := thinking.ConvertBudgetToLevel(0); ok && effort != "" {
|
||||
reasoningEffort = effort
|
||||
}
|
||||
}
|
||||
@@ -243,21 +241,23 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
|
||||
template, _ = sjson.Set(template, "include", []string{"reasoning.encrypted_content"})
|
||||
|
||||
// Add a first message to ignore system instructions and ensure proper execution.
|
||||
inputResult := gjson.Get(template, "input")
|
||||
if inputResult.Exists() && inputResult.IsArray() {
|
||||
inputResults := inputResult.Array()
|
||||
newInput := "[]"
|
||||
for i := 0; i < len(inputResults); i++ {
|
||||
if i == 0 {
|
||||
firstText := inputResults[i].Get("content.0.text")
|
||||
firstInstructions := "EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"
|
||||
if firstText.Exists() && firstText.String() != firstInstructions {
|
||||
newInput, _ = sjson.SetRaw(newInput, "-1", `{"type":"message","role":"user","content":[{"type":"input_text","text":"EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"}]}`)
|
||||
if misc.GetCodexInstructionsEnabled() {
|
||||
inputResult := gjson.Get(template, "input")
|
||||
if inputResult.Exists() && inputResult.IsArray() {
|
||||
inputResults := inputResult.Array()
|
||||
newInput := "[]"
|
||||
for i := 0; i < len(inputResults); i++ {
|
||||
if i == 0 {
|
||||
firstText := inputResults[i].Get("content.0.text")
|
||||
firstInstructions := "EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"
|
||||
if firstText.Exists() && firstText.String() != firstInstructions {
|
||||
newInput, _ = sjson.SetRaw(newInput, "-1", `{"type":"message","role":"user","content":[{"type":"input_text","text":"EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"}]}`)
|
||||
}
|
||||
}
|
||||
newInput, _ = sjson.SetRaw(newInput, "-1", inputResults[i].Raw)
|
||||
}
|
||||
newInput, _ = sjson.SetRaw(newInput, "-1", inputResults[i].Raw)
|
||||
template, _ = sjson.SetRaw(template, "input", newInput)
|
||||
}
|
||||
template, _ = sjson.SetRaw(template, "input", newInput)
|
||||
}
|
||||
|
||||
return []byte(template)
|
||||
|
||||
@@ -117,8 +117,12 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa
|
||||
} else {
|
||||
template, _ = sjson.Set(template, "delta.stop_reason", "end_turn")
|
||||
}
|
||||
template, _ = sjson.Set(template, "usage.input_tokens", rootResult.Get("response.usage.input_tokens").Int())
|
||||
template, _ = sjson.Set(template, "usage.output_tokens", rootResult.Get("response.usage.output_tokens").Int())
|
||||
inputTokens, outputTokens, cachedTokens := extractResponsesUsage(rootResult.Get("response.usage"))
|
||||
template, _ = sjson.Set(template, "usage.input_tokens", inputTokens)
|
||||
template, _ = sjson.Set(template, "usage.output_tokens", outputTokens)
|
||||
if cachedTokens > 0 {
|
||||
template, _ = sjson.Set(template, "usage.cache_read_input_tokens", cachedTokens)
|
||||
}
|
||||
|
||||
output = "event: message_delta\n"
|
||||
output += fmt.Sprintf("data: %s\n\n", template)
|
||||
@@ -204,8 +208,12 @@ func ConvertCodexResponseToClaudeNonStream(_ context.Context, _ string, original
|
||||
out := `{"id":"","type":"message","role":"assistant","model":"","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||
out, _ = sjson.Set(out, "id", responseData.Get("id").String())
|
||||
out, _ = sjson.Set(out, "model", responseData.Get("model").String())
|
||||
out, _ = sjson.Set(out, "usage.input_tokens", responseData.Get("usage.input_tokens").Int())
|
||||
out, _ = sjson.Set(out, "usage.output_tokens", responseData.Get("usage.output_tokens").Int())
|
||||
inputTokens, outputTokens, cachedTokens := extractResponsesUsage(responseData.Get("usage"))
|
||||
out, _ = sjson.Set(out, "usage.input_tokens", inputTokens)
|
||||
out, _ = sjson.Set(out, "usage.output_tokens", outputTokens)
|
||||
if cachedTokens > 0 {
|
||||
out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens)
|
||||
}
|
||||
|
||||
hasToolCall := false
|
||||
|
||||
@@ -308,12 +316,27 @@ func ConvertCodexResponseToClaudeNonStream(_ context.Context, _ string, original
|
||||
out, _ = sjson.SetRaw(out, "stop_sequence", stopSequence.Raw)
|
||||
}
|
||||
|
||||
if responseData.Get("usage.input_tokens").Exists() || responseData.Get("usage.output_tokens").Exists() {
|
||||
out, _ = sjson.Set(out, "usage.input_tokens", responseData.Get("usage.input_tokens").Int())
|
||||
out, _ = sjson.Set(out, "usage.output_tokens", responseData.Get("usage.output_tokens").Int())
|
||||
return out
|
||||
}
|
||||
|
||||
func extractResponsesUsage(usage gjson.Result) (int64, int64, int64) {
|
||||
if !usage.Exists() || usage.Type == gjson.Null {
|
||||
return 0, 0, 0
|
||||
}
|
||||
|
||||
return out
|
||||
inputTokens := usage.Get("input_tokens").Int()
|
||||
outputTokens := usage.Get("output_tokens").Int()
|
||||
cachedTokens := usage.Get("input_tokens_details.cached_tokens").Int()
|
||||
|
||||
if cachedTokens > 0 {
|
||||
if inputTokens >= cachedTokens {
|
||||
inputTokens -= cachedTokens
|
||||
} else {
|
||||
inputTokens = 0
|
||||
}
|
||||
}
|
||||
|
||||
return inputTokens, outputTokens, cachedTokens
|
||||
}
|
||||
|
||||
// buildReverseMapFromClaudeOriginalShortToOriginal builds a map[short]original from original Claude request tools.
|
||||
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
@@ -93,7 +94,7 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
|
||||
// System instruction -> as a user message with input_text parts
|
||||
sysParts := root.Get("system_instruction.parts")
|
||||
if sysParts.IsArray() {
|
||||
msg := `{"type":"message","role":"user","content":[]}`
|
||||
msg := `{"type":"message","role":"developer","content":[]}`
|
||||
arr := sysParts.Array()
|
||||
for i := 0; i < len(arr); i++ {
|
||||
p := arr[i]
|
||||
@@ -247,21 +248,28 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
|
||||
// Fixed flags aligning with Codex expectations
|
||||
out, _ = sjson.Set(out, "parallel_tool_calls", true)
|
||||
|
||||
// Convert thinkingBudget to reasoning.effort for level-based models
|
||||
reasoningEffort := "medium" // default
|
||||
// Convert Gemini thinkingConfig to Codex reasoning.effort.
|
||||
effortSet := false
|
||||
if genConfig := root.Get("generationConfig"); genConfig.Exists() {
|
||||
if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
|
||||
if util.ModelUsesThinkingLevels(modelName) {
|
||||
if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
|
||||
budget := int(thinkingBudget.Int())
|
||||
if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
|
||||
reasoningEffort = effort
|
||||
}
|
||||
if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() {
|
||||
effort := strings.ToLower(strings.TrimSpace(thinkingLevel.String()))
|
||||
if effort != "" {
|
||||
out, _ = sjson.Set(out, "reasoning.effort", effort)
|
||||
effortSet = true
|
||||
}
|
||||
} else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
|
||||
if effort, ok := thinking.ConvertBudgetToLevel(int(thinkingBudget.Int())); ok {
|
||||
out, _ = sjson.Set(out, "reasoning.effort", effort)
|
||||
effortSet = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
out, _ = sjson.Set(out, "reasoning.effort", reasoningEffort)
|
||||
if !effortSet {
|
||||
// No thinking config, set default effort
|
||||
out, _ = sjson.Set(out, "reasoning.effort", "medium")
|
||||
}
|
||||
out, _ = sjson.Set(out, "reasoning.summary", "auto")
|
||||
out, _ = sjson.Set(out, "stream", true)
|
||||
out, _ = sjson.Set(out, "store", false)
|
||||
|
||||
@@ -33,7 +33,7 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
|
||||
rawJSON := bytes.Clone(inputRawJSON)
|
||||
userAgent := misc.ExtractCodexUserAgent(rawJSON)
|
||||
// Start with empty JSON object
|
||||
out := `{}`
|
||||
out := `{"instructions":""}`
|
||||
|
||||
// Stream must be set to true
|
||||
out, _ = sjson.Set(out, "stream", stream)
|
||||
@@ -98,7 +98,9 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
|
||||
// Extract system instructions from first system message (string or text object)
|
||||
messages := gjson.GetBytes(rawJSON, "messages")
|
||||
_, instructions := misc.CodexInstructionsForModel(modelName, "", userAgent)
|
||||
out, _ = sjson.Set(out, "instructions", instructions)
|
||||
if misc.GetCodexInstructionsEnabled() {
|
||||
out, _ = sjson.Set(out, "instructions", instructions)
|
||||
}
|
||||
// if messages.IsArray() {
|
||||
// arr := messages.Array()
|
||||
// for i := 0; i < len(arr); i++ {
|
||||
@@ -141,7 +143,7 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
|
||||
msg := `{}`
|
||||
msg, _ = sjson.Set(msg, "type", "message")
|
||||
if role == "system" {
|
||||
msg, _ = sjson.Set(msg, "role", "user")
|
||||
msg, _ = sjson.Set(msg, "role", "developer")
|
||||
} else {
|
||||
msg, _ = sjson.Set(msg, "role", role)
|
||||
}
|
||||
|
||||
@@ -74,6 +74,11 @@ func ConvertOpenAIResponsesRequestToCodex(modelName string, inputRawJSON []byte,
|
||||
}
|
||||
|
||||
if hasOfficialInstructions {
|
||||
newInput := "[]"
|
||||
for _, item := range inputResults {
|
||||
newInput, _ = sjson.SetRaw(newInput, "-1", item.Raw)
|
||||
}
|
||||
rawJSON, _ = sjson.SetRawBytes(rawJSON, "input", []byte(newInput))
|
||||
return rawJSON
|
||||
}
|
||||
// log.Debugf("instructions not matched, %s\n", originalInstructions)
|
||||
|
||||
@@ -10,7 +10,6 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
@@ -160,12 +159,12 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
|
||||
}
|
||||
|
||||
// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
|
||||
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
|
||||
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
|
||||
if t.Get("type").String() == "enabled" {
|
||||
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
|
||||
budget := int(b.Int())
|
||||
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,37 +35,19 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
|
||||
// Model
|
||||
out, _ = sjson.SetBytes(out, "model", modelName)
|
||||
|
||||
// Reasoning effort -> thinkingBudget/include_thoughts
|
||||
// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
|
||||
// Apply thinking configuration: convert OpenAI reasoning_effort to Gemini CLI thinkingConfig.
|
||||
// Inline translation-only mapping; capability checks happen later in ApplyThinking.
|
||||
re := gjson.GetBytes(rawJSON, "reasoning_effort")
|
||||
hasOfficialThinking := re.Exists()
|
||||
if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
|
||||
out = util.ApplyReasoningEffortToGeminiCLI(out, re.String())
|
||||
}
|
||||
|
||||
// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
|
||||
// Only apply for models that use numeric budgets, not discrete levels.
|
||||
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
|
||||
if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
|
||||
var setBudget bool
|
||||
var budget int
|
||||
|
||||
if v := tc.Get("thinkingBudget"); v.Exists() {
|
||||
budget = int(v.Int())
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
setBudget = true
|
||||
} else if v := tc.Get("thinking_budget"); v.Exists() {
|
||||
budget = int(v.Int())
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
setBudget = true
|
||||
}
|
||||
|
||||
if v := tc.Get("includeThoughts"); v.Exists() {
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
|
||||
} else if v := tc.Get("include_thoughts"); v.Exists() {
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
|
||||
} else if setBudget && budget != 0 {
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
if re.Exists() {
|
||||
effort := strings.ToLower(strings.TrimSpace(re.String()))
|
||||
if effort != "" {
|
||||
thinkingPath := "request.generationConfig.thinkingConfig"
|
||||
if effort == "auto" {
|
||||
out, _ = sjson.SetBytes(out, thinkingPath+".thinkingBudget", -1)
|
||||
out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", true)
|
||||
} else {
|
||||
out, _ = sjson.SetBytes(out, thinkingPath+".thinkingLevel", effort)
|
||||
out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", effort != "none")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -81,6 +63,13 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.topK", tkr.Num)
|
||||
}
|
||||
|
||||
// Candidate count (OpenAI 'n' parameter)
|
||||
if n := gjson.GetBytes(rawJSON, "n"); n.Exists() && n.Type == gjson.Number {
|
||||
if val := n.Int(); val > 1 {
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.candidateCount", val)
|
||||
}
|
||||
}
|
||||
|
||||
// Map OpenAI modalities -> Gemini CLI request.generationConfig.responseModalities
|
||||
// e.g. "modalities": ["image", "text"] -> ["IMAGE", "TEXT"]
|
||||
if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() {
|
||||
@@ -147,6 +136,7 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
|
||||
}
|
||||
}
|
||||
|
||||
systemPartIndex := 0
|
||||
for i := 0; i < len(arr); i++ {
|
||||
m := arr[i]
|
||||
role := m.Get("role").String()
|
||||
@@ -156,16 +146,19 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
|
||||
// system -> request.systemInstruction as a user message style
|
||||
if content.Type == gjson.String {
|
||||
out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
|
||||
out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.String())
|
||||
out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.String())
|
||||
systemPartIndex++
|
||||
} else if content.IsObject() && content.Get("type").String() == "text" {
|
||||
out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
|
||||
out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.Get("text").String())
|
||||
out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.Get("text").String())
|
||||
systemPartIndex++
|
||||
} else if content.IsArray() {
|
||||
contents := content.Array()
|
||||
if len(contents) > 0 {
|
||||
out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
|
||||
for j := 0; j < len(contents); j++ {
|
||||
out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", j), contents[j].Get("text").String())
|
||||
out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), contents[j].Get("text").String())
|
||||
systemPartIndex++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,7 +10,6 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
@@ -153,13 +152,13 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
|
||||
}
|
||||
|
||||
// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when enabled
|
||||
// Only apply for models that use numeric budgets, not discrete levels.
|
||||
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
|
||||
// Translator only does format conversion, ApplyThinking handles model capability validation.
|
||||
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
|
||||
if t.Get("type").String() == "enabled" {
|
||||
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
|
||||
budget := int(b.Int())
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.includeThoughts", true)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,55 +35,19 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
|
||||
// Model
|
||||
out, _ = sjson.SetBytes(out, "model", modelName)
|
||||
|
||||
// Reasoning effort -> thinkingBudget/include_thoughts
|
||||
// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
|
||||
// Only apply numeric budgets for models that use budgets (not discrete levels) to avoid
|
||||
// incorrectly applying thinkingBudget for level-based models like gpt-5. Gemini 3 models
|
||||
// use thinkingLevel/includeThoughts instead.
|
||||
// Apply thinking configuration: convert OpenAI reasoning_effort to Gemini thinkingConfig.
|
||||
// Inline translation-only mapping; capability checks happen later in ApplyThinking.
|
||||
re := gjson.GetBytes(rawJSON, "reasoning_effort")
|
||||
hasOfficialThinking := re.Exists()
|
||||
if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
|
||||
if re.Exists() {
|
||||
effort := strings.ToLower(strings.TrimSpace(re.String()))
|
||||
if util.IsGemini3Model(modelName) {
|
||||
switch effort {
|
||||
case "none":
|
||||
out, _ = sjson.DeleteBytes(out, "generationConfig.thinkingConfig")
|
||||
case "auto":
|
||||
includeThoughts := true
|
||||
out = util.ApplyGeminiThinkingLevel(out, "", &includeThoughts)
|
||||
default:
|
||||
if level, ok := util.ValidateGemini3ThinkingLevel(modelName, effort); ok {
|
||||
out = util.ApplyGeminiThinkingLevel(out, level, nil)
|
||||
}
|
||||
}
|
||||
} else if !util.ModelUsesThinkingLevels(modelName) {
|
||||
out = util.ApplyReasoningEffortToGemini(out, effort)
|
||||
}
|
||||
}
|
||||
|
||||
// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
|
||||
// Only apply for models that use numeric budgets, not discrete levels.
|
||||
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
|
||||
if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
|
||||
var setBudget bool
|
||||
var budget int
|
||||
|
||||
if v := tc.Get("thinkingBudget"); v.Exists() {
|
||||
budget = int(v.Int())
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
setBudget = true
|
||||
} else if v := tc.Get("thinking_budget"); v.Exists() {
|
||||
budget = int(v.Int())
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
setBudget = true
|
||||
}
|
||||
|
||||
if v := tc.Get("includeThoughts"); v.Exists() {
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
|
||||
} else if v := tc.Get("include_thoughts"); v.Exists() {
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
|
||||
} else if setBudget && budget != 0 {
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
|
||||
if effort != "" {
|
||||
thinkingPath := "generationConfig.thinkingConfig"
|
||||
if effort == "auto" {
|
||||
out, _ = sjson.SetBytes(out, thinkingPath+".thinkingBudget", -1)
|
||||
out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", true)
|
||||
} else {
|
||||
out, _ = sjson.SetBytes(out, thinkingPath+".thinkingLevel", effort)
|
||||
out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", effort != "none")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -99,6 +63,13 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.topK", tkr.Num)
|
||||
}
|
||||
|
||||
// Candidate count (OpenAI 'n' parameter)
|
||||
if n := gjson.GetBytes(rawJSON, "n"); n.Exists() && n.Type == gjson.Number {
|
||||
if val := n.Int(); val > 1 {
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.candidateCount", val)
|
||||
}
|
||||
}
|
||||
|
||||
// Map OpenAI modalities -> Gemini generationConfig.responseModalities
|
||||
// e.g. "modalities": ["image", "text"] -> ["IMAGE", "TEXT"]
|
||||
if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() {
|
||||
@@ -165,6 +136,7 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
|
||||
}
|
||||
}
|
||||
|
||||
systemPartIndex := 0
|
||||
for i := 0; i < len(arr); i++ {
|
||||
m := arr[i]
|
||||
role := m.Get("role").String()
|
||||
@@ -174,16 +146,19 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
|
||||
// system -> system_instruction as a user message style
|
||||
if content.Type == gjson.String {
|
||||
out, _ = sjson.SetBytes(out, "system_instruction.role", "user")
|
||||
out, _ = sjson.SetBytes(out, "system_instruction.parts.0.text", content.String())
|
||||
out, _ = sjson.SetBytes(out, fmt.Sprintf("system_instruction.parts.%d.text", systemPartIndex), content.String())
|
||||
systemPartIndex++
|
||||
} else if content.IsObject() && content.Get("type").String() == "text" {
|
||||
out, _ = sjson.SetBytes(out, "system_instruction.role", "user")
|
||||
out, _ = sjson.SetBytes(out, "system_instruction.parts.0.text", content.Get("text").String())
|
||||
out, _ = sjson.SetBytes(out, fmt.Sprintf("system_instruction.parts.%d.text", systemPartIndex), content.Get("text").String())
|
||||
systemPartIndex++
|
||||
} else if content.IsArray() {
|
||||
contents := content.Array()
|
||||
if len(contents) > 0 {
|
||||
out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
|
||||
out, _ = sjson.SetBytes(out, "system_instruction.role", "user")
|
||||
for j := 0; j < len(contents); j++ {
|
||||
out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", j), contents[j].Get("text").String())
|
||||
out, _ = sjson.SetBytes(out, fmt.Sprintf("system_instruction.parts.%d.text", systemPartIndex), contents[j].Get("text").String())
|
||||
systemPartIndex++
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -198,7 +173,10 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
|
||||
for _, item := range items {
|
||||
switch item.Get("type").String() {
|
||||
case "text":
|
||||
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String())
|
||||
text := item.Get("text").String()
|
||||
if text != "" {
|
||||
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text)
|
||||
}
|
||||
p++
|
||||
case "image_url":
|
||||
imageURL := item.Get("image_url.url").String()
|
||||
@@ -243,6 +221,10 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
|
||||
for _, item := range content.Array() {
|
||||
switch item.Get("type").String() {
|
||||
case "text":
|
||||
text := item.Get("text").String()
|
||||
if text != "" {
|
||||
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text)
|
||||
}
|
||||
p++
|
||||
case "image_url":
|
||||
// If the assistant returned an inline data URL, preserve it for history fidelity.
|
||||
|
||||
@@ -21,7 +21,8 @@ import (
|
||||
// convertGeminiResponseToOpenAIChatParams holds parameters for response conversion.
|
||||
type convertGeminiResponseToOpenAIChatParams struct {
|
||||
UnixTimestamp int64
|
||||
FunctionIndex int
|
||||
// FunctionIndex tracks tool call indices per candidate index to support multiple candidates.
|
||||
FunctionIndex map[int]int
|
||||
}
|
||||
|
||||
// functionCallIDCounter provides a process-wide unique counter for function call identifiers.
|
||||
@@ -42,13 +43,20 @@ var functionCallIDCounter uint64
|
||||
// Returns:
|
||||
// - []string: A slice of strings, each containing an OpenAI-compatible JSON response
|
||||
func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, param *any) []string {
|
||||
// Initialize parameters if nil.
|
||||
if *param == nil {
|
||||
*param = &convertGeminiResponseToOpenAIChatParams{
|
||||
UnixTimestamp: 0,
|
||||
FunctionIndex: 0,
|
||||
FunctionIndex: make(map[int]int),
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure the Map is initialized (handling cases where param might be reused from older context).
|
||||
p := (*param).(*convertGeminiResponseToOpenAIChatParams)
|
||||
if p.FunctionIndex == nil {
|
||||
p.FunctionIndex = make(map[int]int)
|
||||
}
|
||||
|
||||
if bytes.HasPrefix(rawJSON, []byte("data:")) {
|
||||
rawJSON = bytes.TrimSpace(rawJSON[5:])
|
||||
}
|
||||
@@ -57,151 +65,179 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
|
||||
return []string{}
|
||||
}
|
||||
|
||||
// Initialize the OpenAI SSE template.
|
||||
template := `{"id":"","object":"chat.completion.chunk","created":12345,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":null,"native_finish_reason":null}]}`
|
||||
// Initialize the OpenAI SSE base template.
|
||||
// We use a base template and clone it for each candidate to support multiple candidates.
|
||||
baseTemplate := `{"id":"","object":"chat.completion.chunk","created":12345,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":null,"native_finish_reason":null}]}`
|
||||
|
||||
// Extract and set the model version.
|
||||
if modelVersionResult := gjson.GetBytes(rawJSON, "modelVersion"); modelVersionResult.Exists() {
|
||||
template, _ = sjson.Set(template, "model", modelVersionResult.String())
|
||||
baseTemplate, _ = sjson.Set(baseTemplate, "model", modelVersionResult.String())
|
||||
}
|
||||
|
||||
// Extract and set the creation timestamp.
|
||||
if createTimeResult := gjson.GetBytes(rawJSON, "createTime"); createTimeResult.Exists() {
|
||||
t, err := time.Parse(time.RFC3339Nano, createTimeResult.String())
|
||||
if err == nil {
|
||||
(*param).(*convertGeminiResponseToOpenAIChatParams).UnixTimestamp = t.Unix()
|
||||
p.UnixTimestamp = t.Unix()
|
||||
}
|
||||
template, _ = sjson.Set(template, "created", (*param).(*convertGeminiResponseToOpenAIChatParams).UnixTimestamp)
|
||||
baseTemplate, _ = sjson.Set(baseTemplate, "created", p.UnixTimestamp)
|
||||
} else {
|
||||
template, _ = sjson.Set(template, "created", (*param).(*convertGeminiResponseToOpenAIChatParams).UnixTimestamp)
|
||||
baseTemplate, _ = sjson.Set(baseTemplate, "created", p.UnixTimestamp)
|
||||
}
|
||||
|
||||
// Extract and set the response ID.
|
||||
if responseIDResult := gjson.GetBytes(rawJSON, "responseId"); responseIDResult.Exists() {
|
||||
template, _ = sjson.Set(template, "id", responseIDResult.String())
|
||||
}
|
||||
|
||||
// Extract and set the finish reason.
|
||||
if finishReasonResult := gjson.GetBytes(rawJSON, "candidates.0.finishReason"); finishReasonResult.Exists() {
|
||||
template, _ = sjson.Set(template, "choices.0.finish_reason", strings.ToLower(finishReasonResult.String()))
|
||||
template, _ = sjson.Set(template, "choices.0.native_finish_reason", strings.ToLower(finishReasonResult.String()))
|
||||
baseTemplate, _ = sjson.Set(baseTemplate, "id", responseIDResult.String())
|
||||
}
|
||||
|
||||
// Extract and set usage metadata (token counts).
|
||||
// Usage is applied to the base template so it appears in the chunks.
|
||||
if usageResult := gjson.GetBytes(rawJSON, "usageMetadata"); usageResult.Exists() {
|
||||
cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
|
||||
if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
|
||||
template, _ = sjson.Set(template, "usage.completion_tokens", candidatesTokenCountResult.Int())
|
||||
baseTemplate, _ = sjson.Set(baseTemplate, "usage.completion_tokens", candidatesTokenCountResult.Int())
|
||||
}
|
||||
if totalTokenCountResult := usageResult.Get("totalTokenCount"); totalTokenCountResult.Exists() {
|
||||
template, _ = sjson.Set(template, "usage.total_tokens", totalTokenCountResult.Int())
|
||||
baseTemplate, _ = sjson.Set(baseTemplate, "usage.total_tokens", totalTokenCountResult.Int())
|
||||
}
|
||||
promptTokenCount := usageResult.Get("promptTokenCount").Int() - cachedTokenCount
|
||||
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
|
||||
template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
|
||||
baseTemplate, _ = sjson.Set(baseTemplate, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
|
||||
if thoughtsTokenCount > 0 {
|
||||
template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
|
||||
baseTemplate, _ = sjson.Set(baseTemplate, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
|
||||
}
|
||||
// Include cached token count if present (indicates prompt caching is working)
|
||||
if cachedTokenCount > 0 {
|
||||
var err error
|
||||
template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
|
||||
baseTemplate, err = sjson.Set(baseTemplate, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
|
||||
if err != nil {
|
||||
log.Warnf("gemini openai response: failed to set cached_tokens in streaming: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Process the main content part of the response.
|
||||
partsResult := gjson.GetBytes(rawJSON, "candidates.0.content.parts")
|
||||
hasFunctionCall := false
|
||||
if partsResult.IsArray() {
|
||||
partResults := partsResult.Array()
|
||||
for i := 0; i < len(partResults); i++ {
|
||||
partResult := partResults[i]
|
||||
partTextResult := partResult.Get("text")
|
||||
functionCallResult := partResult.Get("functionCall")
|
||||
inlineDataResult := partResult.Get("inlineData")
|
||||
if !inlineDataResult.Exists() {
|
||||
inlineDataResult = partResult.Get("inline_data")
|
||||
}
|
||||
thoughtSignatureResult := partResult.Get("thoughtSignature")
|
||||
if !thoughtSignatureResult.Exists() {
|
||||
thoughtSignatureResult = partResult.Get("thought_signature")
|
||||
var responseStrings []string
|
||||
candidates := gjson.GetBytes(rawJSON, "candidates")
|
||||
|
||||
// Iterate over all candidates to support candidate_count > 1.
|
||||
if candidates.IsArray() {
|
||||
candidates.ForEach(func(_, candidate gjson.Result) bool {
|
||||
// Clone the template for the current candidate.
|
||||
template := baseTemplate
|
||||
|
||||
// Set the specific index for this candidate.
|
||||
candidateIndex := int(candidate.Get("index").Int())
|
||||
template, _ = sjson.Set(template, "choices.0.index", candidateIndex)
|
||||
|
||||
// Extract and set the finish reason.
|
||||
if finishReasonResult := candidate.Get("finishReason"); finishReasonResult.Exists() {
|
||||
template, _ = sjson.Set(template, "choices.0.finish_reason", strings.ToLower(finishReasonResult.String()))
|
||||
template, _ = sjson.Set(template, "choices.0.native_finish_reason", strings.ToLower(finishReasonResult.String()))
|
||||
}
|
||||
|
||||
hasThoughtSignature := thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != ""
|
||||
hasContentPayload := partTextResult.Exists() || functionCallResult.Exists() || inlineDataResult.Exists()
|
||||
partsResult := candidate.Get("content.parts")
|
||||
hasFunctionCall := false
|
||||
|
||||
// Skip pure thoughtSignature parts but keep any actual payload in the same part.
|
||||
if hasThoughtSignature && !hasContentPayload {
|
||||
continue
|
||||
if partsResult.IsArray() {
|
||||
partResults := partsResult.Array()
|
||||
for i := 0; i < len(partResults); i++ {
|
||||
partResult := partResults[i]
|
||||
partTextResult := partResult.Get("text")
|
||||
functionCallResult := partResult.Get("functionCall")
|
||||
inlineDataResult := partResult.Get("inlineData")
|
||||
if !inlineDataResult.Exists() {
|
||||
inlineDataResult = partResult.Get("inline_data")
|
||||
}
|
||||
thoughtSignatureResult := partResult.Get("thoughtSignature")
|
||||
if !thoughtSignatureResult.Exists() {
|
||||
thoughtSignatureResult = partResult.Get("thought_signature")
|
||||
}
|
||||
|
||||
hasThoughtSignature := thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != ""
|
||||
hasContentPayload := partTextResult.Exists() || functionCallResult.Exists() || inlineDataResult.Exists()
|
||||
|
||||
// Skip pure thoughtSignature parts but keep any actual payload in the same part.
|
||||
if hasThoughtSignature && !hasContentPayload {
|
||||
continue
|
||||
}
|
||||
|
||||
if partTextResult.Exists() {
|
||||
text := partTextResult.String()
|
||||
// Handle text content, distinguishing between regular content and reasoning/thoughts.
|
||||
if partResult.Get("thought").Bool() {
|
||||
template, _ = sjson.Set(template, "choices.0.delta.reasoning_content", text)
|
||||
} else {
|
||||
template, _ = sjson.Set(template, "choices.0.delta.content", text)
|
||||
}
|
||||
template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
|
||||
} else if functionCallResult.Exists() {
|
||||
// Handle function call content.
|
||||
hasFunctionCall = true
|
||||
toolCallsResult := gjson.Get(template, "choices.0.delta.tool_calls")
|
||||
|
||||
// Retrieve the function index for this specific candidate.
|
||||
functionCallIndex := p.FunctionIndex[candidateIndex]
|
||||
p.FunctionIndex[candidateIndex]++
|
||||
|
||||
if toolCallsResult.Exists() && toolCallsResult.IsArray() {
|
||||
functionCallIndex = len(toolCallsResult.Array())
|
||||
} else {
|
||||
template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls", `[]`)
|
||||
}
|
||||
|
||||
functionCallTemplate := `{"id": "","index": 0,"type": "function","function": {"name": "","arguments": ""}}`
|
||||
fcName := functionCallResult.Get("name").String()
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "index", functionCallIndex)
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.name", fcName)
|
||||
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.arguments", fcArgsResult.Raw)
|
||||
}
|
||||
template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
|
||||
template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls.-1", functionCallTemplate)
|
||||
} else if inlineDataResult.Exists() {
|
||||
data := inlineDataResult.Get("data").String()
|
||||
if data == "" {
|
||||
continue
|
||||
}
|
||||
mimeType := inlineDataResult.Get("mimeType").String()
|
||||
if mimeType == "" {
|
||||
mimeType = inlineDataResult.Get("mime_type").String()
|
||||
}
|
||||
if mimeType == "" {
|
||||
mimeType = "image/png"
|
||||
}
|
||||
imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data)
|
||||
imagesResult := gjson.Get(template, "choices.0.delta.images")
|
||||
if !imagesResult.Exists() || !imagesResult.IsArray() {
|
||||
template, _ = sjson.SetRaw(template, "choices.0.delta.images", `[]`)
|
||||
}
|
||||
imageIndex := len(gjson.Get(template, "choices.0.delta.images").Array())
|
||||
imagePayload := `{"type":"image_url","image_url":{"url":""}}`
|
||||
imagePayload, _ = sjson.Set(imagePayload, "index", imageIndex)
|
||||
imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL)
|
||||
template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
|
||||
template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", imagePayload)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if partTextResult.Exists() {
|
||||
text := partTextResult.String()
|
||||
// Handle text content, distinguishing between regular content and reasoning/thoughts.
|
||||
if partResult.Get("thought").Bool() {
|
||||
template, _ = sjson.Set(template, "choices.0.delta.reasoning_content", text)
|
||||
} else {
|
||||
template, _ = sjson.Set(template, "choices.0.delta.content", text)
|
||||
}
|
||||
template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
|
||||
} else if functionCallResult.Exists() {
|
||||
// Handle function call content.
|
||||
hasFunctionCall = true
|
||||
toolCallsResult := gjson.Get(template, "choices.0.delta.tool_calls")
|
||||
functionCallIndex := (*param).(*convertGeminiResponseToOpenAIChatParams).FunctionIndex
|
||||
(*param).(*convertGeminiResponseToOpenAIChatParams).FunctionIndex++
|
||||
if toolCallsResult.Exists() && toolCallsResult.IsArray() {
|
||||
functionCallIndex = len(toolCallsResult.Array())
|
||||
} else {
|
||||
template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls", `[]`)
|
||||
}
|
||||
|
||||
functionCallTemplate := `{"id": "","index": 0,"type": "function","function": {"name": "","arguments": ""}}`
|
||||
fcName := functionCallResult.Get("name").String()
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "index", functionCallIndex)
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.name", fcName)
|
||||
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.arguments", fcArgsResult.Raw)
|
||||
}
|
||||
template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
|
||||
template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls.-1", functionCallTemplate)
|
||||
} else if inlineDataResult.Exists() {
|
||||
data := inlineDataResult.Get("data").String()
|
||||
if data == "" {
|
||||
continue
|
||||
}
|
||||
mimeType := inlineDataResult.Get("mimeType").String()
|
||||
if mimeType == "" {
|
||||
mimeType = inlineDataResult.Get("mime_type").String()
|
||||
}
|
||||
if mimeType == "" {
|
||||
mimeType = "image/png"
|
||||
}
|
||||
imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data)
|
||||
imagesResult := gjson.Get(template, "choices.0.delta.images")
|
||||
if !imagesResult.Exists() || !imagesResult.IsArray() {
|
||||
template, _ = sjson.SetRaw(template, "choices.0.delta.images", `[]`)
|
||||
}
|
||||
imageIndex := len(gjson.Get(template, "choices.0.delta.images").Array())
|
||||
imagePayload := `{"type":"image_url","image_url":{"url":""}}`
|
||||
imagePayload, _ = sjson.Set(imagePayload, "index", imageIndex)
|
||||
imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL)
|
||||
template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
|
||||
template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", imagePayload)
|
||||
if hasFunctionCall {
|
||||
template, _ = sjson.Set(template, "choices.0.finish_reason", "tool_calls")
|
||||
template, _ = sjson.Set(template, "choices.0.native_finish_reason", "tool_calls")
|
||||
}
|
||||
|
||||
responseStrings = append(responseStrings, template)
|
||||
return true // continue loop
|
||||
})
|
||||
} else {
|
||||
// If there are no candidates (e.g., a pure usageMetadata chunk), return the usage chunk if present.
|
||||
if gjson.GetBytes(rawJSON, "usageMetadata").Exists() && len(responseStrings) == 0 {
|
||||
responseStrings = append(responseStrings, baseTemplate)
|
||||
}
|
||||
}
|
||||
|
||||
if hasFunctionCall {
|
||||
template, _ = sjson.Set(template, "choices.0.finish_reason", "tool_calls")
|
||||
template, _ = sjson.Set(template, "choices.0.native_finish_reason", "tool_calls")
|
||||
}
|
||||
|
||||
return []string{template}
|
||||
return responseStrings
|
||||
}
|
||||
|
||||
// ConvertGeminiResponseToOpenAINonStream converts a non-streaming Gemini response to a non-streaming OpenAI response.
|
||||
@@ -219,7 +255,9 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
|
||||
// - string: An OpenAI-compatible JSON response containing all message content and metadata
|
||||
func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, _ *any) string {
|
||||
var unixTimestamp int64
|
||||
template := `{"id":"","object":"chat.completion","created":123456,"model":"model","choices":[{"index":0,"message":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":null,"native_finish_reason":null}]}`
|
||||
// Initialize template with an empty choices array to support multiple candidates.
|
||||
template := `{"id":"","object":"chat.completion","created":123456,"model":"model","choices":[]}`
|
||||
|
||||
if modelVersionResult := gjson.GetBytes(rawJSON, "modelVersion"); modelVersionResult.Exists() {
|
||||
template, _ = sjson.Set(template, "model", modelVersionResult.String())
|
||||
}
|
||||
@@ -238,11 +276,6 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina
|
||||
template, _ = sjson.Set(template, "id", responseIDResult.String())
|
||||
}
|
||||
|
||||
if finishReasonResult := gjson.GetBytes(rawJSON, "candidates.0.finishReason"); finishReasonResult.Exists() {
|
||||
template, _ = sjson.Set(template, "choices.0.finish_reason", strings.ToLower(finishReasonResult.String()))
|
||||
template, _ = sjson.Set(template, "choices.0.native_finish_reason", strings.ToLower(finishReasonResult.String()))
|
||||
}
|
||||
|
||||
if usageResult := gjson.GetBytes(rawJSON, "usageMetadata"); usageResult.Exists() {
|
||||
if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
|
||||
template, _ = sjson.Set(template, "usage.completion_tokens", candidatesTokenCountResult.Int())
|
||||
@@ -267,74 +300,96 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina
|
||||
}
|
||||
}
|
||||
|
||||
// Process the main content part of the response.
|
||||
partsResult := gjson.GetBytes(rawJSON, "candidates.0.content.parts")
|
||||
hasFunctionCall := false
|
||||
if partsResult.IsArray() {
|
||||
partsResults := partsResult.Array()
|
||||
for i := 0; i < len(partsResults); i++ {
|
||||
partResult := partsResults[i]
|
||||
partTextResult := partResult.Get("text")
|
||||
functionCallResult := partResult.Get("functionCall")
|
||||
inlineDataResult := partResult.Get("inlineData")
|
||||
if !inlineDataResult.Exists() {
|
||||
inlineDataResult = partResult.Get("inline_data")
|
||||
// Process the main content part of the response for all candidates.
|
||||
candidates := gjson.GetBytes(rawJSON, "candidates")
|
||||
if candidates.IsArray() {
|
||||
candidates.ForEach(func(_, candidate gjson.Result) bool {
|
||||
// Construct a single Choice object.
|
||||
choiceTemplate := `{"index":0,"message":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":null,"native_finish_reason":null}`
|
||||
|
||||
// Set the index for this choice.
|
||||
choiceTemplate, _ = sjson.Set(choiceTemplate, "index", candidate.Get("index").Int())
|
||||
|
||||
// Set finish reason.
|
||||
if finishReasonResult := candidate.Get("finishReason"); finishReasonResult.Exists() {
|
||||
choiceTemplate, _ = sjson.Set(choiceTemplate, "finish_reason", strings.ToLower(finishReasonResult.String()))
|
||||
choiceTemplate, _ = sjson.Set(choiceTemplate, "native_finish_reason", strings.ToLower(finishReasonResult.String()))
|
||||
}
|
||||
|
||||
if partTextResult.Exists() {
|
||||
// Append text content, distinguishing between regular content and reasoning.
|
||||
if partResult.Get("thought").Bool() {
|
||||
template, _ = sjson.Set(template, "choices.0.message.reasoning_content", partTextResult.String())
|
||||
} else {
|
||||
template, _ = sjson.Set(template, "choices.0.message.content", partTextResult.String())
|
||||
}
|
||||
template, _ = sjson.Set(template, "choices.0.message.role", "assistant")
|
||||
} else if functionCallResult.Exists() {
|
||||
// Append function call content to the tool_calls array.
|
||||
hasFunctionCall = true
|
||||
toolCallsResult := gjson.Get(template, "choices.0.message.tool_calls")
|
||||
if !toolCallsResult.Exists() || !toolCallsResult.IsArray() {
|
||||
template, _ = sjson.SetRaw(template, "choices.0.message.tool_calls", `[]`)
|
||||
}
|
||||
functionCallItemTemplate := `{"id": "","type": "function","function": {"name": "","arguments": ""}}`
|
||||
fcName := functionCallResult.Get("name").String()
|
||||
functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
|
||||
functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.name", fcName)
|
||||
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
|
||||
functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.arguments", fcArgsResult.Raw)
|
||||
}
|
||||
template, _ = sjson.Set(template, "choices.0.message.role", "assistant")
|
||||
template, _ = sjson.SetRaw(template, "choices.0.message.tool_calls.-1", functionCallItemTemplate)
|
||||
} else if inlineDataResult.Exists() {
|
||||
data := inlineDataResult.Get("data").String()
|
||||
if data == "" {
|
||||
continue
|
||||
}
|
||||
mimeType := inlineDataResult.Get("mimeType").String()
|
||||
if mimeType == "" {
|
||||
mimeType = inlineDataResult.Get("mime_type").String()
|
||||
}
|
||||
if mimeType == "" {
|
||||
mimeType = "image/png"
|
||||
}
|
||||
imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data)
|
||||
imagesResult := gjson.Get(template, "choices.0.message.images")
|
||||
if !imagesResult.Exists() || !imagesResult.IsArray() {
|
||||
template, _ = sjson.SetRaw(template, "choices.0.message.images", `[]`)
|
||||
}
|
||||
imageIndex := len(gjson.Get(template, "choices.0.message.images").Array())
|
||||
imagePayload := `{"type":"image_url","image_url":{"url":""}}`
|
||||
imagePayload, _ = sjson.Set(imagePayload, "index", imageIndex)
|
||||
imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL)
|
||||
template, _ = sjson.Set(template, "choices.0.message.role", "assistant")
|
||||
template, _ = sjson.SetRaw(template, "choices.0.message.images.-1", imagePayload)
|
||||
}
|
||||
}
|
||||
}
|
||||
partsResult := candidate.Get("content.parts")
|
||||
hasFunctionCall := false
|
||||
if partsResult.IsArray() {
|
||||
partsResults := partsResult.Array()
|
||||
for i := 0; i < len(partsResults); i++ {
|
||||
partResult := partsResults[i]
|
||||
partTextResult := partResult.Get("text")
|
||||
functionCallResult := partResult.Get("functionCall")
|
||||
inlineDataResult := partResult.Get("inlineData")
|
||||
if !inlineDataResult.Exists() {
|
||||
inlineDataResult = partResult.Get("inline_data")
|
||||
}
|
||||
|
||||
if hasFunctionCall {
|
||||
template, _ = sjson.Set(template, "choices.0.finish_reason", "tool_calls")
|
||||
template, _ = sjson.Set(template, "choices.0.native_finish_reason", "tool_calls")
|
||||
if partTextResult.Exists() {
|
||||
// Append text content, distinguishing between regular content and reasoning.
|
||||
if partResult.Get("thought").Bool() {
|
||||
oldVal := gjson.Get(choiceTemplate, "message.reasoning_content").String()
|
||||
choiceTemplate, _ = sjson.Set(choiceTemplate, "message.reasoning_content", oldVal+partTextResult.String())
|
||||
} else {
|
||||
oldVal := gjson.Get(choiceTemplate, "message.content").String()
|
||||
choiceTemplate, _ = sjson.Set(choiceTemplate, "message.content", oldVal+partTextResult.String())
|
||||
}
|
||||
choiceTemplate, _ = sjson.Set(choiceTemplate, "message.role", "assistant")
|
||||
} else if functionCallResult.Exists() {
|
||||
// Append function call content to the tool_calls array.
|
||||
hasFunctionCall = true
|
||||
toolCallsResult := gjson.Get(choiceTemplate, "message.tool_calls")
|
||||
if !toolCallsResult.Exists() || !toolCallsResult.IsArray() {
|
||||
choiceTemplate, _ = sjson.SetRaw(choiceTemplate, "message.tool_calls", `[]`)
|
||||
}
|
||||
functionCallItemTemplate := `{"id": "","type": "function","function": {"name": "","arguments": ""}}`
|
||||
fcName := functionCallResult.Get("name").String()
|
||||
functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
|
||||
functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.name", fcName)
|
||||
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
|
||||
functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.arguments", fcArgsResult.Raw)
|
||||
}
|
||||
choiceTemplate, _ = sjson.Set(choiceTemplate, "message.role", "assistant")
|
||||
choiceTemplate, _ = sjson.SetRaw(choiceTemplate, "message.tool_calls.-1", functionCallItemTemplate)
|
||||
} else if inlineDataResult.Exists() {
|
||||
data := inlineDataResult.Get("data").String()
|
||||
if data != "" {
|
||||
mimeType := inlineDataResult.Get("mimeType").String()
|
||||
if mimeType == "" {
|
||||
mimeType = inlineDataResult.Get("mime_type").String()
|
||||
}
|
||||
if mimeType == "" {
|
||||
mimeType = "image/png"
|
||||
}
|
||||
imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data)
|
||||
imagesResult := gjson.Get(choiceTemplate, "message.images")
|
||||
if !imagesResult.Exists() || !imagesResult.IsArray() {
|
||||
choiceTemplate, _ = sjson.SetRaw(choiceTemplate, "message.images", `[]`)
|
||||
}
|
||||
imageIndex := len(gjson.Get(choiceTemplate, "message.images").Array())
|
||||
imagePayload := `{"type":"image_url","image_url":{"url":""}}`
|
||||
imagePayload, _ = sjson.Set(imagePayload, "index", imageIndex)
|
||||
imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL)
|
||||
choiceTemplate, _ = sjson.Set(choiceTemplate, "message.role", "assistant")
|
||||
choiceTemplate, _ = sjson.SetRaw(choiceTemplate, "message.images.-1", imagePayload)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if hasFunctionCall {
|
||||
choiceTemplate, _ = sjson.Set(choiceTemplate, "finish_reason", "tool_calls")
|
||||
choiceTemplate, _ = sjson.Set(choiceTemplate, "native_finish_reason", "tool_calls")
|
||||
}
|
||||
|
||||
// Append the constructed choice to the main choices array.
|
||||
template, _ = sjson.SetRaw(template, "choices.-1", choiceTemplate)
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
return template
|
||||
|
||||
@@ -5,7 +5,6 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
@@ -388,31 +387,19 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
|
||||
out, _ = sjson.Set(out, "generationConfig.stopSequences", sequences)
|
||||
}
|
||||
|
||||
// OpenAI official reasoning fields take precedence
|
||||
// Only convert for models that use numeric budgets (not discrete levels).
|
||||
hasOfficialThinking := root.Get("reasoning.effort").Exists()
|
||||
if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
|
||||
reasoningEffort := root.Get("reasoning.effort")
|
||||
out = string(util.ApplyReasoningEffortToGemini([]byte(out), reasoningEffort.String()))
|
||||
}
|
||||
|
||||
// Cherry Studio extension (applies only when official fields are missing)
|
||||
// Only apply for models that use numeric budgets, not discrete levels.
|
||||
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
|
||||
if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
|
||||
var setBudget bool
|
||||
var budget int
|
||||
if v := tc.Get("thinking_budget"); v.Exists() {
|
||||
budget = int(v.Int())
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
setBudget = true
|
||||
}
|
||||
if v := tc.Get("include_thoughts"); v.Exists() {
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
|
||||
} else if setBudget {
|
||||
if budget != 0 {
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
|
||||
}
|
||||
// Apply thinking configuration: convert OpenAI Responses API reasoning.effort to Gemini thinkingConfig.
|
||||
// Inline translation-only mapping; capability checks happen later in ApplyThinking.
|
||||
re := root.Get("reasoning.effort")
|
||||
if re.Exists() {
|
||||
effort := strings.ToLower(strings.TrimSpace(re.String()))
|
||||
if effort != "" {
|
||||
thinkingPath := "generationConfig.thinkingConfig"
|
||||
if effort == "auto" {
|
||||
out, _ = sjson.Set(out, thinkingPath+".thinkingBudget", -1)
|
||||
out, _ = sjson.Set(out, thinkingPath+".includeThoughts", true)
|
||||
} else {
|
||||
out, _ = sjson.Set(out, thinkingPath+".thinkingLevel", effort)
|
||||
out, _ = sjson.Set(out, thinkingPath+".includeThoughts", effort != "none")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ import (
|
||||
"bytes"
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
@@ -61,23 +61,23 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
|
||||
out, _ = sjson.Set(out, "stream", stream)
|
||||
|
||||
// Thinking: Convert Claude thinking.budget_tokens to OpenAI reasoning_effort
|
||||
if thinking := root.Get("thinking"); thinking.Exists() && thinking.IsObject() {
|
||||
if thinkingType := thinking.Get("type"); thinkingType.Exists() {
|
||||
if thinkingConfig := root.Get("thinking"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
|
||||
if thinkingType := thinkingConfig.Get("type"); thinkingType.Exists() {
|
||||
switch thinkingType.String() {
|
||||
case "enabled":
|
||||
if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() {
|
||||
if budgetTokens := thinkingConfig.Get("budget_tokens"); budgetTokens.Exists() {
|
||||
budget := int(budgetTokens.Int())
|
||||
if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
|
||||
if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" {
|
||||
out, _ = sjson.Set(out, "reasoning_effort", effort)
|
||||
}
|
||||
} else {
|
||||
// No budget_tokens specified, default to "auto" for enabled thinking
|
||||
if effort, ok := util.ThinkingBudgetToEffort(modelName, -1); ok && effort != "" {
|
||||
if effort, ok := thinking.ConvertBudgetToLevel(-1); ok && effort != "" {
|
||||
out, _ = sjson.Set(out, "reasoning_effort", effort)
|
||||
}
|
||||
}
|
||||
case "disabled":
|
||||
if effort, ok := util.ThinkingBudgetToEffort(modelName, 0); ok && effort != "" {
|
||||
if effort, ok := thinking.ConvertBudgetToLevel(0); ok && effort != "" {
|
||||
out, _ = sjson.Set(out, "reasoning_effort", effort)
|
||||
}
|
||||
}
|
||||
@@ -88,7 +88,7 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
|
||||
var messagesJSON = "[]"
|
||||
|
||||
// Handle system message first
|
||||
systemMsgJSON := `{"role":"system","content":[{"type":"text","text":"Use ANY tool, the parameters MUST accord with RFC 8259 (The JavaScript Object Notation (JSON) Data Interchange Format), the keys and value MUST be enclosed in double quotes."}]}`
|
||||
systemMsgJSON := `{"role":"system","content":[]}`
|
||||
if system := root.Get("system"); system.Exists() {
|
||||
if system.Type == gjson.String {
|
||||
if system.String() != "" {
|
||||
@@ -129,7 +129,7 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
|
||||
case "thinking":
|
||||
// Only map thinking to reasoning_content for assistant messages (security: prevent injection)
|
||||
if role == "assistant" {
|
||||
thinkingText := util.GetThinkingText(part)
|
||||
thinkingText := thinking.GetThinkingText(part)
|
||||
// Skip empty or whitespace-only thinking
|
||||
if strings.TrimSpace(thinkingText) != "" {
|
||||
reasoningParts = append(reasoningParts, thinkingText)
|
||||
|
||||
@@ -289,21 +289,17 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
|
||||
// Only process if usage has actual values (not null)
|
||||
if param.FinishReason != "" {
|
||||
usage := root.Get("usage")
|
||||
var inputTokens, outputTokens int64
|
||||
var inputTokens, outputTokens, cachedTokens int64
|
||||
if usage.Exists() && usage.Type != gjson.Null {
|
||||
// Check if usage has actual token counts
|
||||
promptTokens := usage.Get("prompt_tokens")
|
||||
completionTokens := usage.Get("completion_tokens")
|
||||
|
||||
if promptTokens.Exists() && completionTokens.Exists() {
|
||||
inputTokens = promptTokens.Int()
|
||||
outputTokens = completionTokens.Int()
|
||||
}
|
||||
inputTokens, outputTokens, cachedTokens = extractOpenAIUsage(usage)
|
||||
// Send message_delta with usage
|
||||
messageDeltaJSON := `{"type":"message_delta","delta":{"stop_reason":"","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||
messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "delta.stop_reason", mapOpenAIFinishReasonToAnthropic(param.FinishReason))
|
||||
messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.input_tokens", inputTokens)
|
||||
messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.output_tokens", outputTokens)
|
||||
if cachedTokens > 0 {
|
||||
messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.cache_read_input_tokens", cachedTokens)
|
||||
}
|
||||
results = append(results, "event: message_delta\ndata: "+messageDeltaJSON+"\n\n")
|
||||
param.MessageDeltaSent = true
|
||||
|
||||
@@ -423,13 +419,12 @@ func convertOpenAINonStreamingToAnthropic(rawJSON []byte) []string {
|
||||
|
||||
// Set usage information
|
||||
if usage := root.Get("usage"); usage.Exists() {
|
||||
out, _ = sjson.Set(out, "usage.input_tokens", usage.Get("prompt_tokens").Int())
|
||||
out, _ = sjson.Set(out, "usage.output_tokens", usage.Get("completion_tokens").Int())
|
||||
reasoningTokens := int64(0)
|
||||
if v := usage.Get("completion_tokens_details.reasoning_tokens"); v.Exists() {
|
||||
reasoningTokens = v.Int()
|
||||
inputTokens, outputTokens, cachedTokens := extractOpenAIUsage(usage)
|
||||
out, _ = sjson.Set(out, "usage.input_tokens", inputTokens)
|
||||
out, _ = sjson.Set(out, "usage.output_tokens", outputTokens)
|
||||
if cachedTokens > 0 {
|
||||
out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens)
|
||||
}
|
||||
out, _ = sjson.Set(out, "usage.reasoning_tokens", reasoningTokens)
|
||||
}
|
||||
|
||||
return []string{out}
|
||||
@@ -674,8 +669,12 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina
|
||||
}
|
||||
|
||||
if respUsage := root.Get("usage"); respUsage.Exists() {
|
||||
out, _ = sjson.Set(out, "usage.input_tokens", respUsage.Get("prompt_tokens").Int())
|
||||
out, _ = sjson.Set(out, "usage.output_tokens", respUsage.Get("completion_tokens").Int())
|
||||
inputTokens, outputTokens, cachedTokens := extractOpenAIUsage(respUsage)
|
||||
out, _ = sjson.Set(out, "usage.input_tokens", inputTokens)
|
||||
out, _ = sjson.Set(out, "usage.output_tokens", outputTokens)
|
||||
if cachedTokens > 0 {
|
||||
out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens)
|
||||
}
|
||||
}
|
||||
|
||||
if !stopReasonSet {
|
||||
@@ -692,3 +691,23 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina
|
||||
func ClaudeTokenCount(ctx context.Context, count int64) string {
|
||||
return fmt.Sprintf(`{"input_tokens":%d}`, count)
|
||||
}
|
||||
|
||||
func extractOpenAIUsage(usage gjson.Result) (int64, int64, int64) {
|
||||
if !usage.Exists() || usage.Type == gjson.Null {
|
||||
return 0, 0, 0
|
||||
}
|
||||
|
||||
inputTokens := usage.Get("prompt_tokens").Int()
|
||||
outputTokens := usage.Get("completion_tokens").Int()
|
||||
cachedTokens := usage.Get("prompt_tokens_details.cached_tokens").Int()
|
||||
|
||||
if cachedTokens > 0 {
|
||||
if inputTokens >= cachedTokens {
|
||||
inputTokens -= cachedTokens
|
||||
} else {
|
||||
inputTokens = 0
|
||||
}
|
||||
}
|
||||
|
||||
return inputTokens, outputTokens, cachedTokens
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ import (
|
||||
"math/big"
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
@@ -77,12 +77,21 @@ func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream
|
||||
}
|
||||
}
|
||||
|
||||
// Convert thinkingBudget to reasoning_effort
|
||||
// Candidate count (OpenAI 'n' parameter)
|
||||
if candidateCount := genConfig.Get("candidateCount"); candidateCount.Exists() {
|
||||
out, _ = sjson.Set(out, "n", candidateCount.Int())
|
||||
}
|
||||
|
||||
// Map Gemini thinkingConfig to OpenAI reasoning_effort.
|
||||
// Always perform conversion to support allowCompat models that may not be in registry
|
||||
if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
|
||||
if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
|
||||
budget := int(thinkingBudget.Int())
|
||||
if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
|
||||
if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() {
|
||||
effort := strings.ToLower(strings.TrimSpace(thinkingLevel.String()))
|
||||
if effort != "" {
|
||||
out, _ = sjson.Set(out, "reasoning_effort", effort)
|
||||
}
|
||||
} else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
|
||||
if effort, ok := thinking.ConvertBudgetToLevel(int(thinkingBudget.Int())); ok {
|
||||
out, _ = sjson.Set(out, "reasoning_effort", effort)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,6 +12,10 @@ import (
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
type oaiToResponsesStateReasoning struct {
|
||||
ReasoningID string
|
||||
ReasoningData string
|
||||
}
|
||||
type oaiToResponsesState struct {
|
||||
Seq int
|
||||
ResponseID string
|
||||
@@ -23,6 +27,7 @@ type oaiToResponsesState struct {
|
||||
// Per-output message text buffers by index
|
||||
MsgTextBuf map[int]*strings.Builder
|
||||
ReasoningBuf strings.Builder
|
||||
Reasonings []oaiToResponsesStateReasoning
|
||||
FuncArgsBuf map[int]*strings.Builder // index -> args
|
||||
FuncNames map[int]string // index -> name
|
||||
FuncCallIDs map[int]string // index -> call_id
|
||||
@@ -63,6 +68,7 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
|
||||
MsgItemDone: make(map[int]bool),
|
||||
FuncArgsDone: make(map[int]bool),
|
||||
FuncItemDone: make(map[int]bool),
|
||||
Reasonings: make([]oaiToResponsesStateReasoning, 0),
|
||||
}
|
||||
}
|
||||
st := (*param).(*oaiToResponsesState)
|
||||
@@ -157,6 +163,31 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
|
||||
st.Started = true
|
||||
}
|
||||
|
||||
stopReasoning := func(text string) {
|
||||
// Emit reasoning done events
|
||||
textDone := `{"type":"response.reasoning_summary_text.done","sequence_number":0,"item_id":"","output_index":0,"summary_index":0,"text":""}`
|
||||
textDone, _ = sjson.Set(textDone, "sequence_number", nextSeq())
|
||||
textDone, _ = sjson.Set(textDone, "item_id", st.ReasoningID)
|
||||
textDone, _ = sjson.Set(textDone, "output_index", st.ReasoningIndex)
|
||||
textDone, _ = sjson.Set(textDone, "text", text)
|
||||
out = append(out, emitRespEvent("response.reasoning_summary_text.done", textDone))
|
||||
partDone := `{"type":"response.reasoning_summary_part.done","sequence_number":0,"item_id":"","output_index":0,"summary_index":0,"part":{"type":"summary_text","text":""}}`
|
||||
partDone, _ = sjson.Set(partDone, "sequence_number", nextSeq())
|
||||
partDone, _ = sjson.Set(partDone, "item_id", st.ReasoningID)
|
||||
partDone, _ = sjson.Set(partDone, "output_index", st.ReasoningIndex)
|
||||
partDone, _ = sjson.Set(partDone, "part.text", text)
|
||||
out = append(out, emitRespEvent("response.reasoning_summary_part.done", partDone))
|
||||
outputItemDone := `{"type":"response.output_item.done","item":{"id":"","type":"reasoning","encrypted_content":"","summary":[{"type":"summary_text","text":""}]},"output_index":0,"sequence_number":0}`
|
||||
outputItemDone, _ = sjson.Set(outputItemDone, "sequence_number", nextSeq())
|
||||
outputItemDone, _ = sjson.Set(outputItemDone, "item.id", st.ReasoningID)
|
||||
outputItemDone, _ = sjson.Set(outputItemDone, "output_index", st.ReasoningIndex)
|
||||
outputItemDone, _ = sjson.Set(outputItemDone, "item.summary.text", text)
|
||||
out = append(out, emitRespEvent("response.output_item.done", outputItemDone))
|
||||
|
||||
st.Reasonings = append(st.Reasonings, oaiToResponsesStateReasoning{ReasoningID: st.ReasoningID, ReasoningData: text})
|
||||
st.ReasoningID = ""
|
||||
}
|
||||
|
||||
// choices[].delta content / tool_calls / reasoning_content
|
||||
if choices := root.Get("choices"); choices.Exists() && choices.IsArray() {
|
||||
choices.ForEach(func(_, choice gjson.Result) bool {
|
||||
@@ -165,6 +196,10 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
|
||||
if delta.Exists() {
|
||||
if c := delta.Get("content"); c.Exists() && c.String() != "" {
|
||||
// Ensure the message item and its first content part are announced before any text deltas
|
||||
if st.ReasoningID != "" {
|
||||
stopReasoning(st.ReasoningBuf.String())
|
||||
st.ReasoningBuf.Reset()
|
||||
}
|
||||
if !st.MsgItemAdded[idx] {
|
||||
item := `{"type":"response.output_item.added","sequence_number":0,"output_index":0,"item":{"id":"","type":"message","status":"in_progress","content":[],"role":"assistant"}}`
|
||||
item, _ = sjson.Set(item, "sequence_number", nextSeq())
|
||||
@@ -226,6 +261,10 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
|
||||
|
||||
// tool calls
|
||||
if tcs := delta.Get("tool_calls"); tcs.Exists() && tcs.IsArray() {
|
||||
if st.ReasoningID != "" {
|
||||
stopReasoning(st.ReasoningBuf.String())
|
||||
st.ReasoningBuf.Reset()
|
||||
}
|
||||
// Before emitting any function events, if a message is open for this index,
|
||||
// close its text/content to match Codex expected ordering.
|
||||
if st.MsgItemAdded[idx] && !st.MsgItemDone[idx] {
|
||||
@@ -361,17 +400,8 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
|
||||
}
|
||||
|
||||
if st.ReasoningID != "" {
|
||||
// Emit reasoning done events
|
||||
textDone := `{"type":"response.reasoning_summary_text.done","sequence_number":0,"item_id":"","output_index":0,"summary_index":0,"text":""}`
|
||||
textDone, _ = sjson.Set(textDone, "sequence_number", nextSeq())
|
||||
textDone, _ = sjson.Set(textDone, "item_id", st.ReasoningID)
|
||||
textDone, _ = sjson.Set(textDone, "output_index", st.ReasoningIndex)
|
||||
out = append(out, emitRespEvent("response.reasoning_summary_text.done", textDone))
|
||||
partDone := `{"type":"response.reasoning_summary_part.done","sequence_number":0,"item_id":"","output_index":0,"summary_index":0,"part":{"type":"summary_text","text":""}}`
|
||||
partDone, _ = sjson.Set(partDone, "sequence_number", nextSeq())
|
||||
partDone, _ = sjson.Set(partDone, "item_id", st.ReasoningID)
|
||||
partDone, _ = sjson.Set(partDone, "output_index", st.ReasoningIndex)
|
||||
out = append(out, emitRespEvent("response.reasoning_summary_part.done", partDone))
|
||||
stopReasoning(st.ReasoningBuf.String())
|
||||
st.ReasoningBuf.Reset()
|
||||
}
|
||||
|
||||
// Emit function call done events for any active function calls
|
||||
@@ -485,11 +515,13 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
|
||||
}
|
||||
// Build response.output using aggregated buffers
|
||||
outputsWrapper := `{"arr":[]}`
|
||||
if st.ReasoningBuf.Len() > 0 {
|
||||
item := `{"id":"","type":"reasoning","summary":[{"type":"summary_text","text":""}]}`
|
||||
item, _ = sjson.Set(item, "id", st.ReasoningID)
|
||||
item, _ = sjson.Set(item, "summary.0.text", st.ReasoningBuf.String())
|
||||
outputsWrapper, _ = sjson.SetRaw(outputsWrapper, "arr.-1", item)
|
||||
if len(st.Reasonings) > 0 {
|
||||
for _, r := range st.Reasonings {
|
||||
item := `{"id":"","type":"reasoning","summary":[{"type":"summary_text","text":""}]}`
|
||||
item, _ = sjson.Set(item, "id", r.ReasoningID)
|
||||
item, _ = sjson.Set(item, "summary.0.text", r.ReasoningData)
|
||||
outputsWrapper, _ = sjson.SetRaw(outputsWrapper, "arr.-1", item)
|
||||
}
|
||||
}
|
||||
// Append message items in ascending index order
|
||||
if len(st.MsgItemAdded) > 0 {
|
||||
|
||||
@@ -1,49 +0,0 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// ApplyClaudeThinkingConfig applies thinking configuration to a Claude API request payload.
|
||||
// It sets the thinking.type to "enabled" and thinking.budget_tokens to the specified budget.
|
||||
// If budget is nil or the payload already has thinking config, it returns the payload unchanged.
|
||||
func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte {
|
||||
if budget == nil {
|
||||
return body
|
||||
}
|
||||
if gjson.GetBytes(body, "thinking").Exists() {
|
||||
return body
|
||||
}
|
||||
if *budget <= 0 {
|
||||
return body
|
||||
}
|
||||
updated := body
|
||||
updated, _ = sjson.SetBytes(updated, "thinking.type", "enabled")
|
||||
updated, _ = sjson.SetBytes(updated, "thinking.budget_tokens", *budget)
|
||||
return updated
|
||||
}
|
||||
|
||||
// ResolveClaudeThinkingConfig resolves thinking configuration from metadata for Claude models.
|
||||
// It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget.
|
||||
// Returns the normalized budget (nil if thinking should not be enabled) and whether it matched.
|
||||
func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) {
|
||||
if !ModelSupportsThinking(modelName) {
|
||||
return nil, false
|
||||
}
|
||||
budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata)
|
||||
if !matched {
|
||||
return nil, false
|
||||
}
|
||||
if include != nil && !*include {
|
||||
return nil, true
|
||||
}
|
||||
if budget == nil {
|
||||
return nil, true
|
||||
}
|
||||
normalized := NormalizeThinkingBudget(modelName, *budget)
|
||||
if normalized <= 0 {
|
||||
return nil, true
|
||||
}
|
||||
return &normalized, true
|
||||
}
|
||||
@@ -19,6 +19,7 @@ func CleanJSONSchemaForAntigravity(jsonStr string) string {
|
||||
// Phase 1: Convert and add hints
|
||||
jsonStr = convertRefsToHints(jsonStr)
|
||||
jsonStr = convertConstToEnum(jsonStr)
|
||||
jsonStr = convertEnumValuesToStrings(jsonStr)
|
||||
jsonStr = addEnumHints(jsonStr)
|
||||
jsonStr = addAdditionalPropertiesHints(jsonStr)
|
||||
jsonStr = moveConstraintsToDescription(jsonStr)
|
||||
@@ -77,6 +78,33 @@ func convertConstToEnum(jsonStr string) string {
|
||||
return jsonStr
|
||||
}
|
||||
|
||||
// convertEnumValuesToStrings ensures all enum values are strings.
|
||||
// Gemini API requires enum values to be of type string, not numbers or booleans.
|
||||
func convertEnumValuesToStrings(jsonStr string) string {
|
||||
for _, p := range findPaths(jsonStr, "enum") {
|
||||
arr := gjson.Get(jsonStr, p)
|
||||
if !arr.IsArray() {
|
||||
continue
|
||||
}
|
||||
|
||||
var stringVals []string
|
||||
needsConversion := false
|
||||
for _, item := range arr.Array() {
|
||||
// Check if any value is not a string
|
||||
if item.Type != gjson.String {
|
||||
needsConversion = true
|
||||
}
|
||||
stringVals = append(stringVals, item.String())
|
||||
}
|
||||
|
||||
// Only update if we found non-string values
|
||||
if needsConversion {
|
||||
jsonStr, _ = sjson.Set(jsonStr, p, stringVals)
|
||||
}
|
||||
}
|
||||
return jsonStr
|
||||
}
|
||||
|
||||
func addEnumHints(jsonStr string) string {
|
||||
for _, p := range findPaths(jsonStr, "enum") {
|
||||
arr := gjson.Get(jsonStr, p)
|
||||
|
||||
@@ -818,3 +818,54 @@ func TestCleanJSONSchemaForAntigravity_MultipleFormats(t *testing.T) {
|
||||
t.Errorf("date-time format hint should be added, got: %s", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCleanJSONSchemaForAntigravity_NumericEnumToString(t *testing.T) {
|
||||
// Gemini API requires enum values to be strings, not numbers
|
||||
input := `{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"priority": {"type": "integer", "enum": [0, 1, 2]},
|
||||
"level": {"type": "number", "enum": [1.5, 2.5, 3.5]},
|
||||
"status": {"type": "string", "enum": ["active", "inactive"]}
|
||||
}
|
||||
}`
|
||||
|
||||
result := CleanJSONSchemaForAntigravity(input)
|
||||
|
||||
// Numeric enum values should be converted to strings
|
||||
if strings.Contains(result, `"enum":[0,1,2]`) {
|
||||
t.Errorf("Integer enum values should be converted to strings, got: %s", result)
|
||||
}
|
||||
if strings.Contains(result, `"enum":[1.5,2.5,3.5]`) {
|
||||
t.Errorf("Float enum values should be converted to strings, got: %s", result)
|
||||
}
|
||||
// Should contain string versions
|
||||
if !strings.Contains(result, `"0"`) || !strings.Contains(result, `"1"`) || !strings.Contains(result, `"2"`) {
|
||||
t.Errorf("Integer enum values should be converted to string format, got: %s", result)
|
||||
}
|
||||
// String enum values should remain unchanged
|
||||
if !strings.Contains(result, `"active"`) || !strings.Contains(result, `"inactive"`) {
|
||||
t.Errorf("String enum values should remain unchanged, got: %s", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCleanJSONSchemaForAntigravity_BooleanEnumToString(t *testing.T) {
|
||||
// Boolean enum values should also be converted to strings
|
||||
input := `{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"enabled": {"type": "boolean", "enum": [true, false]}
|
||||
}
|
||||
}`
|
||||
|
||||
result := CleanJSONSchemaForAntigravity(input)
|
||||
|
||||
// Boolean enum values should be converted to strings
|
||||
if strings.Contains(result, `"enum":[true,false]`) {
|
||||
t.Errorf("Boolean enum values should be converted to strings, got: %s", result)
|
||||
}
|
||||
// Should contain string versions "true" and "false"
|
||||
if !strings.Contains(result, `"true"`) || !strings.Contains(result, `"false"`) {
|
||||
t.Errorf("Boolean enum values should be converted to string format, got: %s", result)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,617 +0,0 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
const (
|
||||
GeminiThinkingBudgetMetadataKey = "gemini_thinking_budget"
|
||||
GeminiIncludeThoughtsMetadataKey = "gemini_include_thoughts"
|
||||
GeminiOriginalModelMetadataKey = "gemini_original_model"
|
||||
)
|
||||
|
||||
// Gemini model family detection patterns
|
||||
var (
|
||||
gemini3Pattern = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]`)
|
||||
gemini3ProPattern = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]pro`)
|
||||
gemini3FlashPattern = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]flash`)
|
||||
gemini25Pattern = regexp.MustCompile(`(?i)^gemini[_-]?2\.5[_-]`)
|
||||
)
|
||||
|
||||
// IsGemini3Model returns true if the model is a Gemini 3 family model.
|
||||
// Gemini 3 models should use thinkingLevel (string) instead of thinkingBudget (number).
|
||||
func IsGemini3Model(model string) bool {
|
||||
return gemini3Pattern.MatchString(model)
|
||||
}
|
||||
|
||||
// IsGemini3ProModel returns true if the model is a Gemini 3 Pro variant.
|
||||
// Gemini 3 Pro supports thinkingLevel: "low", "high" (default: "high")
|
||||
func IsGemini3ProModel(model string) bool {
|
||||
return gemini3ProPattern.MatchString(model)
|
||||
}
|
||||
|
||||
// IsGemini3FlashModel returns true if the model is a Gemini 3 Flash variant.
|
||||
// Gemini 3 Flash supports thinkingLevel: "minimal", "low", "medium", "high" (default: "high")
|
||||
func IsGemini3FlashModel(model string) bool {
|
||||
return gemini3FlashPattern.MatchString(model)
|
||||
}
|
||||
|
||||
// IsGemini25Model returns true if the model is a Gemini 2.5 family model.
|
||||
// Gemini 2.5 models should use thinkingBudget (number).
|
||||
func IsGemini25Model(model string) bool {
|
||||
return gemini25Pattern.MatchString(model)
|
||||
}
|
||||
|
||||
// Gemini3ProThinkingLevels are the valid thinkingLevel values for Gemini 3 Pro models.
|
||||
var Gemini3ProThinkingLevels = []string{"low", "high"}
|
||||
|
||||
// Gemini3FlashThinkingLevels are the valid thinkingLevel values for Gemini 3 Flash models.
|
||||
var Gemini3FlashThinkingLevels = []string{"minimal", "low", "medium", "high"}
|
||||
|
||||
func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool) []byte {
|
||||
if budget == nil && includeThoughts == nil {
|
||||
return body
|
||||
}
|
||||
updated := body
|
||||
if budget != nil {
|
||||
valuePath := "generationConfig.thinkingConfig.thinkingBudget"
|
||||
rewritten, err := sjson.SetBytes(updated, valuePath, *budget)
|
||||
if err == nil {
|
||||
updated = rewritten
|
||||
}
|
||||
}
|
||||
// Default to including thoughts when a budget override is present but no explicit include flag is provided.
|
||||
incl := includeThoughts
|
||||
if incl == nil && budget != nil && *budget != 0 {
|
||||
defaultInclude := true
|
||||
incl = &defaultInclude
|
||||
}
|
||||
if incl != nil {
|
||||
if !gjson.GetBytes(updated, "generationConfig.thinkingConfig.includeThoughts").Exists() &&
|
||||
!gjson.GetBytes(updated, "generationConfig.thinkingConfig.include_thoughts").Exists() {
|
||||
valuePath := "generationConfig.thinkingConfig.include_thoughts"
|
||||
rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
|
||||
if err == nil {
|
||||
updated = rewritten
|
||||
}
|
||||
}
|
||||
}
|
||||
return updated
|
||||
}
|
||||
|
||||
func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *bool) []byte {
|
||||
if budget == nil && includeThoughts == nil {
|
||||
return body
|
||||
}
|
||||
updated := body
|
||||
if budget != nil {
|
||||
valuePath := "request.generationConfig.thinkingConfig.thinkingBudget"
|
||||
rewritten, err := sjson.SetBytes(updated, valuePath, *budget)
|
||||
if err == nil {
|
||||
updated = rewritten
|
||||
}
|
||||
}
|
||||
// Default to including thoughts when a budget override is present but no explicit include flag is provided.
|
||||
incl := includeThoughts
|
||||
if incl == nil && budget != nil && *budget != 0 {
|
||||
defaultInclude := true
|
||||
incl = &defaultInclude
|
||||
}
|
||||
if incl != nil {
|
||||
if !gjson.GetBytes(updated, "request.generationConfig.thinkingConfig.includeThoughts").Exists() &&
|
||||
!gjson.GetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts").Exists() {
|
||||
valuePath := "request.generationConfig.thinkingConfig.include_thoughts"
|
||||
rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
|
||||
if err == nil {
|
||||
updated = rewritten
|
||||
}
|
||||
}
|
||||
}
|
||||
return updated
|
||||
}
|
||||
|
||||
// ApplyGeminiThinkingLevel applies thinkingLevel config for Gemini 3 models.
|
||||
// For standard Gemini API format (generationConfig.thinkingConfig path).
|
||||
// Per Google's documentation, Gemini 3 models should use thinkingLevel instead of thinkingBudget.
|
||||
func ApplyGeminiThinkingLevel(body []byte, level string, includeThoughts *bool) []byte {
|
||||
if level == "" && includeThoughts == nil {
|
||||
return body
|
||||
}
|
||||
updated := body
|
||||
if level != "" {
|
||||
valuePath := "generationConfig.thinkingConfig.thinkingLevel"
|
||||
rewritten, err := sjson.SetBytes(updated, valuePath, level)
|
||||
if err == nil {
|
||||
updated = rewritten
|
||||
}
|
||||
}
|
||||
// Default to including thoughts when a level is set but no explicit include flag is provided.
|
||||
incl := includeThoughts
|
||||
if incl == nil && level != "" {
|
||||
defaultInclude := true
|
||||
incl = &defaultInclude
|
||||
}
|
||||
if incl != nil {
|
||||
if !gjson.GetBytes(updated, "generationConfig.thinkingConfig.includeThoughts").Exists() &&
|
||||
!gjson.GetBytes(updated, "generationConfig.thinkingConfig.include_thoughts").Exists() {
|
||||
valuePath := "generationConfig.thinkingConfig.includeThoughts"
|
||||
rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
|
||||
if err == nil {
|
||||
updated = rewritten
|
||||
}
|
||||
}
|
||||
}
|
||||
if tb := gjson.GetBytes(body, "generationConfig.thinkingConfig.thinkingBudget"); tb.Exists() {
|
||||
updated, _ = sjson.DeleteBytes(updated, "generationConfig.thinkingConfig.thinkingBudget")
|
||||
}
|
||||
return updated
|
||||
}
|
||||
|
||||
// ApplyGeminiCLIThinkingLevel applies thinkingLevel config for Gemini 3 models.
|
||||
// For Gemini CLI API format (request.generationConfig.thinkingConfig path).
|
||||
// Per Google's documentation, Gemini 3 models should use thinkingLevel instead of thinkingBudget.
|
||||
func ApplyGeminiCLIThinkingLevel(body []byte, level string, includeThoughts *bool) []byte {
|
||||
if level == "" && includeThoughts == nil {
|
||||
return body
|
||||
}
|
||||
updated := body
|
||||
if level != "" {
|
||||
valuePath := "request.generationConfig.thinkingConfig.thinkingLevel"
|
||||
rewritten, err := sjson.SetBytes(updated, valuePath, level)
|
||||
if err == nil {
|
||||
updated = rewritten
|
||||
}
|
||||
}
|
||||
// Default to including thoughts when a level is set but no explicit include flag is provided.
|
||||
incl := includeThoughts
|
||||
if incl == nil && level != "" {
|
||||
defaultInclude := true
|
||||
incl = &defaultInclude
|
||||
}
|
||||
if incl != nil {
|
||||
if !gjson.GetBytes(updated, "request.generationConfig.thinkingConfig.includeThoughts").Exists() &&
|
||||
!gjson.GetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts").Exists() {
|
||||
valuePath := "request.generationConfig.thinkingConfig.includeThoughts"
|
||||
rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
|
||||
if err == nil {
|
||||
updated = rewritten
|
||||
}
|
||||
}
|
||||
}
|
||||
if tb := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget"); tb.Exists() {
|
||||
updated, _ = sjson.DeleteBytes(updated, "request.generationConfig.thinkingConfig.thinkingBudget")
|
||||
}
|
||||
return updated
|
||||
}
|
||||
|
||||
// ValidateGemini3ThinkingLevel validates that the thinkingLevel is valid for the Gemini 3 model variant.
|
||||
// Returns the validated level (normalized to lowercase) and true if valid, or empty string and false if invalid.
|
||||
func ValidateGemini3ThinkingLevel(model, level string) (string, bool) {
|
||||
if level == "" {
|
||||
return "", false
|
||||
}
|
||||
normalized := strings.ToLower(strings.TrimSpace(level))
|
||||
|
||||
var validLevels []string
|
||||
if IsGemini3ProModel(model) {
|
||||
validLevels = Gemini3ProThinkingLevels
|
||||
} else if IsGemini3FlashModel(model) {
|
||||
validLevels = Gemini3FlashThinkingLevels
|
||||
} else if IsGemini3Model(model) {
|
||||
// Unknown Gemini 3 variant - allow all levels as fallback
|
||||
validLevels = Gemini3FlashThinkingLevels
|
||||
} else {
|
||||
return "", false
|
||||
}
|
||||
|
||||
for _, valid := range validLevels {
|
||||
if normalized == valid {
|
||||
return normalized, true
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
// ThinkingBudgetToGemini3Level converts a thinkingBudget to a thinkingLevel for Gemini 3 models.
|
||||
// This provides backward compatibility when thinkingBudget is provided for Gemini 3 models.
|
||||
// Returns the appropriate thinkingLevel and true if conversion is possible.
|
||||
func ThinkingBudgetToGemini3Level(model string, budget int) (string, bool) {
|
||||
if !IsGemini3Model(model) {
|
||||
return "", false
|
||||
}
|
||||
|
||||
// Map budget to level based on Google's documentation
|
||||
// Gemini 3 Pro: "low", "high" (default: "high")
|
||||
// Gemini 3 Flash: "minimal", "low", "medium", "high" (default: "high")
|
||||
switch {
|
||||
case budget == -1:
|
||||
// Dynamic budget maps to "high" (API default)
|
||||
return "high", true
|
||||
case budget == 0:
|
||||
// Zero budget - Gemini 3 doesn't support disabling thinking
|
||||
// Map to lowest available level
|
||||
if IsGemini3FlashModel(model) {
|
||||
return "minimal", true
|
||||
}
|
||||
return "low", true
|
||||
case budget > 0 && budget <= 512:
|
||||
if IsGemini3FlashModel(model) {
|
||||
return "minimal", true
|
||||
}
|
||||
return "low", true
|
||||
case budget <= 1024:
|
||||
return "low", true
|
||||
case budget <= 8192:
|
||||
if IsGemini3FlashModel(model) {
|
||||
return "medium", true
|
||||
}
|
||||
return "low", true // Pro doesn't have medium, use low
|
||||
default:
|
||||
return "high", true
|
||||
}
|
||||
}
|
||||
|
||||
// modelsWithDefaultThinking lists models that should have thinking enabled by default
|
||||
// when no explicit thinkingConfig is provided.
|
||||
// Note: Gemini 3 models are NOT included here because per Google's official documentation:
|
||||
// - thinkingLevel defaults to "high" (dynamic thinking)
|
||||
// - includeThoughts defaults to false
|
||||
//
|
||||
// We should not override these API defaults; let users explicitly configure if needed.
|
||||
var modelsWithDefaultThinking = map[string]bool{
|
||||
// "gemini-3-pro-preview": true,
|
||||
// "gemini-3-pro-image-preview": true,
|
||||
// "gemini-3-flash-preview": true,
|
||||
}
|
||||
|
||||
// ModelHasDefaultThinking returns true if the model should have thinking enabled by default.
|
||||
func ModelHasDefaultThinking(model string) bool {
|
||||
return modelsWithDefaultThinking[model]
|
||||
}
|
||||
|
||||
// ApplyDefaultThinkingIfNeeded injects default thinkingConfig for models that require it.
|
||||
// For standard Gemini API format (generationConfig.thinkingConfig path).
|
||||
// Returns the modified body if thinkingConfig was added, otherwise returns the original.
|
||||
// For Gemini 3 models, uses thinkingLevel instead of thinkingBudget per Google's documentation.
|
||||
func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte {
|
||||
if !ModelHasDefaultThinking(model) {
|
||||
return body
|
||||
}
|
||||
if gjson.GetBytes(body, "generationConfig.thinkingConfig").Exists() {
|
||||
return body
|
||||
}
|
||||
// Gemini 3 models use thinkingLevel instead of thinkingBudget
|
||||
if IsGemini3Model(model) {
|
||||
// Don't set a default - let the API use its dynamic default ("high")
|
||||
// Only set includeThoughts
|
||||
updated, _ := sjson.SetBytes(body, "generationConfig.thinkingConfig.includeThoughts", true)
|
||||
return updated
|
||||
}
|
||||
// Gemini 2.5 and other models use thinkingBudget
|
||||
updated, _ := sjson.SetBytes(body, "generationConfig.thinkingConfig.thinkingBudget", -1)
|
||||
updated, _ = sjson.SetBytes(updated, "generationConfig.thinkingConfig.include_thoughts", true)
|
||||
return updated
|
||||
}
|
||||
|
||||
// ApplyGemini3ThinkingLevelFromMetadata applies thinkingLevel from metadata for Gemini 3 models.
|
||||
// For standard Gemini API format (generationConfig.thinkingConfig path).
|
||||
// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal))
|
||||
// or numeric budget suffix (e.g., model(1000)) which gets converted to a thinkingLevel.
|
||||
func ApplyGemini3ThinkingLevelFromMetadata(model string, metadata map[string]any, body []byte) []byte {
|
||||
// Use the alias from metadata if available for model type detection
|
||||
lookupModel := ResolveOriginalModel(model, metadata)
|
||||
if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) {
|
||||
return body
|
||||
}
|
||||
|
||||
// Determine which model to use for validation
|
||||
checkModel := model
|
||||
if IsGemini3Model(lookupModel) {
|
||||
checkModel = lookupModel
|
||||
}
|
||||
|
||||
// First try to get effort string from metadata
|
||||
effort, ok := ReasoningEffortFromMetadata(metadata)
|
||||
if ok && effort != "" {
|
||||
if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid {
|
||||
return ApplyGeminiThinkingLevel(body, level, nil)
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: check for numeric budget and convert to thinkingLevel
|
||||
budget, _, _, matched := ThinkingFromMetadata(metadata)
|
||||
if matched && budget != nil {
|
||||
if level, valid := ThinkingBudgetToGemini3Level(checkModel, *budget); valid {
|
||||
return ApplyGeminiThinkingLevel(body, level, nil)
|
||||
}
|
||||
}
|
||||
|
||||
return body
|
||||
}
|
||||
|
||||
// ApplyGemini3ThinkingLevelFromMetadataCLI applies thinkingLevel from metadata for Gemini 3 models.
|
||||
// For Gemini CLI API format (request.generationConfig.thinkingConfig path).
|
||||
// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal))
|
||||
// or numeric budget suffix (e.g., model(1000)) which gets converted to a thinkingLevel.
|
||||
func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string]any, body []byte) []byte {
|
||||
// Use the alias from metadata if available for model type detection
|
||||
lookupModel := ResolveOriginalModel(model, metadata)
|
||||
if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) {
|
||||
return body
|
||||
}
|
||||
|
||||
// Determine which model to use for validation
|
||||
checkModel := model
|
||||
if IsGemini3Model(lookupModel) {
|
||||
checkModel = lookupModel
|
||||
}
|
||||
|
||||
// First try to get effort string from metadata
|
||||
effort, ok := ReasoningEffortFromMetadata(metadata)
|
||||
if ok && effort != "" {
|
||||
if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid {
|
||||
return ApplyGeminiCLIThinkingLevel(body, level, nil)
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: check for numeric budget and convert to thinkingLevel
|
||||
budget, _, _, matched := ThinkingFromMetadata(metadata)
|
||||
if matched && budget != nil {
|
||||
if level, valid := ThinkingBudgetToGemini3Level(checkModel, *budget); valid {
|
||||
return ApplyGeminiCLIThinkingLevel(body, level, nil)
|
||||
}
|
||||
}
|
||||
|
||||
return body
|
||||
}
|
||||
|
||||
// ApplyDefaultThinkingIfNeededCLI injects default thinkingConfig for models that require it.
|
||||
// For Gemini CLI API format (request.generationConfig.thinkingConfig path).
|
||||
// Returns the modified body if thinkingConfig was added, otherwise returns the original.
|
||||
// For Gemini 3 models, uses thinkingLevel instead of thinkingBudget per Google's documentation.
|
||||
func ApplyDefaultThinkingIfNeededCLI(model string, metadata map[string]any, body []byte) []byte {
|
||||
// Use the alias from metadata if available for model property lookup
|
||||
lookupModel := ResolveOriginalModel(model, metadata)
|
||||
if !ModelHasDefaultThinking(lookupModel) && !ModelHasDefaultThinking(model) {
|
||||
return body
|
||||
}
|
||||
if gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() {
|
||||
return body
|
||||
}
|
||||
// Gemini 3 models use thinkingLevel instead of thinkingBudget
|
||||
if IsGemini3Model(lookupModel) || IsGemini3Model(model) {
|
||||
// Don't set a default - let the API use its dynamic default ("high")
|
||||
// Only set includeThoughts
|
||||
updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts", true)
|
||||
return updated
|
||||
}
|
||||
// Gemini 2.5 and other models use thinkingBudget
|
||||
updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
|
||||
updated, _ = sjson.SetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
return updated
|
||||
}
|
||||
|
||||
// StripThinkingConfigIfUnsupported removes thinkingConfig from the request body
|
||||
// when the target model does not advertise Thinking capability. It cleans both
|
||||
// standard Gemini and Gemini CLI JSON envelopes. This acts as a final safety net
|
||||
// in case upstream injected thinking for an unsupported model.
|
||||
func StripThinkingConfigIfUnsupported(model string, body []byte) []byte {
|
||||
if ModelSupportsThinking(model) || len(body) == 0 {
|
||||
return body
|
||||
}
|
||||
updated := body
|
||||
// Gemini CLI path
|
||||
updated, _ = sjson.DeleteBytes(updated, "request.generationConfig.thinkingConfig")
|
||||
// Standard Gemini path
|
||||
updated, _ = sjson.DeleteBytes(updated, "generationConfig.thinkingConfig")
|
||||
return updated
|
||||
}
|
||||
|
||||
// NormalizeGeminiThinkingBudget normalizes the thinkingBudget value in a standard Gemini
|
||||
// request body (generationConfig.thinkingConfig.thinkingBudget path).
|
||||
// For Gemini 3 models, converts thinkingBudget to thinkingLevel per Google's documentation,
|
||||
// unless skipGemini3Check is provided and true.
|
||||
func NormalizeGeminiThinkingBudget(model string, body []byte, skipGemini3Check ...bool) []byte {
|
||||
const budgetPath = "generationConfig.thinkingConfig.thinkingBudget"
|
||||
const levelPath = "generationConfig.thinkingConfig.thinkingLevel"
|
||||
|
||||
budget := gjson.GetBytes(body, budgetPath)
|
||||
if !budget.Exists() {
|
||||
return body
|
||||
}
|
||||
|
||||
// For Gemini 3 models, convert thinkingBudget to thinkingLevel
|
||||
skipGemini3 := len(skipGemini3Check) > 0 && skipGemini3Check[0]
|
||||
if IsGemini3Model(model) && !skipGemini3 {
|
||||
if level, ok := ThinkingBudgetToGemini3Level(model, int(budget.Int())); ok {
|
||||
updated, _ := sjson.SetBytes(body, levelPath, level)
|
||||
updated, _ = sjson.DeleteBytes(updated, budgetPath)
|
||||
return updated
|
||||
}
|
||||
// If conversion fails, just remove the budget (let API use default)
|
||||
updated, _ := sjson.DeleteBytes(body, budgetPath)
|
||||
return updated
|
||||
}
|
||||
|
||||
// For Gemini 2.5 and other models, normalize the budget value
|
||||
normalized := NormalizeThinkingBudget(model, int(budget.Int()))
|
||||
updated, _ := sjson.SetBytes(body, budgetPath, normalized)
|
||||
return updated
|
||||
}
|
||||
|
||||
// NormalizeGeminiCLIThinkingBudget normalizes the thinkingBudget value in a Gemini CLI
|
||||
// request body (request.generationConfig.thinkingConfig.thinkingBudget path).
|
||||
// For Gemini 3 models, converts thinkingBudget to thinkingLevel per Google's documentation,
|
||||
// unless skipGemini3Check is provided and true.
|
||||
func NormalizeGeminiCLIThinkingBudget(model string, body []byte, skipGemini3Check ...bool) []byte {
|
||||
const budgetPath = "request.generationConfig.thinkingConfig.thinkingBudget"
|
||||
const levelPath = "request.generationConfig.thinkingConfig.thinkingLevel"
|
||||
|
||||
budget := gjson.GetBytes(body, budgetPath)
|
||||
if !budget.Exists() {
|
||||
return body
|
||||
}
|
||||
|
||||
// For Gemini 3 models, convert thinkingBudget to thinkingLevel
|
||||
skipGemini3 := len(skipGemini3Check) > 0 && skipGemini3Check[0]
|
||||
if IsGemini3Model(model) && !skipGemini3 {
|
||||
if level, ok := ThinkingBudgetToGemini3Level(model, int(budget.Int())); ok {
|
||||
updated, _ := sjson.SetBytes(body, levelPath, level)
|
||||
updated, _ = sjson.DeleteBytes(updated, budgetPath)
|
||||
return updated
|
||||
}
|
||||
// If conversion fails, just remove the budget (let API use default)
|
||||
updated, _ := sjson.DeleteBytes(body, budgetPath)
|
||||
return updated
|
||||
}
|
||||
|
||||
// For Gemini 2.5 and other models, normalize the budget value
|
||||
normalized := NormalizeThinkingBudget(model, int(budget.Int()))
|
||||
updated, _ := sjson.SetBytes(body, budgetPath, normalized)
|
||||
return updated
|
||||
}
|
||||
|
||||
// ReasoningEffortBudgetMapping defines the thinkingBudget values for each reasoning effort level.
|
||||
var ReasoningEffortBudgetMapping = map[string]int{
|
||||
"none": 0,
|
||||
"auto": -1,
|
||||
"minimal": 512,
|
||||
"low": 1024,
|
||||
"medium": 8192,
|
||||
"high": 24576,
|
||||
"xhigh": 32768,
|
||||
}
|
||||
|
||||
// ApplyReasoningEffortToGemini applies OpenAI reasoning_effort to Gemini thinkingConfig
|
||||
// for standard Gemini API format (generationConfig.thinkingConfig path).
|
||||
// Returns the modified body with thinkingBudget and include_thoughts set.
|
||||
func ApplyReasoningEffortToGemini(body []byte, effort string) []byte {
|
||||
normalized := strings.ToLower(strings.TrimSpace(effort))
|
||||
if normalized == "" {
|
||||
return body
|
||||
}
|
||||
|
||||
budgetPath := "generationConfig.thinkingConfig.thinkingBudget"
|
||||
includePath := "generationConfig.thinkingConfig.include_thoughts"
|
||||
|
||||
if normalized == "none" {
|
||||
body, _ = sjson.DeleteBytes(body, "generationConfig.thinkingConfig")
|
||||
return body
|
||||
}
|
||||
|
||||
budget, ok := ReasoningEffortBudgetMapping[normalized]
|
||||
if !ok {
|
||||
return body
|
||||
}
|
||||
|
||||
body, _ = sjson.SetBytes(body, budgetPath, budget)
|
||||
body, _ = sjson.SetBytes(body, includePath, true)
|
||||
return body
|
||||
}
|
||||
|
||||
// ApplyReasoningEffortToGeminiCLI applies OpenAI reasoning_effort to Gemini CLI thinkingConfig
|
||||
// for Gemini CLI API format (request.generationConfig.thinkingConfig path).
|
||||
// Returns the modified body with thinkingBudget and include_thoughts set.
|
||||
func ApplyReasoningEffortToGeminiCLI(body []byte, effort string) []byte {
|
||||
normalized := strings.ToLower(strings.TrimSpace(effort))
|
||||
if normalized == "" {
|
||||
return body
|
||||
}
|
||||
|
||||
budgetPath := "request.generationConfig.thinkingConfig.thinkingBudget"
|
||||
includePath := "request.generationConfig.thinkingConfig.include_thoughts"
|
||||
|
||||
if normalized == "none" {
|
||||
body, _ = sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig")
|
||||
return body
|
||||
}
|
||||
|
||||
budget, ok := ReasoningEffortBudgetMapping[normalized]
|
||||
if !ok {
|
||||
return body
|
||||
}
|
||||
|
||||
body, _ = sjson.SetBytes(body, budgetPath, budget)
|
||||
body, _ = sjson.SetBytes(body, includePath, true)
|
||||
return body
|
||||
}
|
||||
|
||||
// ConvertThinkingLevelToBudget checks for "generationConfig.thinkingConfig.thinkingLevel"
|
||||
// and converts it to "thinkingBudget" for Gemini 2.5 models.
|
||||
// For Gemini 3 models, preserves thinkingLevel unless skipGemini3Check is provided and true.
|
||||
// Mappings for Gemini 2.5:
|
||||
// - "high" -> 32768
|
||||
// - "medium" -> 8192
|
||||
// - "low" -> 1024
|
||||
// - "minimal" -> 512
|
||||
//
|
||||
// It removes "thinkingLevel" after conversion (for Gemini 2.5 only).
|
||||
func ConvertThinkingLevelToBudget(body []byte, model string, skipGemini3Check ...bool) []byte {
|
||||
levelPath := "generationConfig.thinkingConfig.thinkingLevel"
|
||||
res := gjson.GetBytes(body, levelPath)
|
||||
if !res.Exists() {
|
||||
return body
|
||||
}
|
||||
|
||||
// For Gemini 3 models, preserve thinkingLevel unless explicitly skipped
|
||||
skipGemini3 := len(skipGemini3Check) > 0 && skipGemini3Check[0]
|
||||
if IsGemini3Model(model) && !skipGemini3 {
|
||||
return body
|
||||
}
|
||||
|
||||
budget, ok := ThinkingLevelToBudget(res.String())
|
||||
if !ok {
|
||||
updated, _ := sjson.DeleteBytes(body, levelPath)
|
||||
return updated
|
||||
}
|
||||
|
||||
budgetPath := "generationConfig.thinkingConfig.thinkingBudget"
|
||||
updated, err := sjson.SetBytes(body, budgetPath, budget)
|
||||
if err != nil {
|
||||
return body
|
||||
}
|
||||
|
||||
updated, err = sjson.DeleteBytes(updated, levelPath)
|
||||
if err != nil {
|
||||
return body
|
||||
}
|
||||
return updated
|
||||
}
|
||||
|
||||
// ConvertThinkingLevelToBudgetCLI checks for "request.generationConfig.thinkingConfig.thinkingLevel"
|
||||
// and converts it to "thinkingBudget" for Gemini 2.5 models.
|
||||
// For Gemini 3 models, preserves thinkingLevel as-is (does not convert).
|
||||
func ConvertThinkingLevelToBudgetCLI(body []byte, model string) []byte {
|
||||
levelPath := "request.generationConfig.thinkingConfig.thinkingLevel"
|
||||
res := gjson.GetBytes(body, levelPath)
|
||||
if !res.Exists() {
|
||||
return body
|
||||
}
|
||||
|
||||
// For Gemini 3 models, preserve thinkingLevel - don't convert to budget
|
||||
if IsGemini3Model(model) {
|
||||
return body
|
||||
}
|
||||
|
||||
budget, ok := ThinkingLevelToBudget(res.String())
|
||||
if !ok {
|
||||
updated, _ := sjson.DeleteBytes(body, levelPath)
|
||||
return updated
|
||||
}
|
||||
|
||||
budgetPath := "request.generationConfig.thinkingConfig.thinkingBudget"
|
||||
updated, err := sjson.SetBytes(body, budgetPath, budget)
|
||||
if err != nil {
|
||||
return body
|
||||
}
|
||||
|
||||
updated, err = sjson.DeleteBytes(updated, levelPath)
|
||||
if err != nil {
|
||||
return body
|
||||
}
|
||||
return updated
|
||||
}
|
||||
@@ -1,245 +0,0 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
)
|
||||
|
||||
// ModelSupportsThinking reports whether the given model has Thinking capability
|
||||
// according to the model registry metadata (provider-agnostic).
|
||||
func ModelSupportsThinking(model string) bool {
|
||||
if model == "" {
|
||||
return false
|
||||
}
|
||||
// First check the global dynamic registry
|
||||
if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil {
|
||||
return info.Thinking != nil
|
||||
}
|
||||
// Fallback: check static model definitions
|
||||
if info := registry.LookupStaticModelInfo(model); info != nil {
|
||||
return info.Thinking != nil
|
||||
}
|
||||
// Fallback: check Antigravity static config
|
||||
if cfg := registry.GetAntigravityModelConfig()[model]; cfg != nil {
|
||||
return cfg.Thinking != nil
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// NormalizeThinkingBudget clamps the requested thinking budget to the
|
||||
// supported range for the specified model using registry metadata only.
|
||||
// If the model is unknown or has no Thinking metadata, returns the original budget.
|
||||
// For dynamic (-1), returns -1 if DynamicAllowed; otherwise approximates mid-range
|
||||
// or min (0 if zero is allowed and mid <= 0).
|
||||
func NormalizeThinkingBudget(model string, budget int) int {
|
||||
if budget == -1 { // dynamic
|
||||
if found, minBudget, maxBudget, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found {
|
||||
if dynamicAllowed {
|
||||
return -1
|
||||
}
|
||||
mid := (minBudget + maxBudget) / 2
|
||||
if mid <= 0 && zeroAllowed {
|
||||
return 0
|
||||
}
|
||||
if mid <= 0 {
|
||||
return minBudget
|
||||
}
|
||||
return mid
|
||||
}
|
||||
return -1
|
||||
}
|
||||
if found, minBudget, maxBudget, zeroAllowed, _ := thinkingRangeFromRegistry(model); found {
|
||||
if budget == 0 {
|
||||
if zeroAllowed {
|
||||
return 0
|
||||
}
|
||||
return minBudget
|
||||
}
|
||||
if budget < minBudget {
|
||||
return minBudget
|
||||
}
|
||||
if budget > maxBudget {
|
||||
return maxBudget
|
||||
}
|
||||
return budget
|
||||
}
|
||||
return budget
|
||||
}
|
||||
|
||||
// thinkingRangeFromRegistry attempts to read thinking ranges from the model registry.
|
||||
func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zeroAllowed bool, dynamicAllowed bool) {
|
||||
if model == "" {
|
||||
return false, 0, 0, false, false
|
||||
}
|
||||
// First check global dynamic registry
|
||||
if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil && info.Thinking != nil {
|
||||
return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
|
||||
}
|
||||
// Fallback: check static model definitions
|
||||
if info := registry.LookupStaticModelInfo(model); info != nil && info.Thinking != nil {
|
||||
return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
|
||||
}
|
||||
// Fallback: check Antigravity static config
|
||||
if cfg := registry.GetAntigravityModelConfig()[model]; cfg != nil && cfg.Thinking != nil {
|
||||
return true, cfg.Thinking.Min, cfg.Thinking.Max, cfg.Thinking.ZeroAllowed, cfg.Thinking.DynamicAllowed
|
||||
}
|
||||
return false, 0, 0, false, false
|
||||
}
|
||||
|
||||
// GetModelThinkingLevels returns the discrete reasoning effort levels for the model.
|
||||
// Returns nil if the model has no thinking support or no levels defined.
|
||||
func GetModelThinkingLevels(model string) []string {
|
||||
if model == "" {
|
||||
return nil
|
||||
}
|
||||
info := registry.GetGlobalRegistry().GetModelInfo(model)
|
||||
if info == nil || info.Thinking == nil {
|
||||
return nil
|
||||
}
|
||||
return info.Thinking.Levels
|
||||
}
|
||||
|
||||
// ModelUsesThinkingLevels reports whether the model uses discrete reasoning
|
||||
// effort levels instead of numeric budgets.
|
||||
func ModelUsesThinkingLevels(model string) bool {
|
||||
levels := GetModelThinkingLevels(model)
|
||||
return len(levels) > 0
|
||||
}
|
||||
|
||||
// NormalizeReasoningEffortLevel validates and normalizes a reasoning effort
|
||||
// level for the given model. Returns false when the level is not supported.
|
||||
func NormalizeReasoningEffortLevel(model, effort string) (string, bool) {
|
||||
levels := GetModelThinkingLevels(model)
|
||||
if len(levels) == 0 {
|
||||
return "", false
|
||||
}
|
||||
loweredEffort := strings.ToLower(strings.TrimSpace(effort))
|
||||
for _, lvl := range levels {
|
||||
if strings.ToLower(lvl) == loweredEffort {
|
||||
return lvl, true
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
// IsOpenAICompatibilityModel reports whether the model is registered as an OpenAI-compatibility model.
|
||||
// These models may not advertise Thinking metadata in the registry.
|
||||
func IsOpenAICompatibilityModel(model string) bool {
|
||||
if model == "" {
|
||||
return false
|
||||
}
|
||||
info := registry.GetGlobalRegistry().GetModelInfo(model)
|
||||
if info == nil {
|
||||
return false
|
||||
}
|
||||
return strings.EqualFold(strings.TrimSpace(info.Type), "openai-compatibility")
|
||||
}
|
||||
|
||||
// ThinkingEffortToBudget maps a reasoning effort level to a numeric thinking budget (tokens),
|
||||
// clamping the result to the model's supported range.
|
||||
//
|
||||
// Mappings (values are normalized to model's supported range):
|
||||
// - "none" -> 0
|
||||
// - "auto" -> -1
|
||||
// - "minimal" -> 512
|
||||
// - "low" -> 1024
|
||||
// - "medium" -> 8192
|
||||
// - "high" -> 24576
|
||||
// - "xhigh" -> 32768
|
||||
//
|
||||
// Returns false when the effort level is empty or unsupported.
|
||||
func ThinkingEffortToBudget(model, effort string) (int, bool) {
|
||||
if effort == "" {
|
||||
return 0, false
|
||||
}
|
||||
normalized, ok := NormalizeReasoningEffortLevel(model, effort)
|
||||
if !ok {
|
||||
normalized = strings.ToLower(strings.TrimSpace(effort))
|
||||
}
|
||||
switch normalized {
|
||||
case "none":
|
||||
return 0, true
|
||||
case "auto":
|
||||
return NormalizeThinkingBudget(model, -1), true
|
||||
case "minimal":
|
||||
return NormalizeThinkingBudget(model, 512), true
|
||||
case "low":
|
||||
return NormalizeThinkingBudget(model, 1024), true
|
||||
case "medium":
|
||||
return NormalizeThinkingBudget(model, 8192), true
|
||||
case "high":
|
||||
return NormalizeThinkingBudget(model, 24576), true
|
||||
case "xhigh":
|
||||
return NormalizeThinkingBudget(model, 32768), true
|
||||
default:
|
||||
return 0, false
|
||||
}
|
||||
}
|
||||
|
||||
// ThinkingLevelToBudget maps a Gemini thinkingLevel to a numeric thinking budget (tokens).
|
||||
//
|
||||
// Mappings:
|
||||
// - "minimal" -> 512
|
||||
// - "low" -> 1024
|
||||
// - "medium" -> 8192
|
||||
// - "high" -> 32768
|
||||
//
|
||||
// Returns false when the level is empty or unsupported.
|
||||
func ThinkingLevelToBudget(level string) (int, bool) {
|
||||
if level == "" {
|
||||
return 0, false
|
||||
}
|
||||
normalized := strings.ToLower(strings.TrimSpace(level))
|
||||
switch normalized {
|
||||
case "minimal":
|
||||
return 512, true
|
||||
case "low":
|
||||
return 1024, true
|
||||
case "medium":
|
||||
return 8192, true
|
||||
case "high":
|
||||
return 32768, true
|
||||
default:
|
||||
return 0, false
|
||||
}
|
||||
}
|
||||
|
||||
// ThinkingBudgetToEffort maps a numeric thinking budget (tokens)
|
||||
// to a reasoning effort level for level-based models.
|
||||
//
|
||||
// Mappings:
|
||||
// - 0 -> "none" (or lowest supported level if model doesn't support "none")
|
||||
// - -1 -> "auto"
|
||||
// - 1..1024 -> "low"
|
||||
// - 1025..8192 -> "medium"
|
||||
// - 8193..24576 -> "high"
|
||||
// - 24577.. -> highest supported level for the model (defaults to "xhigh")
|
||||
//
|
||||
// Returns false when the budget is unsupported (negative values other than -1).
|
||||
func ThinkingBudgetToEffort(model string, budget int) (string, bool) {
|
||||
switch {
|
||||
case budget == -1:
|
||||
return "auto", true
|
||||
case budget < -1:
|
||||
return "", false
|
||||
case budget == 0:
|
||||
if levels := GetModelThinkingLevels(model); len(levels) > 0 {
|
||||
return levels[0], true
|
||||
}
|
||||
return "none", true
|
||||
case budget > 0 && budget <= 1024:
|
||||
return "low", true
|
||||
case budget <= 8192:
|
||||
return "medium", true
|
||||
case budget <= 24576:
|
||||
return "high", true
|
||||
case budget > 24576:
|
||||
if levels := GetModelThinkingLevels(model); len(levels) > 0 {
|
||||
return levels[len(levels)-1], true
|
||||
}
|
||||
return "xhigh", true
|
||||
default:
|
||||
return "", false
|
||||
}
|
||||
}
|
||||
@@ -1,296 +0,0 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
ThinkingBudgetMetadataKey = "thinking_budget"
|
||||
ThinkingIncludeThoughtsMetadataKey = "thinking_include_thoughts"
|
||||
ReasoningEffortMetadataKey = "reasoning_effort"
|
||||
ThinkingOriginalModelMetadataKey = "thinking_original_model"
|
||||
ModelMappingOriginalModelMetadataKey = "model_mapping_original_model"
|
||||
)
|
||||
|
||||
// NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns
|
||||
// the normalized base model with extracted metadata. Supported pattern:
|
||||
// - "(<value>)" where value can be:
|
||||
// - A numeric budget (e.g., "(8192)", "(16384)")
|
||||
// - A reasoning effort level (e.g., "(high)", "(medium)", "(low)")
|
||||
//
|
||||
// Examples:
|
||||
// - "claude-sonnet-4-5-20250929(16384)" → budget=16384
|
||||
// - "gpt-5.1(high)" → reasoning_effort="high"
|
||||
// - "gemini-2.5-pro(32768)" → budget=32768
|
||||
//
|
||||
// Note: Empty parentheses "()" are not supported and will be ignored.
|
||||
func NormalizeThinkingModel(modelName string) (string, map[string]any) {
|
||||
if modelName == "" {
|
||||
return modelName, nil
|
||||
}
|
||||
|
||||
baseModel := modelName
|
||||
|
||||
var (
|
||||
budgetOverride *int
|
||||
reasoningEffort *string
|
||||
matched bool
|
||||
)
|
||||
|
||||
// Match "(<value>)" pattern at the end of the model name
|
||||
if idx := strings.LastIndex(modelName, "("); idx != -1 {
|
||||
if !strings.HasSuffix(modelName, ")") {
|
||||
// Incomplete parenthesis, ignore
|
||||
return baseModel, nil
|
||||
}
|
||||
|
||||
value := modelName[idx+1 : len(modelName)-1] // Extract content between ( and )
|
||||
if value == "" {
|
||||
// Empty parentheses not supported
|
||||
return baseModel, nil
|
||||
}
|
||||
|
||||
candidateBase := modelName[:idx]
|
||||
|
||||
// Auto-detect: pure numeric → budget, string → reasoning effort level
|
||||
if parsed, ok := parseIntPrefix(value); ok {
|
||||
// Numeric value: treat as thinking budget
|
||||
baseModel = candidateBase
|
||||
budgetOverride = &parsed
|
||||
matched = true
|
||||
} else {
|
||||
// String value: treat as reasoning effort level
|
||||
baseModel = candidateBase
|
||||
raw := strings.ToLower(strings.TrimSpace(value))
|
||||
if raw != "" {
|
||||
reasoningEffort = &raw
|
||||
matched = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !matched {
|
||||
return baseModel, nil
|
||||
}
|
||||
|
||||
metadata := map[string]any{
|
||||
ThinkingOriginalModelMetadataKey: modelName,
|
||||
}
|
||||
if budgetOverride != nil {
|
||||
metadata[ThinkingBudgetMetadataKey] = *budgetOverride
|
||||
}
|
||||
if reasoningEffort != nil {
|
||||
metadata[ReasoningEffortMetadataKey] = *reasoningEffort
|
||||
}
|
||||
return baseModel, metadata
|
||||
}
|
||||
|
||||
// ThinkingFromMetadata extracts thinking overrides from metadata produced by NormalizeThinkingModel.
|
||||
// It accepts both the new generic keys and legacy Gemini-specific keys.
|
||||
func ThinkingFromMetadata(metadata map[string]any) (*int, *bool, *string, bool) {
|
||||
if len(metadata) == 0 {
|
||||
return nil, nil, nil, false
|
||||
}
|
||||
|
||||
var (
|
||||
budgetPtr *int
|
||||
includePtr *bool
|
||||
effortPtr *string
|
||||
matched bool
|
||||
)
|
||||
|
||||
readBudget := func(key string) {
|
||||
if budgetPtr != nil {
|
||||
return
|
||||
}
|
||||
if raw, ok := metadata[key]; ok {
|
||||
if v, okNumber := parseNumberToInt(raw); okNumber {
|
||||
budget := v
|
||||
budgetPtr = &budget
|
||||
matched = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
readInclude := func(key string) {
|
||||
if includePtr != nil {
|
||||
return
|
||||
}
|
||||
if raw, ok := metadata[key]; ok {
|
||||
switch v := raw.(type) {
|
||||
case bool:
|
||||
val := v
|
||||
includePtr = &val
|
||||
matched = true
|
||||
case *bool:
|
||||
if v != nil {
|
||||
val := *v
|
||||
includePtr = &val
|
||||
matched = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
readEffort := func(key string) {
|
||||
if effortPtr != nil {
|
||||
return
|
||||
}
|
||||
if raw, ok := metadata[key]; ok {
|
||||
if val, okStr := raw.(string); okStr && strings.TrimSpace(val) != "" {
|
||||
normalized := strings.ToLower(strings.TrimSpace(val))
|
||||
effortPtr = &normalized
|
||||
matched = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
readBudget(ThinkingBudgetMetadataKey)
|
||||
readBudget(GeminiThinkingBudgetMetadataKey)
|
||||
readInclude(ThinkingIncludeThoughtsMetadataKey)
|
||||
readInclude(GeminiIncludeThoughtsMetadataKey)
|
||||
readEffort(ReasoningEffortMetadataKey)
|
||||
readEffort("reasoning.effort")
|
||||
|
||||
return budgetPtr, includePtr, effortPtr, matched
|
||||
}
|
||||
|
||||
// ResolveThinkingConfigFromMetadata derives thinking budget/include overrides,
|
||||
// converting reasoning effort strings into budgets when possible.
|
||||
func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (*int, *bool, bool) {
|
||||
budget, include, effort, matched := ThinkingFromMetadata(metadata)
|
||||
if !matched {
|
||||
return nil, nil, false
|
||||
}
|
||||
// Level-based models (OpenAI-style) do not accept numeric thinking budgets in
|
||||
// Claude/Gemini-style protocols, so we don't derive budgets for them here.
|
||||
if ModelUsesThinkingLevels(model) {
|
||||
return nil, nil, false
|
||||
}
|
||||
|
||||
if budget == nil && effort != nil {
|
||||
if derived, ok := ThinkingEffortToBudget(model, *effort); ok {
|
||||
budget = &derived
|
||||
}
|
||||
}
|
||||
return budget, include, budget != nil || include != nil || effort != nil
|
||||
}
|
||||
|
||||
// ReasoningEffortFromMetadata resolves a reasoning effort string from metadata,
|
||||
// inferring "auto" and "none" when budgets request dynamic or disabled thinking.
|
||||
func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
|
||||
budget, include, effort, matched := ThinkingFromMetadata(metadata)
|
||||
if !matched {
|
||||
return "", false
|
||||
}
|
||||
if effort != nil && *effort != "" {
|
||||
return strings.ToLower(strings.TrimSpace(*effort)), true
|
||||
}
|
||||
if budget != nil {
|
||||
switch *budget {
|
||||
case -1:
|
||||
return "auto", true
|
||||
case 0:
|
||||
return "none", true
|
||||
}
|
||||
}
|
||||
if include != nil && !*include {
|
||||
return "none", true
|
||||
}
|
||||
return "", true
|
||||
}
|
||||
|
||||
// ResolveOriginalModel returns the original model name stored in metadata (if present),
|
||||
// otherwise falls back to the provided model.
|
||||
func ResolveOriginalModel(model string, metadata map[string]any) string {
|
||||
normalize := func(name string) string {
|
||||
if name == "" {
|
||||
return ""
|
||||
}
|
||||
if base, _ := NormalizeThinkingModel(name); base != "" {
|
||||
return base
|
||||
}
|
||||
return strings.TrimSpace(name)
|
||||
}
|
||||
|
||||
if metadata != nil {
|
||||
if v, ok := metadata[ModelMappingOriginalModelMetadataKey]; ok {
|
||||
if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" {
|
||||
if base := normalize(s); base != "" {
|
||||
return base
|
||||
}
|
||||
}
|
||||
}
|
||||
if v, ok := metadata[ThinkingOriginalModelMetadataKey]; ok {
|
||||
if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" {
|
||||
if base := normalize(s); base != "" {
|
||||
return base
|
||||
}
|
||||
}
|
||||
}
|
||||
if v, ok := metadata[GeminiOriginalModelMetadataKey]; ok {
|
||||
if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" {
|
||||
if base := normalize(s); base != "" {
|
||||
return base
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Fallback: try to re-normalize the model name when metadata was dropped.
|
||||
if base := normalize(model); base != "" {
|
||||
return base
|
||||
}
|
||||
return model
|
||||
}
|
||||
|
||||
func parseIntPrefix(value string) (int, bool) {
|
||||
if value == "" {
|
||||
return 0, false
|
||||
}
|
||||
digits := strings.TrimLeft(value, "-")
|
||||
if digits == "" {
|
||||
return 0, false
|
||||
}
|
||||
end := len(digits)
|
||||
for i := 0; i < len(digits); i++ {
|
||||
if digits[i] < '0' || digits[i] > '9' {
|
||||
end = i
|
||||
break
|
||||
}
|
||||
}
|
||||
if end == 0 {
|
||||
return 0, false
|
||||
}
|
||||
val, err := strconv.Atoi(digits[:end])
|
||||
if err != nil {
|
||||
return 0, false
|
||||
}
|
||||
return val, true
|
||||
}
|
||||
|
||||
func parseNumberToInt(raw any) (int, bool) {
|
||||
switch v := raw.(type) {
|
||||
case int:
|
||||
return v, true
|
||||
case int32:
|
||||
return int(v), true
|
||||
case int64:
|
||||
return int(v), true
|
||||
case float64:
|
||||
return int(v), true
|
||||
case json.Number:
|
||||
if val, err := v.Int64(); err == nil {
|
||||
return int(val), true
|
||||
}
|
||||
case string:
|
||||
if strings.TrimSpace(v) == "" {
|
||||
return 0, false
|
||||
}
|
||||
if parsed, err := strconv.Atoi(strings.TrimSpace(v)); err == nil {
|
||||
return parsed, true
|
||||
}
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
@@ -1,87 +0,0 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// GetThinkingText extracts the thinking text from a content part.
|
||||
// Handles various formats:
|
||||
// - Simple string: { "thinking": "text" } or { "text": "text" }
|
||||
// - Wrapped object: { "thinking": { "text": "text", "cache_control": {...} } }
|
||||
// - Gemini-style: { "thought": true, "text": "text" }
|
||||
// Returns the extracted text string.
|
||||
func GetThinkingText(part gjson.Result) string {
|
||||
// Try direct text field first (Gemini-style)
|
||||
if text := part.Get("text"); text.Exists() && text.Type == gjson.String {
|
||||
return text.String()
|
||||
}
|
||||
|
||||
// Try thinking field
|
||||
thinkingField := part.Get("thinking")
|
||||
if !thinkingField.Exists() {
|
||||
return ""
|
||||
}
|
||||
|
||||
// thinking is a string
|
||||
if thinkingField.Type == gjson.String {
|
||||
return thinkingField.String()
|
||||
}
|
||||
|
||||
// thinking is an object with inner text/thinking
|
||||
if thinkingField.IsObject() {
|
||||
if inner := thinkingField.Get("text"); inner.Exists() && inner.Type == gjson.String {
|
||||
return inner.String()
|
||||
}
|
||||
if inner := thinkingField.Get("thinking"); inner.Exists() && inner.Type == gjson.String {
|
||||
return inner.String()
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// GetThinkingTextFromJSON extracts thinking text from a raw JSON string.
|
||||
func GetThinkingTextFromJSON(jsonStr string) string {
|
||||
return GetThinkingText(gjson.Parse(jsonStr))
|
||||
}
|
||||
|
||||
// SanitizeThinkingPart normalizes a thinking part to a canonical form.
|
||||
// Strips cache_control and other non-essential fields.
|
||||
// Returns the sanitized part as JSON string.
|
||||
func SanitizeThinkingPart(part gjson.Result) string {
|
||||
// Gemini-style: { thought: true, text, thoughtSignature }
|
||||
if part.Get("thought").Bool() {
|
||||
result := `{"thought":true}`
|
||||
if text := GetThinkingText(part); text != "" {
|
||||
result, _ = sjson.Set(result, "text", text)
|
||||
}
|
||||
if sig := part.Get("thoughtSignature"); sig.Exists() && sig.Type == gjson.String {
|
||||
result, _ = sjson.Set(result, "thoughtSignature", sig.String())
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// Anthropic-style: { type: "thinking", thinking, signature }
|
||||
if part.Get("type").String() == "thinking" || part.Get("thinking").Exists() {
|
||||
result := `{"type":"thinking"}`
|
||||
if text := GetThinkingText(part); text != "" {
|
||||
result, _ = sjson.Set(result, "thinking", text)
|
||||
}
|
||||
if sig := part.Get("signature"); sig.Exists() && sig.Type == gjson.String {
|
||||
result, _ = sjson.Set(result, "signature", sig.String())
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// Not a thinking part, return as-is but strip cache_control
|
||||
return StripCacheControl(part.Raw)
|
||||
}
|
||||
|
||||
// StripCacheControl removes cache_control and providerOptions from a JSON object.
|
||||
func StripCacheControl(jsonStr string) string {
|
||||
result := jsonStr
|
||||
result, _ = sjson.Delete(result, "cache_control")
|
||||
result, _ = sjson.Delete(result, "providerOptions")
|
||||
return result
|
||||
}
|
||||
@@ -127,7 +127,7 @@ func (w *Watcher) reloadConfig() bool {
|
||||
}
|
||||
|
||||
authDirChanged := oldConfig == nil || oldConfig.AuthDir != newConfig.AuthDir
|
||||
forceAuthRefresh := oldConfig != nil && (oldConfig.ForceModelPrefix != newConfig.ForceModelPrefix || !reflect.DeepEqual(oldConfig.OAuthModelMappings, newConfig.OAuthModelMappings))
|
||||
forceAuthRefresh := oldConfig != nil && (oldConfig.ForceModelPrefix != newConfig.ForceModelPrefix || !reflect.DeepEqual(oldConfig.OAuthModelAlias, newConfig.OAuthModelAlias))
|
||||
|
||||
log.Infof("config successfully reloaded, triggering client reload")
|
||||
w.reloadClients(authDirChanged, affectedOAuthProviders, forceAuthRefresh)
|
||||
|
||||
@@ -54,6 +54,9 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
|
||||
if oldCfg.ForceModelPrefix != newCfg.ForceModelPrefix {
|
||||
changes = append(changes, fmt.Sprintf("force-model-prefix: %t -> %t", oldCfg.ForceModelPrefix, newCfg.ForceModelPrefix))
|
||||
}
|
||||
if oldCfg.NonStreamKeepAliveInterval != newCfg.NonStreamKeepAliveInterval {
|
||||
changes = append(changes, fmt.Sprintf("nonstream-keepalive-interval: %d -> %d", oldCfg.NonStreamKeepAliveInterval, newCfg.NonStreamKeepAliveInterval))
|
||||
}
|
||||
|
||||
// Quota-exceeded behavior
|
||||
if oldCfg.QuotaExceeded.SwitchProject != newCfg.QuotaExceeded.SwitchProject {
|
||||
@@ -209,7 +212,7 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
|
||||
if entries, _ := DiffOAuthExcludedModelChanges(oldCfg.OAuthExcludedModels, newCfg.OAuthExcludedModels); len(entries) > 0 {
|
||||
changes = append(changes, entries...)
|
||||
}
|
||||
if entries, _ := DiffOAuthModelMappingChanges(oldCfg.OAuthModelMappings, newCfg.OAuthModelMappings); len(entries) > 0 {
|
||||
if entries, _ := DiffOAuthModelAliasChanges(oldCfg.OAuthModelAlias, newCfg.OAuthModelAlias); len(entries) > 0 {
|
||||
changes = append(changes, entries...)
|
||||
}
|
||||
|
||||
|
||||
@@ -231,10 +231,11 @@ func TestBuildConfigChangeDetails_FlagsAndKeys(t *testing.T) {
|
||||
AmpCode: config.AmpCode{UpstreamAPIKey: "keep", RestrictManagementToLocalhost: false},
|
||||
RemoteManagement: config.RemoteManagement{DisableControlPanel: false, PanelGitHubRepository: "old/repo", SecretKey: "keep"},
|
||||
SDKConfig: sdkconfig.SDKConfig{
|
||||
RequestLog: false,
|
||||
ProxyURL: "http://old-proxy",
|
||||
APIKeys: []string{"key-1"},
|
||||
ForceModelPrefix: false,
|
||||
RequestLog: false,
|
||||
ProxyURL: "http://old-proxy",
|
||||
APIKeys: []string{"key-1"},
|
||||
ForceModelPrefix: false,
|
||||
NonStreamKeepAliveInterval: 0,
|
||||
},
|
||||
}
|
||||
newCfg := &config.Config{
|
||||
@@ -267,10 +268,11 @@ func TestBuildConfigChangeDetails_FlagsAndKeys(t *testing.T) {
|
||||
SecretKey: "",
|
||||
},
|
||||
SDKConfig: sdkconfig.SDKConfig{
|
||||
RequestLog: true,
|
||||
ProxyURL: "http://new-proxy",
|
||||
APIKeys: []string{" key-1 ", "key-2"},
|
||||
ForceModelPrefix: true,
|
||||
RequestLog: true,
|
||||
ProxyURL: "http://new-proxy",
|
||||
APIKeys: []string{" key-1 ", "key-2"},
|
||||
ForceModelPrefix: true,
|
||||
NonStreamKeepAliveInterval: 5,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -285,6 +287,7 @@ func TestBuildConfigChangeDetails_FlagsAndKeys(t *testing.T) {
|
||||
expectContains(t, details, "proxy-url: http://old-proxy -> http://new-proxy")
|
||||
expectContains(t, details, "ws-auth: false -> true")
|
||||
expectContains(t, details, "force-model-prefix: false -> true")
|
||||
expectContains(t, details, "nonstream-keepalive-interval: 0 -> 5")
|
||||
expectContains(t, details, "quota-exceeded.switch-project: false -> true")
|
||||
expectContains(t, details, "quota-exceeded.switch-preview-model: false -> true")
|
||||
expectContains(t, details, "api-keys count: 1 -> 2")
|
||||
|
||||
@@ -10,23 +10,23 @@ import (
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
)
|
||||
|
||||
type OAuthModelMappingsSummary struct {
|
||||
type OAuthModelAliasSummary struct {
|
||||
hash string
|
||||
count int
|
||||
}
|
||||
|
||||
// SummarizeOAuthModelMappings summarizes OAuth model mappings per channel.
|
||||
func SummarizeOAuthModelMappings(entries map[string][]config.ModelNameMapping) map[string]OAuthModelMappingsSummary {
|
||||
// SummarizeOAuthModelAlias summarizes OAuth model alias per channel.
|
||||
func SummarizeOAuthModelAlias(entries map[string][]config.OAuthModelAlias) map[string]OAuthModelAliasSummary {
|
||||
if len(entries) == 0 {
|
||||
return nil
|
||||
}
|
||||
out := make(map[string]OAuthModelMappingsSummary, len(entries))
|
||||
out := make(map[string]OAuthModelAliasSummary, len(entries))
|
||||
for k, v := range entries {
|
||||
key := strings.ToLower(strings.TrimSpace(k))
|
||||
if key == "" {
|
||||
continue
|
||||
}
|
||||
out[key] = summarizeOAuthModelMappingList(v)
|
||||
out[key] = summarizeOAuthModelAliasList(v)
|
||||
}
|
||||
if len(out) == 0 {
|
||||
return nil
|
||||
@@ -34,10 +34,10 @@ func SummarizeOAuthModelMappings(entries map[string][]config.ModelNameMapping) m
|
||||
return out
|
||||
}
|
||||
|
||||
// DiffOAuthModelMappingChanges compares OAuth model mappings maps.
|
||||
func DiffOAuthModelMappingChanges(oldMap, newMap map[string][]config.ModelNameMapping) ([]string, []string) {
|
||||
oldSummary := SummarizeOAuthModelMappings(oldMap)
|
||||
newSummary := SummarizeOAuthModelMappings(newMap)
|
||||
// DiffOAuthModelAliasChanges compares OAuth model alias maps.
|
||||
func DiffOAuthModelAliasChanges(oldMap, newMap map[string][]config.OAuthModelAlias) ([]string, []string) {
|
||||
oldSummary := SummarizeOAuthModelAlias(oldMap)
|
||||
newSummary := SummarizeOAuthModelAlias(newMap)
|
||||
keys := make(map[string]struct{}, len(oldSummary)+len(newSummary))
|
||||
for k := range oldSummary {
|
||||
keys[k] = struct{}{}
|
||||
@@ -52,13 +52,13 @@ func DiffOAuthModelMappingChanges(oldMap, newMap map[string][]config.ModelNameMa
|
||||
newInfo, okNew := newSummary[key]
|
||||
switch {
|
||||
case okOld && !okNew:
|
||||
changes = append(changes, fmt.Sprintf("oauth-model-mappings[%s]: removed", key))
|
||||
changes = append(changes, fmt.Sprintf("oauth-model-alias[%s]: removed", key))
|
||||
affected = append(affected, key)
|
||||
case !okOld && okNew:
|
||||
changes = append(changes, fmt.Sprintf("oauth-model-mappings[%s]: added (%d entries)", key, newInfo.count))
|
||||
changes = append(changes, fmt.Sprintf("oauth-model-alias[%s]: added (%d entries)", key, newInfo.count))
|
||||
affected = append(affected, key)
|
||||
case okOld && okNew && oldInfo.hash != newInfo.hash:
|
||||
changes = append(changes, fmt.Sprintf("oauth-model-mappings[%s]: updated (%d -> %d entries)", key, oldInfo.count, newInfo.count))
|
||||
changes = append(changes, fmt.Sprintf("oauth-model-alias[%s]: updated (%d -> %d entries)", key, oldInfo.count, newInfo.count))
|
||||
affected = append(affected, key)
|
||||
}
|
||||
}
|
||||
@@ -67,20 +67,20 @@ func DiffOAuthModelMappingChanges(oldMap, newMap map[string][]config.ModelNameMa
|
||||
return changes, affected
|
||||
}
|
||||
|
||||
func summarizeOAuthModelMappingList(list []config.ModelNameMapping) OAuthModelMappingsSummary {
|
||||
func summarizeOAuthModelAliasList(list []config.OAuthModelAlias) OAuthModelAliasSummary {
|
||||
if len(list) == 0 {
|
||||
return OAuthModelMappingsSummary{}
|
||||
return OAuthModelAliasSummary{}
|
||||
}
|
||||
seen := make(map[string]struct{}, len(list))
|
||||
normalized := make([]string, 0, len(list))
|
||||
for _, mapping := range list {
|
||||
name := strings.ToLower(strings.TrimSpace(mapping.Name))
|
||||
alias := strings.ToLower(strings.TrimSpace(mapping.Alias))
|
||||
if name == "" || alias == "" {
|
||||
for _, alias := range list {
|
||||
name := strings.ToLower(strings.TrimSpace(alias.Name))
|
||||
aliasVal := strings.ToLower(strings.TrimSpace(alias.Alias))
|
||||
if name == "" || aliasVal == "" {
|
||||
continue
|
||||
}
|
||||
key := name + "->" + alias
|
||||
if mapping.Fork {
|
||||
key := name + "->" + aliasVal
|
||||
if alias.Fork {
|
||||
key += "|fork"
|
||||
}
|
||||
if _, exists := seen[key]; exists {
|
||||
@@ -90,11 +90,11 @@ func summarizeOAuthModelMappingList(list []config.ModelNameMapping) OAuthModelMa
|
||||
normalized = append(normalized, key)
|
||||
}
|
||||
if len(normalized) == 0 {
|
||||
return OAuthModelMappingsSummary{}
|
||||
return OAuthModelAliasSummary{}
|
||||
}
|
||||
sort.Strings(normalized)
|
||||
sum := sha256.Sum256([]byte(strings.Join(normalized, "|")))
|
||||
return OAuthModelMappingsSummary{
|
||||
return OAuthModelAliasSummary{
|
||||
hash: hex.EncodeToString(sum[:]),
|
||||
count: len(normalized),
|
||||
}
|
||||
@@ -2,6 +2,7 @@ package synthesizer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/watcher/diff"
|
||||
@@ -59,6 +60,9 @@ func (s *ConfigSynthesizer) synthesizeGeminiKeys(ctx *SynthesisContext) []*corea
|
||||
"source": fmt.Sprintf("config:gemini[%s]", token),
|
||||
"api_key": key,
|
||||
}
|
||||
if entry.Priority != 0 {
|
||||
attrs["priority"] = strconv.Itoa(entry.Priority)
|
||||
}
|
||||
if base != "" {
|
||||
attrs["base_url"] = base
|
||||
}
|
||||
@@ -103,6 +107,9 @@ func (s *ConfigSynthesizer) synthesizeClaudeKeys(ctx *SynthesisContext) []*corea
|
||||
"source": fmt.Sprintf("config:claude[%s]", token),
|
||||
"api_key": key,
|
||||
}
|
||||
if ck.Priority != 0 {
|
||||
attrs["priority"] = strconv.Itoa(ck.Priority)
|
||||
}
|
||||
if base != "" {
|
||||
attrs["base_url"] = base
|
||||
}
|
||||
@@ -147,6 +154,9 @@ func (s *ConfigSynthesizer) synthesizeCodexKeys(ctx *SynthesisContext) []*coreau
|
||||
"source": fmt.Sprintf("config:codex[%s]", token),
|
||||
"api_key": key,
|
||||
}
|
||||
if ck.Priority != 0 {
|
||||
attrs["priority"] = strconv.Itoa(ck.Priority)
|
||||
}
|
||||
if ck.BaseURL != "" {
|
||||
attrs["base_url"] = ck.BaseURL
|
||||
}
|
||||
@@ -202,6 +212,9 @@ func (s *ConfigSynthesizer) synthesizeOpenAICompat(ctx *SynthesisContext) []*cor
|
||||
"compat_name": compat.Name,
|
||||
"provider_key": providerName,
|
||||
}
|
||||
if compat.Priority != 0 {
|
||||
attrs["priority"] = strconv.Itoa(compat.Priority)
|
||||
}
|
||||
if key != "" {
|
||||
attrs["api_key"] = key
|
||||
}
|
||||
@@ -233,6 +246,9 @@ func (s *ConfigSynthesizer) synthesizeOpenAICompat(ctx *SynthesisContext) []*cor
|
||||
"compat_name": compat.Name,
|
||||
"provider_key": providerName,
|
||||
}
|
||||
if compat.Priority != 0 {
|
||||
attrs["priority"] = strconv.Itoa(compat.Priority)
|
||||
}
|
||||
if hash := diff.ComputeOpenAICompatModelsHash(compat.Models); hash != "" {
|
||||
attrs["models_hash"] = hash
|
||||
}
|
||||
@@ -275,6 +291,9 @@ func (s *ConfigSynthesizer) synthesizeVertexCompat(ctx *SynthesisContext) []*cor
|
||||
"base_url": base,
|
||||
"provider_key": providerName,
|
||||
}
|
||||
if compat.Priority != 0 {
|
||||
attrs["priority"] = strconv.Itoa(compat.Priority)
|
||||
}
|
||||
if key != "" {
|
||||
attrs["api_key"] = key
|
||||
}
|
||||
|
||||
@@ -146,10 +146,12 @@ func (h *ClaudeCodeAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSO
|
||||
c.Header("Content-Type", "application/json")
|
||||
alt := h.GetAlt(c)
|
||||
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
|
||||
stopKeepAlive := h.StartNonStreamingKeepAlive(c, cliCtx)
|
||||
|
||||
modelName := gjson.GetBytes(rawJSON, "model").String()
|
||||
|
||||
resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
|
||||
stopKeepAlive()
|
||||
if errMsg != nil {
|
||||
h.WriteErrorResponse(c, errMsg)
|
||||
cliCancel(errMsg.Error)
|
||||
@@ -159,13 +161,18 @@ func (h *ClaudeCodeAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSO
|
||||
// Decompress gzipped responses - Claude API sometimes returns gzip without Content-Encoding header
|
||||
// This fixes title generation and other non-streaming responses that arrive compressed
|
||||
if len(resp) >= 2 && resp[0] == 0x1f && resp[1] == 0x8b {
|
||||
gzReader, err := gzip.NewReader(bytes.NewReader(resp))
|
||||
if err != nil {
|
||||
log.Warnf("failed to decompress gzipped Claude response: %v", err)
|
||||
gzReader, errGzip := gzip.NewReader(bytes.NewReader(resp))
|
||||
if errGzip != nil {
|
||||
log.Warnf("failed to decompress gzipped Claude response: %v", errGzip)
|
||||
} else {
|
||||
defer gzReader.Close()
|
||||
if decompressed, err := io.ReadAll(gzReader); err != nil {
|
||||
log.Warnf("failed to read decompressed Claude response: %v", err)
|
||||
defer func() {
|
||||
if errClose := gzReader.Close(); errClose != nil {
|
||||
log.Warnf("failed to close Claude gzip reader: %v", errClose)
|
||||
}
|
||||
}()
|
||||
decompressed, errRead := io.ReadAll(gzReader)
|
||||
if errRead != nil {
|
||||
log.Warnf("failed to read decompressed Claude response: %v", errRead)
|
||||
} else {
|
||||
resp = decompressed
|
||||
}
|
||||
|
||||
@@ -56,8 +56,12 @@ func (h *GeminiAPIHandler) GeminiModels(c *gin.Context) {
|
||||
for k, v := range model {
|
||||
normalizedModel[k] = v
|
||||
}
|
||||
if name, ok := normalizedModel["name"].(string); ok && name != "" && !strings.HasPrefix(name, "models/") {
|
||||
normalizedModel["name"] = "models/" + name
|
||||
if name, ok := normalizedModel["name"].(string); ok && name != "" {
|
||||
if !strings.HasPrefix(name, "models/") {
|
||||
normalizedModel["name"] = "models/" + name
|
||||
}
|
||||
normalizedModel["displayName"] = name
|
||||
normalizedModel["description"] = name
|
||||
}
|
||||
if _, ok := normalizedModel["supportedGenerationMethods"]; !ok {
|
||||
normalizedModel["supportedGenerationMethods"] = defaultMethods
|
||||
@@ -85,94 +89,35 @@ func (h *GeminiAPIHandler) GeminiGetHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
action := strings.TrimPrefix(request.Action, "/")
|
||||
switch action {
|
||||
case "gemini-3-pro-preview":
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"name": "models/gemini-3-pro-preview",
|
||||
"version": "3",
|
||||
"displayName": "Gemini 3 Pro Preview",
|
||||
"description": "Gemini 3 Pro Preview",
|
||||
"inputTokenLimit": 1048576,
|
||||
"outputTokenLimit": 65536,
|
||||
"supportedGenerationMethods": []string{
|
||||
"generateContent",
|
||||
"countTokens",
|
||||
"createCachedContent",
|
||||
"batchGenerateContent",
|
||||
},
|
||||
"temperature": 1,
|
||||
"topP": 0.95,
|
||||
"topK": 64,
|
||||
"maxTemperature": 2,
|
||||
"thinking": true,
|
||||
},
|
||||
)
|
||||
case "gemini-2.5-pro":
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"name": "models/gemini-2.5-pro",
|
||||
"version": "2.5",
|
||||
"displayName": "Gemini 2.5 Pro",
|
||||
"description": "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
|
||||
"inputTokenLimit": 1048576,
|
||||
"outputTokenLimit": 65536,
|
||||
"supportedGenerationMethods": []string{
|
||||
"generateContent",
|
||||
"countTokens",
|
||||
"createCachedContent",
|
||||
"batchGenerateContent",
|
||||
},
|
||||
"temperature": 1,
|
||||
"topP": 0.95,
|
||||
"topK": 64,
|
||||
"maxTemperature": 2,
|
||||
"thinking": true,
|
||||
},
|
||||
)
|
||||
case "gemini-2.5-flash":
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"name": "models/gemini-2.5-flash",
|
||||
"version": "001",
|
||||
"displayName": "Gemini 2.5 Flash",
|
||||
"description": "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
|
||||
"inputTokenLimit": 1048576,
|
||||
"outputTokenLimit": 65536,
|
||||
"supportedGenerationMethods": []string{
|
||||
"generateContent",
|
||||
"countTokens",
|
||||
"createCachedContent",
|
||||
"batchGenerateContent",
|
||||
},
|
||||
"temperature": 1,
|
||||
"topP": 0.95,
|
||||
"topK": 64,
|
||||
"maxTemperature": 2,
|
||||
"thinking": true,
|
||||
})
|
||||
case "gpt-5":
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"name": "gpt-5",
|
||||
"version": "001",
|
||||
"displayName": "GPT 5",
|
||||
"description": "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
||||
"inputTokenLimit": 400000,
|
||||
"outputTokenLimit": 128000,
|
||||
"supportedGenerationMethods": []string{
|
||||
"generateContent",
|
||||
},
|
||||
"temperature": 1,
|
||||
"topP": 0.95,
|
||||
"topK": 64,
|
||||
"maxTemperature": 2,
|
||||
"thinking": true,
|
||||
})
|
||||
default:
|
||||
c.JSON(http.StatusNotFound, handlers.ErrorResponse{
|
||||
Error: handlers.ErrorDetail{
|
||||
Message: "Not Found",
|
||||
Type: "not_found",
|
||||
},
|
||||
})
|
||||
|
||||
// Get dynamic models from the global registry and find the matching one
|
||||
availableModels := h.Models()
|
||||
var targetModel map[string]any
|
||||
|
||||
for _, model := range availableModels {
|
||||
name, _ := model["name"].(string)
|
||||
// Match name with or without 'models/' prefix
|
||||
if name == action || name == "models/"+action {
|
||||
targetModel = model
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if targetModel != nil {
|
||||
// Ensure the name has 'models/' prefix in the output if it's a Gemini model
|
||||
if name, ok := targetModel["name"].(string); ok && name != "" && !strings.HasPrefix(name, "models/") {
|
||||
targetModel["name"] = "models/" + name
|
||||
}
|
||||
c.JSON(http.StatusOK, targetModel)
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusNotFound, handlers.ErrorResponse{
|
||||
Error: handlers.ErrorDetail{
|
||||
Message: "Not Found",
|
||||
Type: "not_found",
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
// GeminiHandler handles POST requests for Gemini API operations.
|
||||
@@ -336,7 +281,9 @@ func (h *GeminiAPIHandler) handleGenerateContent(c *gin.Context, modelName strin
|
||||
c.Header("Content-Type", "application/json")
|
||||
alt := h.GetAlt(c)
|
||||
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
|
||||
stopKeepAlive := h.StartNonStreamingKeepAlive(c, cliCtx)
|
||||
resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
|
||||
stopKeepAlive()
|
||||
if errMsg != nil {
|
||||
h.WriteErrorResponse(c, errMsg)
|
||||
cliCancel(errMsg.Error)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user