Compare commits

...

24 Commits

Author SHA1 Message Date
Luis Pater
3e7b645346 Merge pull request #186 from router-for-me/doc
docs: add AI Studio setup
2025-10-29 21:53:49 +08:00
hkfires
24446a4dc4 feat(cliproxy): skip persisting runtime-only websocket auths 2025-10-29 21:49:35 +08:00
hkfires
475f473dab docs: add AI Studio setup 2025-10-29 21:10:14 +08:00
Luis Pater
8dba32a077 Merge pull request #185 from router-for-me/thinking
Feat: Add reasoning effort support for Gemini models
2025-10-29 20:27:07 +08:00
hkfires
1bbbd16df6 chore(logging): clarify 429 rate-limit retries in Gemini executor 2025-10-29 19:19:18 +08:00
hkfires
5cb378256b feat(gemini-translators): set include_thoughts when mapping thinking 2025-10-29 19:19:18 +08:00
hkfires
3ac5f05e8c feat(gemini): prefer official reasoning fields, add extra_body(cherry studio) fallback 2025-10-29 19:19:18 +08:00
hkfires
58d30369b4 fix(gemini-cli): correctly strip/normalize thinking config by model 2025-10-29 19:19:18 +08:00
hkfires
7dd93a4a25 fix(executor): only apply thinking config to supported models 2025-10-29 19:19:17 +08:00
hkfires
2a3ee8d0e3 fix(translators): normalize thinking budgets 2025-10-29 19:19:17 +08:00
hkfires
41577bce07 feat(claude): map Anthropic 'thinking' to Gemini thinkingBudget 2025-10-29 19:19:17 +08:00
hkfires
3d7aca22c0 feat(registry): add thinking budget support; populate Gemini models 2025-10-29 19:19:17 +08:00
hkfires
680b3f5010 fix(translator): avoid default thinkingConfig in Gemini requests 2025-10-29 19:19:17 +08:00
Luis Pater
9d42e4b239 feat(runtime): add User-Agent headers to codex and claude executors
- Standardized User-Agent strings for Codex and Claude executors to improve request tracing and compatibility.
- Updated header insertion logic in both executors for consistency.
2025-10-29 12:57:37 +08:00
Luis Pater
97af785aad docs(readme): add CLIProxyAPI Linux installer instructions
- Updated `README.md` and `README_CN.md` with steps to install via the Linux installer.
- Acknowledged [brokechubb](https://github.com/brokechubb) for building the installer.
2025-10-28 23:17:08 +08:00
Luis Pater
0defb68c6c fix(translator): improve error handling for function parameters schema transformation
- Added fallback to set default `parametersJsonSchema` when `parameters` key is absent.
- Enhanced logging to capture detailed errors during schema transformation.
- Refined tool declaration appending logic for robustness.
2025-10-28 22:57:26 +08:00
Luis Pater
d6272d3300 Merge pull request #177 from router-for-me/aistudio
feat(registry): unify Gemini models and add AI Studio set
2025-10-28 21:57:18 +08:00
hkfires
c99d0dfb33 fix(aistudio): remove no-op executor unregister on WS disconnect 2025-10-28 19:51:05 +08:00
hkfires
663b9b35ab fix(executor): pass authID to relay instead of provider 2025-10-28 19:28:26 +08:00
hkfires
5dced4c0a6 feat(registry): unify Gemini models and add AI Studio set 2025-10-28 19:00:25 +08:00
Luis Pater
5891785125 docs(readme): clarify model definition and add usage example for undefined models
- Updated `README.md` and `README_CN.md` to include additional instructions on requesting undefined models using the `openrouter://` format.
- Added example for `moonshotai/kimi-k2:free` usage.
2025-10-28 09:09:19 +08:00
Luis Pater
ac3d47e8c0 Merge pull request #173 from tobwen/feature/dynamic-model-routing
Add support for dynamic model providers
2025-10-28 08:55:08 +08:00
tobwen
e5ed2cba4a Add support for dynamic model providers
Implements functionality to parse model names with provider information in the format "provider://model". This allows dynamic provider selection rather than relying only on predefined mappings.

The change affects all execution methods to properly handle these dynamic model specifications while maintaining compatibility with the existing approach for standard model names.
2025-10-28 01:41:54 +01:00
Luis Pater
847c2502a5 Fixed: #172
feat(runtime): add Brotli and Zstd compression support, improve response handling

- Implemented Brotli and Zstd decompression handling in `FileRequestLogger` and executor logic for enhanced compatibility.
- Added `decodeResponseBody` utility for streamlined multi-encoding support (Gzip, Deflate, Brotli, Zstd).
- Improved resource cleanup with composite readers for proper closure under all conditions.
- Updated dependencies in `go.mod` and `go.sum` to include Brotli and Zstd libraries.
2025-10-28 08:39:03 +08:00
26 changed files with 796 additions and 360 deletions

View File

@@ -23,6 +23,7 @@ Chinese providers have now been added: [Qwen Code](https://github.com/QwenLM/qwe
- Multiple accounts with round-robin load balancing (Gemini, OpenAI, Claude, Qwen and iFlow)
- Simple CLI authentication flows (Gemini, OpenAI, Claude, Qwen and iFlow)
- Generative Language API Key support
- AI Studio Build multi-account load balancing
- Gemini CLI multi-account load balancing
- Claude Code multi-account load balancing
- Qwen Code multi-account load balancing
@@ -68,6 +69,14 @@ brew install cliproxyapi
brew services start cliproxyapi
```
### Installation via CLIProxyAPI Linux Installer
```bash
curl -fsSL https://raw.githubusercontent.com/brokechubb/cliproxyapi-installer/refs/heads/master/cliproxyapi-installer | bash
```
Thanks to [brokechubb](https://github.com/brokechubb) for building the Linux installer!
## Usage
### GUI Client & Official WebUI
@@ -260,12 +269,16 @@ console.log(await claudeResponse.json());
- gemini-2.5-flash-lite
- gemini-2.5-flash-image
- gemini-2.5-flash-image-preview
- gemini-pro-latest
- gemini-flash-latest
- gemini-flash-lite-latest
- gpt-5
- gpt-5-codex
- claude-opus-4-1-20250805
- claude-opus-4-20250514
- claude-sonnet-4-20250514
- claude-sonnet-4-5-20250929
- claude-haiku-4-5-20251001
- claude-3-7-sonnet-20250219
- claude-3-5-haiku-20241022
- qwen3-coder-plus
@@ -277,7 +290,6 @@ console.log(await claudeResponse.json());
- deepseek-r1
- deepseek-v3
- kimi-k2
- glm-4.5
- glm-4.6
- tstars2.0
- And other iFlow-supported models
@@ -415,7 +427,7 @@ openai-compatibility:
# api-keys:
# - "sk-or-v1-...b780"
# - "sk-or-v1-...b781"
models: # The models supported by the provider.
models: # The models supported by the provider. Or you can use a format such as openrouter://moonshotai/kimi-k2:free to request undefined models
- name: "moonshotai/kimi-k2:free" # The actual model name.
alias: "kimi-k2" # The alias used in the API.
```
@@ -510,28 +522,37 @@ openai-compatibility:
alias: "kimi-k2"
```
Legacy format (still supported):
```yaml
openai-compatibility:
- name: "openrouter"
base-url: "https://openrouter.ai/api/v1"
api-keys:
- "sk-or-v1-...b780"
- "sk-or-v1-...b781"
models:
- name: "moonshotai/kimi-k2:free"
alias: "kimi-k2"
```
Usage:
Call OpenAI's endpoint `/v1/chat/completions` with `model` set to the alias (e.g., `kimi-k2`). The proxy routes to the configured provider/model automatically.
You can also call Claude's endpoint `/v1/messages`, or Gemini's `/v1beta/models/model-name:streamGenerateContent` and `/v1beta/models/model-name:generateContent`.
And you can always use the Gemini CLI with `CODE_ASSIST_ENDPOINT` set to `http://127.0.0.1:8317` to reach these OpenAI-compatible providers' models.
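For instance, here is a minimal Go sketch of the alias call against a local instance on the default `http://127.0.0.1:8317`; the `Authorization` value is a placeholder for whatever inbound API key your config requires, and a dynamic `openrouter://moonshotai/kimi-k2:free` model value could be substituted for the alias:
```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// "kimi-k2" is the alias configured above; the proxy resolves it to
	// moonshotai/kimi-k2:free on the configured OpenRouter provider.
	payload := []byte(`{"model":"kimi-k2","messages":[{"role":"user","content":"Hello"}]}`)
	req, err := http.NewRequest(http.MethodPost,
		"http://127.0.0.1:8317/v1/chat/completions", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer sk-placeholder") // assumed inbound key
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body))
}
```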
### AI Studio Instructions
You can use this service (CLIProxyAPI) as a backend for [this AI Studio App](https://aistudio.google.com/apps/drive/1CPW7FpWGsDZzkaYgYOyXQ_6FWgxieLmL). Follow the steps below to configure it:
1. **Start the CLIProxyAPI Service**: Ensure your CLIProxyAPI instance is running, either locally or remotely.
2. **Access the AI Studio App**: Log in to your Google account in your browser, then open the following link:
- [https://aistudio.google.com/apps/drive/1CPW7FpWGsDZzkaYgYOyXQ_6FWgxieLmL](https://aistudio.google.com/apps/drive/1CPW7FpWGsDZzkaYgYOyXQ_6FWgxieLmL)
#### Connection Configuration
By default, the AI Studio App attempts to connect to a local CLIProxyAPI instance at `ws://127.0.0.1:8317`.
- **Connecting to a Remote Service**:
If you need to connect to a remotely deployed CLIProxyAPI, modify the `config.ts` file in the AI Studio App to update the `WEBSOCKET_PROXY_URL` value.
- Use the `wss://` protocol if your remote service has SSL enabled.
- Use the `ws://` protocol if SSL is not enabled.
#### Authentication Configuration
By default, WebSocket connections to CLIProxyAPI do not require authentication.
- **Enable Authentication on the CLIProxyAPI Server**:
In your `config.yaml` file, set `ws_auth` to `true`.
- **Configure Authentication on the AI Studio Client**:
In the `config.ts` file of the AI Studio App, set the `JWT_TOKEN` value to your authentication token.
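For a quick connectivity check outside the app, a minimal Go sketch using `github.com/gorilla/websocket` is shown below. The root path and the bearer-token header are assumptions about how the relay might accept the JWT when `ws_auth` is enabled; the AI Studio app itself supplies the token via `JWT_TOKEN` in `config.ts`.
```go
package main

import (
	"log"
	"net/http"

	"github.com/gorilla/websocket"
)

func main() {
	// Use wss:// instead when the remote deployment has SSL enabled.
	url := "ws://127.0.0.1:8317" // root path assumed; adjust to your deployment

	header := http.Header{}
	// Assumed token transport for ws_auth: true.
	header.Set("Authorization", "Bearer <your-jwt-token>")

	conn, _, err := websocket.DefaultDialer.Dial(url, header)
	if err != nil {
		log.Fatalf("websocket dial failed: %v", err)
	}
	defer conn.Close()
	log.Println("websocket connection established")
}
```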
### Authentication Directory

View File

@@ -43,6 +43,7 @@
- Multiple accounts with round-robin load balancing (Gemini, OpenAI, Claude, Qwen, and iFlow)
- Simple CLI authentication flows (Gemini, OpenAI, Claude, Qwen, and iFlow)
- Gemini AI Studio API key support
- AI Studio Build multi-account round-robin support
- Gemini CLI multi-account round-robin support
- Claude Code multi-account round-robin support
- Qwen Code multi-account round-robin support
@@ -82,6 +83,14 @@ brew install cliproxyapi
brew services start cliproxyapi
```
### Installation via the CLIProxyAPI Linux Installer
```bash
curl -fsSL https://raw.githubusercontent.com/brokechubb/cliproxyapi-installer/refs/heads/master/cliproxyapi-installer | bash
```
Thanks to [brokechubb](https://github.com/brokechubb) for building the Linux installer!
## Usage
### GUI Client & Official WebUI
@@ -273,12 +282,16 @@ console.log(await claudeResponse.json());
- gemini-2.5-flash-lite
- gemini-2.5-flash-image
- gemini-2.5-flash-image-preview
- gemini-pro-latest
- gemini-flash-latest
- gemini-flash-lite-latest
- gpt-5
- gpt-5-codex
- claude-opus-4-1-20250805
- claude-opus-4-20250514
- claude-sonnet-4-20250514
- claude-sonnet-4-5-20250929
- claude-haiku-4-5-20251001
- claude-3-7-sonnet-20250219
- claude-3-5-haiku-20241022
- qwen3-coder-plus
@@ -290,7 +303,6 @@ console.log(await claudeResponse.json());
- deepseek-r1
- deepseek-v3
- kimi-k2
- glm-4.5
- glm-4.6
- tstars2.0
- And other iFlow-supported models
@@ -428,7 +440,7 @@ openai-compatibility:
# api-keys:
# - "sk-or-v1-...b780"
# - "sk-or-v1-...b781"
models: # The models supported by the provider.
models: # The models supported by the provider. Or you can use a format such as openrouter://moonshotai/kimi-k2:free to request models not defined here
- name: "moonshotai/kimi-k2:free" # The actual model name.
alias: "kimi-k2" # The alias used in the API.
```
@@ -523,24 +535,36 @@ openai-compatibility:
alias: "kimi-k2"
```
Legacy format (still supported):
```yaml
openai-compatibility:
- name: "openrouter"
base-url: "https://openrouter.ai/api/v1"
api-keys:
- "sk-or-v1-...b780"
- "sk-or-v1-...b781"
models:
- name: "moonshotai/kimi-k2:free"
alias: "kimi-k2"
```
Usage: in `/v1/chat/completions`, set `model` to the alias (e.g., `kimi-k2`); the proxy automatically routes to the corresponding provider and model.
And for these OpenAI-compatible providers' models, you can always use the Gemini CLI by setting `CODE_ASSIST_ENDPOINT` to `http://127.0.0.1:8317`.
### AI Studio Instructions
You can use this service (CLIProxyAPI) as a backend for [this AI Studio app](https://aistudio.google.com/apps/drive/1CPW7FpWGsDZzkaYgYOyXQ_6FWgxieLmL). Follow the steps below to configure it:
1. **Start the CLIProxyAPI service**: Make sure your CLIProxyAPI instance is running, either locally or remotely.
2. **Open the AI Studio app**: Log in to your Google account in your browser, then open the following link:
- [https://aistudio.google.com/apps/drive/1CPW7FpWGsDZzkaYgYOyXQ_6FWgxieLmL](https://aistudio.google.com/apps/drive/1CPW7FpWGsDZzkaYgYOyXQ_6FWgxieLmL)
#### Connection Configuration
By default, the AI Studio app attempts to connect to a local CLIProxyAPI instance (`ws://127.0.0.1:8317`).
- **Connecting to a remote service**:
If you need to connect to a remotely deployed CLIProxyAPI, modify the `config.ts` file in the AI Studio app and update the `WEBSOCKET_PROXY_URL` value.
- Use the `wss://` protocol if your remote service has SSL enabled.
- Use the `ws://` protocol if SSL is not enabled.
#### Authentication Configuration
By default, WebSocket connections to CLIProxyAPI do not require authentication.
- **Enable authentication on the CLIProxyAPI server**:
In your `config.yaml` file, set `ws_auth` to `true`.
- **Configure authentication on the AI Studio client**:
In the AI Studio app's `config.ts` file, set the `JWT_TOKEN` value to your authentication token.
### Authentication Directory
The `auth-dir` parameter specifies where authentication tokens are stored. When you run the login command, the application creates JSON files in this directory containing the authentication tokens for your Google accounts. Multiple accounts can be used for round-robin rotation.

View File

@@ -146,6 +146,10 @@ func (MyExecutor) ExecuteStream(ctx context.Context, a *coreauth.Auth, req clipe
return ch, nil
}
func (MyExecutor) CountTokens(ctx context.Context, a *coreauth.Auth, req clipexec.Request, opts clipexec.Options) (clipexec.Response, error) {
return clipexec.Response{}, errors.New("not implemented")
}
func (MyExecutor) Refresh(ctx context.Context, a *coreauth.Auth) (*coreauth.Auth, error) {
return a, nil
}

go.mod (1 change)
View File

@@ -28,6 +28,7 @@ require (
cloud.google.com/go/compute/metadata v0.3.0 // indirect
github.com/Microsoft/go-winio v0.6.2 // indirect
github.com/ProtonMail/go-crypto v1.3.0 // indirect
github.com/andybalholm/brotli v1.0.6 // indirect
github.com/bytedance/sonic v1.11.6 // indirect
github.com/bytedance/sonic/loader v0.1.1 // indirect
github.com/cloudflare/circl v1.6.1 // indirect

go.sum (2 changes)
View File

@@ -4,6 +4,8 @@ github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERo
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/ProtonMail/go-crypto v1.3.0 h1:ILq8+Sf5If5DCpHQp4PbZdS1J7HDFRXz/+xKBiRGFrw=
github.com/ProtonMail/go-crypto v1.3.0/go.mod h1:9whxjD8Rbs29b4XWbB8irEcE8KHMqaR2e7GWU1R+/PE=
github.com/andybalholm/brotli v1.0.6 h1:Yf9fFpf49Zrxb9NlQaluyE92/+X7UVHlhMNJN2sxfOI=
github.com/andybalholm/brotli v1.0.6/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8=
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=

View File

@@ -225,9 +225,13 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk
envManagementSecret := envAdminPasswordSet && envAdminPassword != ""
// Create server instance
providerNames := make([]string, 0, len(cfg.OpenAICompatibility))
for _, p := range cfg.OpenAICompatibility {
providerNames = append(providerNames, p.Name)
}
s := &Server{
engine: engine,
handlers: handlers.NewBaseAPIHandlers(&cfg.SDKConfig, authManager),
handlers: handlers.NewBaseAPIHandlers(&cfg.SDKConfig, authManager, providerNames),
cfg: cfg,
accessManager: accessManager,
requestLogger: requestLogger,
@@ -823,6 +827,13 @@ func (s *Server) UpdateClients(cfg *config.Config) {
managementasset.SetCurrentConfig(cfg)
// Save YAML snapshot for next comparison
s.oldConfigYaml, _ = yaml.Marshal(cfg)
providerNames := make([]string, 0, len(cfg.OpenAICompatibility))
for _, p := range cfg.OpenAICompatibility {
providerNames = append(providerNames, p.Name)
}
s.handlers.OpenAICompatProviders = providerNames
s.handlers.UpdateClients(&cfg.SDKConfig)
if !cfg.RemoteManagement.DisableControlPanel {
@@ -903,5 +914,3 @@ func AuthMiddleware(manager *sdkaccess.Manager) gin.HandlerFunc {
}
}
}
// legacy clientsToSlice removed; handlers no longer consume legacy client slices

View File

@@ -15,6 +15,10 @@ import (
"strings"
"time"
"github.com/andybalholm/brotli"
"github.com/klauspost/compress/zstd"
log "github.com/sirupsen/logrus"
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
)
@@ -411,6 +415,10 @@ func (l *FileRequestLogger) decompressResponse(responseHeaders map[string][]stri
return l.decompressGzip(response)
case "deflate":
return l.decompressDeflate(response)
case "br":
return l.decompressBrotli(response)
case "zstd":
return l.decompressZstd(response)
default:
// No compression or unsupported compression
return response, nil
@@ -431,7 +439,9 @@ func (l *FileRequestLogger) decompressGzip(data []byte) ([]byte, error) {
return nil, fmt.Errorf("failed to create gzip reader: %w", err)
}
defer func() {
_ = reader.Close()
if errClose := reader.Close(); errClose != nil {
log.WithError(errClose).Warn("failed to close gzip reader in request logger")
}
}()
decompressed, err := io.ReadAll(reader)
@@ -453,7 +463,9 @@ func (l *FileRequestLogger) decompressGzip(data []byte) ([]byte, error) {
func (l *FileRequestLogger) decompressDeflate(data []byte) ([]byte, error) {
reader := flate.NewReader(bytes.NewReader(data))
defer func() {
_ = reader.Close()
if errClose := reader.Close(); errClose != nil {
log.WithError(errClose).Warn("failed to close deflate reader in request logger")
}
}()
decompressed, err := io.ReadAll(reader)
@@ -464,6 +476,48 @@ func (l *FileRequestLogger) decompressDeflate(data []byte) ([]byte, error) {
return decompressed, nil
}
// decompressBrotli decompresses brotli-encoded data.
//
// Parameters:
// - data: The brotli-encoded data to decompress
//
// Returns:
// - []byte: The decompressed data
// - error: An error if decompression fails, nil otherwise
func (l *FileRequestLogger) decompressBrotli(data []byte) ([]byte, error) {
reader := brotli.NewReader(bytes.NewReader(data))
decompressed, err := io.ReadAll(reader)
if err != nil {
return nil, fmt.Errorf("failed to decompress brotli data: %w", err)
}
return decompressed, nil
}
// decompressZstd decompresses zstd-encoded data.
//
// Parameters:
// - data: The zstd-encoded data to decompress
//
// Returns:
// - []byte: The decompressed data
// - error: An error if decompression fails, nil otherwise
func (l *FileRequestLogger) decompressZstd(data []byte) ([]byte, error) {
decoder, err := zstd.NewReader(bytes.NewReader(data))
if err != nil {
return nil, fmt.Errorf("failed to create zstd reader: %w", err)
}
defer decoder.Close()
decompressed, err := io.ReadAll(decoder)
if err != nil {
return nil, fmt.Errorf("failed to decompress zstd data: %w", err)
}
return decompressed, nil
}
// formatRequestInfo creates the request information section of the log.
//
// Parameters:

View File

@@ -68,84 +68,8 @@ func GetClaudeModels() []*ModelInfo {
}
}
// GetGeminiModels returns the standard Gemini model definitions
func GetGeminiModels() []*ModelInfo {
return []*ModelInfo{
{
ID: "gemini-2.5-flash",
Object: "model",
Created: time.Now().Unix(),
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash",
Version: "001",
DisplayName: "Gemini 2.5 Flash",
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
},
{
ID: "gemini-2.5-pro",
Object: "model",
Created: time.Now().Unix(),
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-pro",
Version: "2.5",
DisplayName: "Gemini 2.5 Pro",
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
},
{
ID: "gemini-2.5-flash-lite",
Object: "model",
Created: time.Now().Unix(),
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-lite",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Lite",
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Flash Lite",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
},
{
ID: "gemini-2.5-flash-image-preview",
Object: "model",
Created: time.Now().Unix(),
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-image-preview",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Image Preview",
Description: "State-of-the-art image generation and editing model.",
InputTokenLimit: 1048576,
OutputTokenLimit: 8192,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
},
{
ID: "gemini-2.5-flash-image",
Object: "model",
Created: time.Now().Unix(),
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-image",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Image",
Description: "State-of-the-art image generation and editing model.",
InputTokenLimit: 1048576,
OutputTokenLimit: 8192,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
},
}
}
// GetGeminiCLIModels returns the standard Gemini model definitions
func GetGeminiCLIModels() []*ModelInfo {
// GeminiModels returns the shared base Gemini model set used by multiple providers.
func GeminiModels() []*ModelInfo {
return []*ModelInfo{
{
ID: "gemini-2.5-flash",
@@ -160,6 +84,7 @@ func GetGeminiCLIModels() []*ModelInfo {
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-pro",
@@ -174,6 +99,7 @@ func GetGeminiCLIModels() []*ModelInfo {
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash-lite",
@@ -188,6 +114,7 @@ func GetGeminiCLIModels() []*ModelInfo {
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash-image-preview",
@@ -202,6 +129,7 @@ func GetGeminiCLIModels() []*ModelInfo {
InputTokenLimit: 1048576,
OutputTokenLimit: 8192,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
// image models don't support thinkingConfig; leave Thinking nil
},
{
ID: "gemini-2.5-flash-image",
@@ -216,10 +144,69 @@ func GetGeminiCLIModels() []*ModelInfo {
InputTokenLimit: 1048576,
OutputTokenLimit: 8192,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
// image models don't support thinkingConfig; leave Thinking nil
},
}
}
// GetGeminiModels returns the standard Gemini model definitions
func GetGeminiModels() []*ModelInfo { return GeminiModels() }
// GetGeminiCLIModels returns the standard Gemini model definitions
func GetGeminiCLIModels() []*ModelInfo { return GeminiModels() }
// GetAIStudioModels returns the Gemini model definitions for AI Studio integrations
func GetAIStudioModels() []*ModelInfo {
base := GeminiModels()
return append(base,
&ModelInfo{
ID: "gemini-pro-latest",
Object: "model",
Created: time.Now().Unix(),
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-pro-latest",
Version: "2.5",
DisplayName: "Gemini Pro Latest",
Description: "Latest release of Gemini Pro",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
&ModelInfo{
ID: "gemini-flash-latest",
Object: "model",
Created: time.Now().Unix(),
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-flash-latest",
Version: "2.5",
DisplayName: "Gemini Flash Latest",
Description: "Latest release of Gemini Flash",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
&ModelInfo{
ID: "gemini-flash-lite-latest",
Object: "model",
Created: time.Now().Unix(),
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-flash-lite-latest",
Version: "2.5",
DisplayName: "Gemini Flash-Lite Latest",
Description: "Latest release of Gemini Flash-Lite",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
)
}
// GetOpenAIModels returns the standard OpenAI model definitions
func GetOpenAIModels() []*ModelInfo {
return []*ModelInfo{
@@ -417,7 +404,6 @@ func GetIFlowModels() []*ModelInfo {
{ID: "qwen3-vl-plus", DisplayName: "Qwen3-VL-Plus", Description: "Qwen3 multimodal vision-language"},
{ID: "qwen3-max-preview", DisplayName: "Qwen3-Max-Preview", Description: "Qwen3 Max preview build"},
{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905"},
{ID: "glm-4.5", DisplayName: "GLM-4.5", Description: "Zhipu GLM 4.5 general model"},
{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model"},
{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model"},
{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental"},

View File

@@ -45,6 +45,23 @@ type ModelInfo struct {
MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
// SupportedParameters lists supported parameters
SupportedParameters []string `json:"supported_parameters,omitempty"`
// Thinking holds provider-specific reasoning/thinking budget capabilities.
// This is optional and currently used for Gemini thinking budget normalization.
Thinking *ThinkingSupport `json:"thinking,omitempty"`
}
// ThinkingSupport describes a model family's supported internal reasoning budget range.
// Values are interpreted in provider-native token units.
type ThinkingSupport struct {
// Min is the minimum allowed thinking budget (inclusive).
Min int `json:"min,omitempty"`
// Max is the maximum allowed thinking budget (inclusive).
Max int `json:"max,omitempty"`
// ZeroAllowed indicates whether 0 is a valid value (to disable thinking).
ZeroAllowed bool `json:"zero_allowed,omitempty"`
// DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget).
DynamicAllowed bool `json:"dynamic_allowed,omitempty"`
}
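// Illustrative sketch (editor's addition, not part of this diff): one way a
// normalizer such as util.NormalizeThinkingBudget could clamp a requested
// budget against ThinkingSupport, matching the field semantics above. The
// actual implementation in this changeset may differ in detail.
//
//	func normalizeBudget(ts *ThinkingSupport, budget int) int {
//		if ts == nil {
//			return budget // model declares no thinking support
//		}
//		if budget == -1 && ts.DynamicAllowed {
//			return -1 // dynamic budget requested and permitted
//		}
//		if budget == 0 && ts.ZeroAllowed {
//			return 0 // thinking explicitly disabled
//		}
//		if budget < ts.Min {
//			return ts.Min
//		}
//		if budget > ts.Max {
//			return ts.Max
//		}
//		return budget
//	}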
// ModelRegistration tracks a model's availability
@@ -652,6 +669,17 @@ func (r *ModelRegistry) GetModelProviders(modelID string) []string {
return result
}
// GetModelInfo returns the registered ModelInfo for the given model ID, if present.
// Returns nil if the model is unknown to the registry.
func (r *ModelRegistry) GetModelInfo(modelID string) *ModelInfo {
r.mutex.RLock()
defer r.mutex.RUnlock()
if reg, ok := r.models[modelID]; ok && reg != nil {
return reg.Info
}
return nil
}
// convertModelToMap converts ModelInfo to the appropriate format for different handler types
func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string) map[string]any {
if model == nil {

View File

@@ -19,27 +19,27 @@ import (
"github.com/tidwall/sjson"
)
// AistudioExecutor routes AI Studio requests through a websocket-backed transport.
type AistudioExecutor struct {
// AIStudioExecutor routes AI Studio requests through a websocket-backed transport.
type AIStudioExecutor struct {
provider string
relay *wsrelay.Manager
cfg *config.Config
}
// NewAistudioExecutor constructs a websocket executor for the provider name.
func NewAistudioExecutor(cfg *config.Config, provider string, relay *wsrelay.Manager) *AistudioExecutor {
return &AistudioExecutor{provider: strings.ToLower(provider), relay: relay, cfg: cfg}
// NewAIStudioExecutor constructs a websocket executor for the provider name.
func NewAIStudioExecutor(cfg *config.Config, provider string, relay *wsrelay.Manager) *AIStudioExecutor {
return &AIStudioExecutor{provider: strings.ToLower(provider), relay: relay, cfg: cfg}
}
// Identifier returns the provider key served by this executor.
func (e *AistudioExecutor) Identifier() string { return e.provider }
// Identifier returns the logical provider key for routing.
func (e *AIStudioExecutor) Identifier() string { return "aistudio" }
// PrepareRequest is a no-op because websocket transport already injects headers.
func (e *AistudioExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error {
func (e *AIStudioExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error {
return nil
}
func (e *AistudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
defer reporter.trackFailure(ctx, &err)
@@ -66,14 +66,14 @@ func (e *AistudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
Method: http.MethodPost,
Headers: wsReq.Headers.Clone(),
Body: bytes.Clone(body.payload),
Provider: e.provider,
Provider: e.Identifier(),
AuthID: authID,
AuthLabel: authLabel,
AuthType: authType,
AuthValue: authValue,
})
wsResp, err := e.relay.NonStream(ctx, e.provider, wsReq)
wsResp, err := e.relay.NonStream(ctx, authID, wsReq)
if err != nil {
recordAPIResponseError(ctx, e.cfg, err)
return resp, err
@@ -92,7 +92,7 @@ func (e *AistudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
return resp, nil
}
func (e *AistudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
defer reporter.trackFailure(ctx, &err)
@@ -118,13 +118,13 @@ func (e *AistudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
Method: http.MethodPost,
Headers: wsReq.Headers.Clone(),
Body: bytes.Clone(body.payload),
Provider: e.provider,
Provider: e.Identifier(),
AuthID: authID,
AuthLabel: authLabel,
AuthType: authType,
AuthValue: authValue,
})
wsStream, err := e.relay.Stream(ctx, e.provider, wsReq)
wsStream, err := e.relay.Stream(ctx, authID, wsReq)
if err != nil {
recordAPIResponseError(ctx, e.cfg, err)
return nil, err
@@ -151,7 +151,7 @@ func (e *AistudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
case wsrelay.MessageTypeStreamChunk:
if len(event.Payload) > 0 {
appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(event.Payload))
filtered := filterAistudioUsageMetadata(event.Payload)
filtered := filterAIStudioUsageMetadata(event.Payload)
if detail, ok := parseGeminiStreamUsage(filtered); ok {
reporter.publish(ctx, detail)
}
@@ -188,7 +188,7 @@ func (e *AistudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
return stream, nil
}
func (e *AistudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
_, body, err := e.translateRequest(req, opts, false)
if err != nil {
return cliproxyexecutor.Response{}, err
@@ -215,13 +215,13 @@ func (e *AistudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
Method: http.MethodPost,
Headers: wsReq.Headers.Clone(),
Body: bytes.Clone(body.payload),
Provider: e.provider,
Provider: e.Identifier(),
AuthID: authID,
AuthLabel: authLabel,
AuthType: authType,
AuthValue: authValue,
})
resp, err := e.relay.NonStream(ctx, e.provider, wsReq)
resp, err := e.relay.NonStream(ctx, authID, wsReq)
if err != nil {
recordAPIResponseError(ctx, e.cfg, err)
return cliproxyexecutor.Response{}, err
@@ -241,7 +241,7 @@ func (e *AistudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
}
func (e *AistudioExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
func (e *AIStudioExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
_ = ctx
return auth, nil
}
@@ -252,14 +252,18 @@ type translatedPayload struct {
toFormat sdktranslator.Format
}
func (e *AistudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool) ([]byte, translatedPayload, error) {
func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool) ([]byte, translatedPayload, error) {
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok {
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
payload = util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
}
payload = disableGeminiThinkingConfig(payload, req.Model)
payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
payload = fixGeminiImageAspectRatio(req.Model, payload)
metadataAction := "generateContent"
if req.Metadata != nil {
@@ -275,7 +279,7 @@ func (e *AistudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
return payload, translatedPayload{payload: payload, action: action, toFormat: to}, nil
}
func (e *AistudioExecutor) buildEndpoint(model, action, alt string) string {
func (e *AIStudioExecutor) buildEndpoint(model, action, alt string) string {
base := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, model, action)
if action == "streamGenerateContent" {
if alt == "" {
@@ -289,9 +293,9 @@ func (e *AistudioExecutor) buildEndpoint(model, action, alt string) string {
return base
}
// filterAistudioUsageMetadata removes usageMetadata from intermediate SSE events so that
// filterAIStudioUsageMetadata removes usageMetadata from intermediate SSE events so that
// only the terminal chunk retains token statistics.
func filterAistudioUsageMetadata(payload []byte) []byte {
func filterAIStudioUsageMetadata(payload []byte) []byte {
if len(payload) == 0 {
return payload
}

View File

@@ -3,6 +3,8 @@ package executor
import (
"bufio"
"bytes"
"compress/flate"
"compress/gzip"
"context"
"fmt"
"io"
@@ -10,6 +12,7 @@ import (
"strings"
"time"
"github.com/andybalholm/brotli"
"github.com/klauspost/compress/zstd"
claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
@@ -89,31 +92,31 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
recordAPIResponseError(ctx, e.cfg, err)
return resp, err
}
defer func() {
if errClose := httpResp.Body.Close(); errClose != nil {
log.Errorf("response body close error: %v", errClose)
}
}()
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
b, _ := io.ReadAll(httpResp.Body)
appendAPIResponseChunk(ctx, e.cfg, b)
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, string(b))
err = statusErr{code: httpResp.StatusCode, msg: string(b)}
if errClose := httpResp.Body.Close(); errClose != nil {
log.Errorf("response body close error: %v", errClose)
}
return resp, err
}
reader := io.Reader(httpResp.Body)
var decoder *zstd.Decoder
if hasZSTDEcoding(httpResp.Header.Get("Content-Encoding")) {
decoder, err = zstd.NewReader(httpResp.Body)
if err != nil {
recordAPIResponseError(ctx, e.cfg, err)
return resp, fmt.Errorf("failed to initialize zstd decoder: %w", err)
decodedBody, err := decodeResponseBody(httpResp.Body, httpResp.Header.Get("Content-Encoding"))
if err != nil {
recordAPIResponseError(ctx, e.cfg, err)
if errClose := httpResp.Body.Close(); errClose != nil {
log.Errorf("response body close error: %v", errClose)
}
reader = decoder
defer decoder.Close()
return resp, err
}
data, err := io.ReadAll(reader)
defer func() {
if errClose := decodedBody.Close(); errClose != nil {
log.Errorf("response body close error: %v", errClose)
}
}()
data, err := io.ReadAll(decodedBody)
if err != nil {
recordAPIResponseError(ctx, e.cfg, err)
return resp, err
@@ -192,19 +195,27 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
err = statusErr{code: httpResp.StatusCode, msg: string(b)}
return nil, err
}
decodedBody, err := decodeResponseBody(httpResp.Body, httpResp.Header.Get("Content-Encoding"))
if err != nil {
recordAPIResponseError(ctx, e.cfg, err)
if errClose := httpResp.Body.Close(); errClose != nil {
log.Errorf("response body close error: %v", errClose)
}
return nil, err
}
out := make(chan cliproxyexecutor.StreamChunk)
stream = out
go func() {
defer close(out)
defer func() {
if errClose := httpResp.Body.Close(); errClose != nil {
if errClose := decodedBody.Close(); errClose != nil {
log.Errorf("response body close error: %v", errClose)
}
}()
// If from == to (Claude → Claude), directly forward the SSE stream without translation
if from == to {
scanner := bufio.NewScanner(httpResp.Body)
scanner := bufio.NewScanner(decodedBody)
buf := make([]byte, 20_971_520)
scanner.Buffer(buf, 20_971_520)
for scanner.Scan() {
@@ -228,7 +239,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
}
// For other formats, use translation
scanner := bufio.NewScanner(httpResp.Body)
scanner := bufio.NewScanner(decodedBody)
buf := make([]byte, 20_971_520)
scanner.Buffer(buf, 20_971_520)
var param any
@@ -304,29 +315,29 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
recordAPIResponseError(ctx, e.cfg, err)
return cliproxyexecutor.Response{}, err
}
defer func() {
if errClose := resp.Body.Close(); errClose != nil {
log.Errorf("response body close error: %v", errClose)
}
}()
recordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone())
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
b, _ := io.ReadAll(resp.Body)
appendAPIResponseChunk(ctx, e.cfg, b)
if errClose := resp.Body.Close(); errClose != nil {
log.Errorf("response body close error: %v", errClose)
}
return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: string(b)}
}
reader := io.Reader(resp.Body)
var decoder *zstd.Decoder
if hasZSTDEcoding(resp.Header.Get("Content-Encoding")) {
decoder, err = zstd.NewReader(resp.Body)
if err != nil {
recordAPIResponseError(ctx, e.cfg, err)
return cliproxyexecutor.Response{}, fmt.Errorf("failed to initialize zstd decoder: %w", err)
decodedBody, err := decodeResponseBody(resp.Body, resp.Header.Get("Content-Encoding"))
if err != nil {
recordAPIResponseError(ctx, e.cfg, err)
if errClose := resp.Body.Close(); errClose != nil {
log.Errorf("response body close error: %v", errClose)
}
reader = decoder
defer decoder.Close()
return cliproxyexecutor.Response{}, err
}
data, err := io.ReadAll(reader)
defer func() {
if errClose := decodedBody.Close(); errClose != nil {
log.Errorf("response body close error: %v", errClose)
}
}()
data, err := io.ReadAll(decodedBody)
if err != nil {
recordAPIResponseError(ctx, e.cfg, err)
return cliproxyexecutor.Response{}, err
@@ -419,7 +430,7 @@ func (e *ClaudeExecutor) resolveClaudeConfig(auth *cliproxyauth.Auth) *config.Cl
continue
}
if attrKey != "" && strings.EqualFold(cfgKey, attrKey) {
if attrBase == "" || cfgBase == "" || strings.EqualFold(cfgBase, attrBase) {
if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) {
return entry
}
}
@@ -438,17 +449,84 @@ func (e *ClaudeExecutor) resolveClaudeConfig(auth *cliproxyauth.Auth) *config.Cl
return nil
}
func hasZSTDEcoding(contentEncoding string) bool {
if contentEncoding == "" {
return false
}
parts := strings.Split(contentEncoding, ",")
for i := range parts {
if strings.EqualFold(strings.TrimSpace(parts[i]), "zstd") {
return true
type compositeReadCloser struct {
io.Reader
closers []func() error
}
func (c *compositeReadCloser) Close() error {
var firstErr error
for i := range c.closers {
if c.closers[i] == nil {
continue
}
if err := c.closers[i](); err != nil && firstErr == nil {
firstErr = err
}
}
return false
return firstErr
}
func decodeResponseBody(body io.ReadCloser, contentEncoding string) (io.ReadCloser, error) {
if body == nil {
return nil, fmt.Errorf("response body is nil")
}
if contentEncoding == "" {
return body, nil
}
encodings := strings.Split(contentEncoding, ",")
for _, raw := range encodings {
encoding := strings.TrimSpace(strings.ToLower(raw))
switch encoding {
case "", "identity":
continue
case "gzip":
gzipReader, err := gzip.NewReader(body)
if err != nil {
_ = body.Close()
return nil, fmt.Errorf("failed to create gzip reader: %w", err)
}
return &compositeReadCloser{
Reader: gzipReader,
closers: []func() error{
gzipReader.Close,
func() error { return body.Close() },
},
}, nil
case "deflate":
deflateReader := flate.NewReader(body)
return &compositeReadCloser{
Reader: deflateReader,
closers: []func() error{
deflateReader.Close,
func() error { return body.Close() },
},
}, nil
case "br":
return &compositeReadCloser{
Reader: brotli.NewReader(body),
closers: []func() error{
func() error { return body.Close() },
},
}, nil
case "zstd":
decoder, err := zstd.NewReader(body)
if err != nil {
_ = body.Close()
return nil, fmt.Errorf("failed to create zstd reader: %w", err)
}
return &compositeReadCloser{
Reader: decoder,
closers: []func() error{
func() error { decoder.Close(); return nil },
func() error { return body.Close() },
},
}, nil
default:
continue
}
}
return body, nil
}
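// Illustrative usage sketch (editor's addition, not part of this diff):
// round-tripping a gzip payload through decodeResponseBody. Closing the
// returned reader closes both the gzip decoder and the underlying body via
// compositeReadCloser.
//
//	func exampleDecode() ([]byte, error) {
//		var buf bytes.Buffer
//		zw := gzip.NewWriter(&buf)
//		_, _ = zw.Write([]byte(`{"ok":true}`))
//		_ = zw.Close()
//		body, err := decodeResponseBody(io.NopCloser(&buf), "gzip")
//		if err != nil {
//			return nil, err
//		}
//		defer func() { _ = body.Close() }()
//		return io.ReadAll(body)
//	}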
func applyClaudeHeaders(r *http.Request, apiKey string, stream bool) {
@@ -473,8 +551,8 @@ func applyClaudeHeaders(r *http.Request, apiKey string, stream bool) {
misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Arch", "arm64")
misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Os", "MacOS")
misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Timeout", "60")
misc.EnsureHeader(r.Header, ginHeaders, "User-Agent", "claude-cli/1.0.83 (external, cli)")
r.Header.Set("Connection", "keep-alive")
r.Header.Set("User-Agent", "claude-cli/1.0.83 (external, cli)")
r.Header.Set("Accept-Encoding", "gzip, deflate, br, zstd")
if stream {
r.Header.Set("Accept", "text/event-stream")

View File

@@ -532,6 +532,7 @@ func applyCodexHeaders(r *http.Request, auth *cliproxyauth.Auth, token string) {
misc.EnsureHeader(r.Header, ginHeaders, "Version", "0.21.0")
misc.EnsureHeader(r.Header, ginHeaders, "Openai-Beta", "responses=experimental")
misc.EnsureHeader(r.Header, ginHeaders, "Session_id", uuid.NewString())
misc.EnsureHeader(r.Header, ginHeaders, "User-Agent", "codex_cli_rs/0.50.0 (Mac OS 26.0.1; arm64) Apple_Terminal/464")
r.Header.Set("Accept", "text/event-stream")
r.Header.Set("Connection", "Keep-Alive")

View File

@@ -63,9 +63,14 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
to := sdktranslator.FromString("gemini-cli")
budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if hasOverride {
if hasOverride && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride)
}
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
action := "generateContent"
@@ -92,7 +97,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
var lastStatus int
var lastBody []byte
for _, attemptModel := range models {
for idx, attemptModel := range models {
payload := append([]byte(nil), basePayload...)
if action == "countTokens" {
payload = deleteJSONField(payload, "project")
@@ -101,7 +106,6 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
payload = setJSONField(payload, "project", projectID)
payload = setJSONField(payload, "model", attemptModel)
}
payload = disableGeminiThinkingConfig(payload, attemptModel)
tok, errTok := tokenSource.Token()
if errTok != nil {
@@ -166,7 +170,11 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
lastBody = append([]byte(nil), data...)
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, string(data))
if httpResp.StatusCode == 429 {
log.Debugf("gemini cli executor: rate limited, retrying with next model")
if idx+1 < len(models) {
log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
} else {
log.Debug("gemini cli executor: rate limited, no additional fallback model")
}
continue
}
@@ -196,9 +204,14 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
to := sdktranslator.FromString("gemini-cli")
budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
if hasOverride {
if hasOverride && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride)
}
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
projectID := strings.TrimSpace(stringValue(auth.Metadata, "project_id"))
@@ -219,11 +232,10 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
var lastStatus int
var lastBody []byte
for _, attemptModel := range models {
for idx, attemptModel := range models {
payload := append([]byte(nil), basePayload...)
payload = setJSONField(payload, "project", projectID)
payload = setJSONField(payload, "model", attemptModel)
payload = disableGeminiThinkingConfig(payload, attemptModel)
tok, errTok := tokenSource.Token()
if errTok != nil {
@@ -282,7 +294,11 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
lastBody = append([]byte(nil), data...)
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, string(data))
if httpResp.StatusCode == 429 {
log.Debugf("gemini cli executor: rate limited, retrying with next model")
if idx+1 < len(models) {
log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
} else {
log.Debug("gemini cli executor: rate limited, no additional fallback model")
}
continue
}
err = statusErr{code: httpResp.StatusCode, msg: string(data)}
@@ -393,12 +409,16 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
for _, attemptModel := range models {
payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false)
if hasOverride {
if hasOverride && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
payload = util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
}
payload = deleteJSONField(payload, "project")
payload = deleteJSONField(payload, "model")
payload = disableGeminiThinkingConfig(payload, attemptModel)
payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
payload = fixGeminiCLIImageAspectRatio(attemptModel, payload)
tok, errTok := tokenSource.Token()
@@ -623,29 +643,6 @@ func cliPreviewFallbackOrder(model string) []string {
}
}
func disableGeminiThinkingConfig(body []byte, model string) []byte {
if !geminiModelDisallowsThinking(model) {
return body
}
updated := deleteJSONField(body, "request.generationConfig.thinkingConfig")
updated = deleteJSONField(updated, "generationConfig.thinkingConfig")
return updated
}
func geminiModelDisallowsThinking(model string) bool {
if model == "" {
return false
}
lower := strings.ToLower(model)
for _, marker := range []string{"gemini-2.5-flash-image-preview", "gemini-2.5-flash-image"} {
if strings.Contains(lower, marker) {
return true
}
}
return false
}
// setJSONField sets a top-level JSON field on a byte slice payload via sjson.
func setJSONField(body []byte, key, value string) []byte {
if key == "" {

View File

@@ -78,10 +78,14 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok {
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
}
body = disableGeminiThinkingConfig(body, req.Model)
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
body = fixGeminiImageAspectRatio(req.Model, body)
action := "generateContent"
@@ -166,10 +170,14 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok {
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
}
body = disableGeminiThinkingConfig(body, req.Model)
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
body = fixGeminiImageAspectRatio(req.Model, body)
url := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, req.Model, "streamGenerateContent")
@@ -269,10 +277,14 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok {
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
}
translatedReq = disableGeminiThinkingConfig(translatedReq, req.Model)
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
respCtx := context.WithValue(ctx, "alt", opts.Alt)
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")

View File

@@ -11,6 +11,7 @@ import (
"strings"
client "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
@@ -136,7 +137,7 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
}
// Build output Gemini CLI request JSON
out := `{"model":"","request":{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}}`
out := `{"model":"","request":{"contents":[]}}`
out, _ = sjson.Set(out, "model", modelName)
if systemInstruction != nil {
b, _ := json.Marshal(systemInstruction)
@@ -151,21 +152,16 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
out, _ = sjson.SetRaw(out, "request.tools", string(b))
}
// Map reasoning and sampling configs
reasoningEffortResult := gjson.GetBytes(rawJSON, "reasoning_effort")
if reasoningEffortResult.String() == "none" {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", false)
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0)
} else if reasoningEffortResult.String() == "auto" {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
} else if reasoningEffortResult.String() == "low" {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
} else if reasoningEffortResult.String() == "medium" {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
} else if reasoningEffortResult.String() == "high" {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 24576)
} else {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
if t.Get("type").String() == "enabled" {
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
budget := int(b.Int())
budget = util.NormalizeThinkingBudget(modelName, budget)
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
}
}
}
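// Illustrative input sketch (editor's addition, not part of this diff): an
// Anthropic-style request body that would trigger the mapping above.
//
//	{"model": "gemini-2.5-pro", "thinking": {"type": "enabled", "budget_tokens": 2048}}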
if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number {
out, _ = sjson.Set(out, "request.generationConfig.temperature", v.Num)

View File

@@ -26,32 +26,57 @@ import (
// - []byte: The transformed request data in Gemini CLI API format
func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bool) []byte {
rawJSON := bytes.Clone(inputRawJSON)
// Base envelope
out := []byte(`{"project":"","request":{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}},"model":"gemini-2.5-pro"}`)
// Base envelope (no default thinkingConfig)
out := []byte(`{"project":"","request":{"contents":[]},"model":"gemini-2.5-pro"}`)
// Model
out, _ = sjson.SetBytes(out, "model", modelName)
// Reasoning effort -> thinkingBudget/include_thoughts
// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
re := gjson.GetBytes(rawJSON, "reasoning_effort")
if re.Exists() {
hasOfficialThinking := re.Exists()
if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
switch re.String() {
case "none":
out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts")
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0)
case "auto":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
case "low":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
case "medium":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
case "high":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 24576)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
default:
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
}
}
// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
var setBudget bool
var normalized int
if v := tc.Get("thinking_budget"); v.Exists() {
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
setBudget = true
}
if v := tc.Get("include_thoughts"); v.Exists() {
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
} else if setBudget {
if normalized != 0 {
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
}
}
}
} else {
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
}
// Temperature/top_p/top_k
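In summary, the Chat Completions path now resolves reasoning_effort to these budgets before clamping (a restatement of the switch above, not new behavior):

    "none"   -> 0 (thinking disabled)
    "auto"   -> -1 (dynamic)
    "low"    -> 1024
    "medium" -> 8192
    "high"   -> 32768
    other    -> -1 (dynamic)

Every tier except "none" also sets include_thoughts to true, and the whole mapping is skipped for models without Thinking metadata in the registry.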
@@ -250,8 +275,34 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
if t.Get("type").String() == "function" {
fn := t.Get("function")
if fn.Exists() && fn.IsObject() {
parametersJsonSchema, _ := util.RenameKey(fn.Raw, "parameters", "parametersJsonSchema")
out, _ = sjson.SetRawBytes(out, fdPath+".-1", []byte(parametersJsonSchema))
fnRaw := fn.Raw
if fn.Get("parameters").Exists() {
renamed, errRename := util.RenameKey(fnRaw, "parameters", "parametersJsonSchema")
if errRename != nil {
log.Warnf("Failed to rename parameters for tool '%s': %v", fn.Get("name").String(), errRename)
} else {
fnRaw = renamed
}
} else {
var errSet error
fnRaw, errSet = sjson.Set(fnRaw, "parametersJsonSchema.type", "object")
if errSet != nil {
log.Warnf("Failed to set default schema type for tool '%s': %v", fn.Get("name").String(), errSet)
continue
}
fnRaw, errSet = sjson.Set(fnRaw, "parametersJsonSchema.properties", map[string]interface{}{})
if errSet != nil {
log.Warnf("Failed to set default schema properties for tool '%s': %v", fn.Get("name").String(), errSet)
continue
}
}
tmp, errSet := sjson.SetRawBytes(out, fdPath+".-1", []byte(fnRaw))
if errSet != nil {
log.Warnf("Failed to append tool declaration for '%s': %v", fn.Get("name").String(), errSet)
continue
}
out = tmp
}
}
}
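The fallback branch above synthesizes an empty object schema for function tools declared without parameters. A self-contained sketch of that normalization (the tool name is hypothetical):

package main

import (
    "fmt"

    "github.com/tidwall/gjson"
    "github.com/tidwall/sjson"
)

func main() {
    // A function tool declared without a "parameters" schema.
    fnRaw := `{"name":"get_time","description":"Returns the current time"}`
    if !gjson.Get(fnRaw, "parameters").Exists() {
        // Default to an empty object schema, mirroring the fallback above.
        fnRaw, _ = sjson.Set(fnRaw, "parametersJsonSchema.type", "object")
        fnRaw, _ = sjson.Set(fnRaw, "parametersJsonSchema.properties", map[string]interface{}{})
    }
    fmt.Println(fnRaw)
    // {"name":"get_time","description":"Returns the current time","parametersJsonSchema":{"type":"object","properties":{}}}
}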


@@ -11,6 +11,7 @@ import (
"strings"
client "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
@@ -129,7 +130,7 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
}
// Build output Gemini CLI request JSON
out := `{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}`
out := `{"contents":[]}`
out, _ = sjson.Set(out, "model", modelName)
if systemInstruction != nil {
b, _ := json.Marshal(systemInstruction)
@@ -144,21 +145,16 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
out, _ = sjson.SetRaw(out, "tools", string(b))
}
// Map reasoning and sampling configs
reasoningEffortResult := gjson.GetBytes(rawJSON, "reasoning_effort")
if reasoningEffortResult.String() == "none" {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", false)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
} else if reasoningEffortResult.String() == "auto" {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
} else if reasoningEffortResult.String() == "low" {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
} else if reasoningEffortResult.String() == "medium" {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
} else if reasoningEffortResult.String() == "high" {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 24576)
} else {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when enabled
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
if t.Get("type").String() == "enabled" {
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
budget := int(b.Int())
budget = util.NormalizeThinkingBudget(modelName, budget)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}
}
if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number {
out, _ = sjson.Set(out, "generationConfig.temperature", v.Num)


@@ -26,32 +26,58 @@ import (
// - []byte: The transformed request data in Gemini API format
func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) []byte {
rawJSON := bytes.Clone(inputRawJSON)
// Base envelope
out := []byte(`{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}`)
// Base envelope (no default thinkingConfig)
out := []byte(`{"contents":[]}`)
// Model
out, _ = sjson.SetBytes(out, "model", modelName)
// Reasoning effort -> thinkingBudget/include_thoughts
// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
re := gjson.GetBytes(rawJSON, "reasoning_effort")
if re.Exists() {
hasOfficialThinking := re.Exists()
if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
switch re.String() {
case "none":
out, _ = sjson.DeleteBytes(out, "generationConfig.thinkingConfig.include_thoughts")
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
case "auto":
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "low":
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "medium":
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "high":
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 24576)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
default:
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}
// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
var setBudget bool
var normalized int
if v := tc.Get("thinking_budget"); v.Exists() {
// Normalize budget to model range
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
setBudget = true
}
if v := tc.Get("include_thoughts"); v.Exists() {
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
} else if setBudget {
if normalized != 0 {
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}
}
} else {
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
}
// Temperature/top_p/top_k
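A sketch of the Cherry Studio fallback in isolation, assuming a Thinking-capable model; the ModelSupportsThinking gate and budget clamping are elided, and the model name is illustrative:

package main

import (
    "fmt"

    "github.com/tidwall/gjson"
    "github.com/tidwall/sjson"
)

func main() {
    // No official reasoning_effort, so extra_body.google.thinking_config applies.
    in := []byte(`{"model":"gemini-2.5-flash","extra_body":{"google":{"thinking_config":{"thinking_budget":4096}}}}`)
    out := []byte(`{"contents":[]}`)
    if !gjson.GetBytes(in, "reasoning_effort").Exists() {
        if tc := gjson.GetBytes(in, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
            if v := tc.Get("thinking_budget"); v.Exists() {
                budget := int(v.Int()) // clamping via util.NormalizeThinkingBudget elided
                out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
                if !tc.Get("include_thoughts").Exists() && budget != 0 {
                    // A non-zero explicit budget implies include_thoughts=true.
                    out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
                }
            }
        }
    }
    fmt.Printf("%s\n", out)
    // {"contents":[],"generationConfig":{"thinkingConfig":{"thinkingBudget":4096,"include_thoughts":true}}}
}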


@@ -4,6 +4,7 @@ import (
"bytes"
"strings"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
@@ -15,8 +16,8 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
_ = modelName // Unused but required by interface
_ = stream // Unused but required by interface
// Base Gemini API template
out := `{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}`
// Base Gemini API template (do not include thinkingConfig by default)
out := `{"contents":[]}`
root := gjson.ParseBytes(rawJSON)
@@ -242,23 +243,52 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
out, _ = sjson.Set(out, "generationConfig.stopSequences", sequences)
}
if reasoningEffort := root.Get("reasoning.effort"); reasoningEffort.Exists() {
// OpenAI official reasoning fields take precedence
hasOfficialThinking := root.Get("reasoning.effort").Exists()
if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
reasoningEffort := root.Get("reasoning.effort")
switch reasoningEffort.String() {
case "none":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", false)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
case "auto":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "minimal":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "low":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 4096)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 4096))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "medium":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "high":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 24576)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
default:
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}
// Cherry Studio extension (applies only when official fields are missing)
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
var setBudget bool
var normalized int
if v := tc.Get("thinking_budget"); v.Exists() {
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
setBudget = true
}
if v := tc.Get("include_thoughts"); v.Exists() {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
} else if setBudget {
if normalized != 0 {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}
}
}
return []byte(out)
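Note that the Responses translator uses a slightly different tier table than the Chat Completions one: it adds "minimal" at 1024 and moves "low" up to 4096 (again, before NormalizeThinkingBudget clamping):

    "none"    -> 0 (include_thoughts explicitly false)
    "auto"    -> -1 (dynamic)
    "minimal" -> 1024
    "low"     -> 4096
    "medium"  -> 8192
    "high"    -> 32768
    other     -> -1 (dynamic)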


@@ -4,6 +4,7 @@ package chat_completions
import (
"bytes"
"github.com/tidwall/sjson"
)
// ConvertOpenAIRequestToOpenAI converts an OpenAI Chat Completions request (raw JSON)
@@ -17,5 +18,14 @@ import (
// Returns:
// - []byte: The transformed request data in OpenAI Chat Completions format
func ConvertOpenAIRequestToOpenAI(modelName string, inputRawJSON []byte, _ bool) []byte {
return bytes.Clone(inputRawJSON)
// Update the "model" field in the JSON payload with the provided modelName
// The sjson.SetBytes function returns a new byte slice with the updated JSON.
updatedJSON, err := sjson.SetBytes(inputRawJSON, "model", modelName)
if err != nil {
// If there's an error, return the original JSON or handle the error appropriately.
// For now, we'll return the original, but in a real scenario, logging or a more robust error
// handling mechanism would be needed.
return bytes.Clone(inputRawJSON)
}
return updatedJSON
}
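A quick usage sketch of the passthrough converter's effect; by this point the dynamic-routing handler has presumably already resolved the upstream model name, and the names below are illustrative:

package main

import (
    "fmt"

    "github.com/tidwall/sjson"
)

func main() {
    in := []byte(`{"model":"client-alias","messages":[]}`)
    // Mirror ConvertOpenAIRequestToOpenAI: pin the resolved model name.
    out, err := sjson.SetBytes(in, "model", "vendor/model-x")
    if err != nil {
        out = in // fall back to the original payload
    }
    fmt.Printf("%s\n", out)
    // {"model":"vendor/model-x","messages":[]}
}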


@@ -179,3 +179,19 @@ func GeminiThinkingFromMetadata(metadata map[string]any) (*int, *bool, bool) {
}
return budgetPtr, includePtr, matched
}
// StripThinkingConfigIfUnsupported removes thinkingConfig from the request body
// when the target model does not advertise Thinking capability. It cleans both
// standard Gemini and Gemini CLI JSON envelopes. This acts as a final safety net
// in case upstream injected thinking for an unsupported model.
func StripThinkingConfigIfUnsupported(model string, body []byte) []byte {
if ModelSupportsThinking(model) || len(body) == 0 {
return body
}
updated := body
// Gemini CLI path
updated, _ = sjson.DeleteBytes(updated, "request.generationConfig.thinkingConfig")
// Standard Gemini path
updated, _ = sjson.DeleteBytes(updated, "generationConfig.thinkingConfig")
return updated
}
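Expected effect of the safety net, assuming a model with no Thinking metadata in the registry (the model name below is hypothetical):

body := []byte(`{"contents":[],"generationConfig":{"temperature":0.7,"thinkingConfig":{"thinkingBudget":1024}}}`)
cleaned := StripThinkingConfigIfUnsupported("example-non-thinking-model", body)
// cleaned: {"contents":[],"generationConfig":{"temperature":0.7}}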

internal/util/thinking.go (new file, 69 lines)

@@ -0,0 +1,69 @@
package util
import (
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
)
// ModelSupportsThinking reports whether the given model has Thinking capability
// according to the model registry metadata (provider-agnostic).
func ModelSupportsThinking(model string) bool {
if model == "" {
return false
}
if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil {
return info.Thinking != nil
}
return false
}
// NormalizeThinkingBudget clamps the requested thinking budget to the
// supported range for the specified model using registry metadata only.
// If the model is unknown or has no Thinking metadata, returns the original budget.
// For a dynamic budget (-1), returns -1 when the model allows dynamic budgets;
// otherwise falls back to the mid-point of the supported range, or, when the
// mid-point is non-positive, to 0 (if zero is allowed) or to the minimum.
func NormalizeThinkingBudget(model string, budget int) int {
if budget == -1 { // dynamic
if found, min, max, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found {
if dynamicAllowed {
return -1
}
mid := (min + max) / 2
if mid <= 0 && zeroAllowed {
return 0
}
if mid <= 0 {
return min
}
return mid
}
return -1
}
if found, min, max, zeroAllowed, _ := thinkingRangeFromRegistry(model); found {
if budget == 0 {
if zeroAllowed {
return 0
}
return min
}
if budget < min {
return min
}
if budget > max {
return max
}
return budget
}
return budget
}
// thinkingRangeFromRegistry attempts to read thinking ranges from the model registry.
func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zeroAllowed bool, dynamicAllowed bool) {
if model == "" {
return false, 0, 0, false, false
}
info := registry.GetGlobalRegistry().GetModelInfo(model)
if info == nil || info.Thinking == nil {
return false, 0, 0, false, false
}
return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
}
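Worked examples of the clamping rules, under hypothetical registry metadata Min=128, Max=32768, ZeroAllowed=true, DynamicAllowed=false for "example-model":

NormalizeThinkingBudget("example-model", -1)    // -> 16448 (mid-range; dynamic not allowed)
NormalizeThinkingBudget("example-model", 0)     // -> 0     (zero allowed)
NormalizeThinkingBudget("example-model", 64)    // -> 128   (clamped to min)
NormalizeThinkingBudget("example-model", 50000) // -> 32768 (clamped to max)
NormalizeThinkingBudget("unknown-model", 9999)  // -> 9999  (no metadata; returned unchanged)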


@@ -6,6 +6,7 @@ package handlers
import (
"fmt"
"net/http"
"strings"
"github.com/gin-gonic/gin"
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
@@ -46,6 +47,9 @@ type BaseAPIHandler struct {
// Cfg holds the current application configuration.
Cfg *config.SDKConfig
// OpenAICompatProviders is a list of provider names for OpenAI compatibility.
OpenAICompatProviders []string
}
// NewBaseAPIHandlers creates a new API handlers instance.
@@ -57,10 +61,11 @@ type BaseAPIHandler struct {
//
// Returns:
// - *BaseAPIHandler: A new API handlers instance
func NewBaseAPIHandlers(cfg *config.SDKConfig, authManager *coreauth.Manager) *BaseAPIHandler {
func NewBaseAPIHandlers(cfg *config.SDKConfig, authManager *coreauth.Manager, openAICompatProviders []string) *BaseAPIHandler {
return &BaseAPIHandler{
Cfg: cfg,
AuthManager: authManager,
Cfg: cfg,
AuthManager: authManager,
OpenAICompatProviders: openAICompatProviders,
}
}
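Construction now takes the configured openai-compatibility provider names; a hypothetical wiring sketch (provider names are placeholders, presumably sourced from the config's openai-compatibility section):

h := NewBaseAPIHandlers(cfg, authManager, []string{"openrouter", "custom-compat"})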
@@ -133,10 +138,9 @@ func (h *BaseAPIHandler) GetContextWithCancel(handler interfaces.APIHandler, c *
// ExecuteWithAuthManager executes a non-streaming request via the core auth manager.
// This path is the only supported execution route.
func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) {
normalizedModel, metadata := normalizeModelMetadata(modelName)
providers := util.GetProviderName(normalizedModel)
if len(providers) == 0 {
return nil, &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)}
providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName)
if errMsg != nil {
return nil, errMsg
}
req := coreexecutor.Request{
Model: normalizedModel,
@@ -176,10 +180,9 @@ func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType
// ExecuteCountWithAuthManager executes a non-streaming count request via the core auth manager.
// This path is the only supported execution route.
func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) {
normalizedModel, metadata := normalizeModelMetadata(modelName)
providers := util.GetProviderName(normalizedModel)
if len(providers) == 0 {
return nil, &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)}
providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName)
if errMsg != nil {
return nil, errMsg
}
req := coreexecutor.Request{
Model: normalizedModel,
@@ -219,11 +222,10 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
// ExecuteStreamWithAuthManager executes a streaming request via the core auth manager.
// This path is the only supported execution route.
func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) (<-chan []byte, <-chan *interfaces.ErrorMessage) {
normalizedModel, metadata := normalizeModelMetadata(modelName)
providers := util.GetProviderName(normalizedModel)
if len(providers) == 0 {
providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName)
if errMsg != nil {
errChan := make(chan *interfaces.ErrorMessage, 1)
errChan <- &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)}
errChan <- errMsg
close(errChan)
return nil, errChan
}
@@ -292,6 +294,58 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
return dataChan, errChan
}
func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string, normalizedModel string, metadata map[string]any, err *interfaces.ErrorMessage) {
providerName, extractedModelName, isDynamic := h.parseDynamicModel(modelName)
// Normalize the model name first (handling suffixes like "-thinking-128"),
// since the provider lookup for non-dynamic models needs the canonical name.
normalizedModel, metadata = normalizeModelMetadata(modelName)
if isDynamic {
providers = []string{providerName}
// parseDynamicModel already returns a normalized model name for dynamic models.
normalizedModel = extractedModelName
} else {
// For non-dynamic models, use the normalizedModel to get the provider name.
providers = util.GetProviderName(normalizedModel)
}
if len(providers) == 0 {
return nil, "", nil, &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)}
}
return providers, normalizedModel, metadata, nil
}
func (h *BaseAPIHandler) parseDynamicModel(modelName string) (providerName, model string, isDynamic bool) {
var providerPart, modelPart string
for _, sep := range []string{"://"} {
if parts := strings.SplitN(modelName, sep, 2); len(parts) == 2 {
providerPart = parts[0]
modelPart = parts[1]
break
}
}
if providerPart == "" {
return "", modelName, false
}
// Check if the provider is a configured openai-compatibility provider
for _, pName := range h.OpenAICompatProviders {
if pName == providerPart {
return providerPart, modelPart, true
}
}
return "", modelName, false
}
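// Behavior sketch, assuming h.OpenAICompatProviders = []string{"openrouter"}
// (a hypothetical configuration):
//
//   parseDynamicModel("openrouter://vendor/model-x") // ("openrouter", "vendor/model-x", true)
//   parseDynamicModel("unknown://vendor/model-x")    // ("", "unknown://vendor/model-x", false)
//   parseDynamicModel("gemini-2.5-pro")              // ("", "gemini-2.5-pro", false)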
func cloneBytes(src []byte) []byte {
if len(src) == 0 {
return nil


@@ -872,6 +872,11 @@ func (m *Manager) persist(ctx context.Context, auth *Auth) error {
if m.store == nil || auth == nil {
return nil
}
if auth.Attributes != nil {
if v := strings.ToLower(strings.TrimSpace(auth.Attributes["runtime_only"])); v == "true" {
return nil
}
}
// Skip persistence when metadata is absent (e.g., runtime-only auths).
if auth.Metadata == nil {
return nil
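For context, the runtime_only attribute is set by the websocket path shown later in this diff; a sketch of an auth that persist() now skips (field values are illustrative):

auth := &coreauth.Auth{
    ID:         "aistudio-channel-1",
    Provider:   "aistudio",
    Attributes: map[string]string{"runtime_only": "true"},
}
// persist(ctx, auth) returns nil immediately, leaving m.store untouched.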


@@ -157,15 +157,6 @@ func (a *Auth) AccountInfo() (string, string) {
return "oauth", v
}
}
if strings.HasPrefix(strings.ToLower(strings.TrimSpace(a.Provider)), "aistudio-") {
if label := strings.TrimSpace(a.Label); label != "" {
return "oauth", label
}
if id := strings.TrimSpace(a.ID); id != "" {
return "oauth", id
}
return "oauth", "aistudio"
}
if a.Attributes != nil {
if v := a.Attributes["api_key"]; v != "" {
return "api_key", v


@@ -194,15 +194,15 @@ func (s *Service) ensureWebsocketGateway() {
s.wsGateway = wsrelay.NewManager(opts)
}
func (s *Service) wsOnConnected(provider string) {
if s == nil || provider == "" {
func (s *Service) wsOnConnected(channelID string) {
if s == nil || channelID == "" {
return
}
if !strings.HasPrefix(strings.ToLower(provider), "aistudio-") {
if !strings.HasPrefix(strings.ToLower(channelID), "aistudio-") {
return
}
if s.coreManager != nil {
if existing, ok := s.coreManager.GetByID(provider); ok && existing != nil {
if existing, ok := s.coreManager.GetByID(channelID); ok && existing != nil {
if !existing.Disabled && existing.Status == coreauth.StatusActive {
return
}
@@ -210,36 +210,34 @@ func (s *Service) wsOnConnected(provider string) {
}
now := time.Now().UTC()
auth := &coreauth.Auth{
ID: provider,
Provider: provider,
Label: provider,
ID: channelID, // keep channel identifier as ID
Provider: "aistudio", // logical provider for switch routing
Label: channelID, // display original channel id
Status: coreauth.StatusActive,
CreatedAt: now,
UpdatedAt: now,
Attributes: map[string]string{"ws_provider": "gemini"},
Attributes: map[string]string{"runtime_only": "true"},
Metadata: map[string]any{"email": channelID}, // metadata drives logging and usage tracking
}
log.Infof("websocket provider connected: %s", provider)
log.Infof("websocket provider connected: %s", channelID)
s.applyCoreAuthAddOrUpdate(context.Background(), auth)
}
func (s *Service) wsOnDisconnected(provider string, reason error) {
if s == nil || provider == "" {
func (s *Service) wsOnDisconnected(channelID string, reason error) {
if s == nil || channelID == "" {
return
}
if reason != nil {
if strings.Contains(reason.Error(), "replaced by new connection") {
log.Infof("websocket provider replaced: %s", provider)
log.Infof("websocket provider replaced: %s", channelID)
return
}
log.Warnf("websocket provider disconnected: %s (%v)", provider, reason)
log.Warnf("websocket provider disconnected: %s (%v)", channelID, reason)
} else {
log.Infof("websocket provider disconnected: %s", provider)
log.Infof("websocket provider disconnected: %s", channelID)
}
ctx := context.Background()
s.applyCoreAuthRemoval(ctx, provider)
if s.coreManager != nil {
s.coreManager.UnregisterExecutor(provider)
}
s.applyCoreAuthRemoval(ctx, channelID)
}
func (s *Service) applyCoreAuthAddOrUpdate(ctx context.Context, auth *coreauth.Auth) {
@@ -317,17 +315,16 @@ func (s *Service) ensureExecutorsForAuth(a *coreauth.Auth) {
s.coreManager.RegisterExecutor(executor.NewOpenAICompatExecutor(compatProviderKey, s.cfg))
return
}
if strings.HasPrefix(strings.ToLower(strings.TrimSpace(a.Provider)), "aistudio-") {
if s.wsGateway != nil {
s.coreManager.RegisterExecutor(executor.NewAistudioExecutor(s.cfg, a.Provider, s.wsGateway))
}
return
}
switch strings.ToLower(a.Provider) {
case "gemini":
s.coreManager.RegisterExecutor(executor.NewGeminiExecutor(s.cfg))
case "gemini-cli":
s.coreManager.RegisterExecutor(executor.NewGeminiCLIExecutor(s.cfg))
case "aistudio":
if s.wsGateway != nil {
s.coreManager.RegisterExecutor(executor.NewAIStudioExecutor(s.cfg, a.ID, s.wsGateway))
}
return
case "claude":
s.coreManager.RegisterExecutor(executor.NewClaudeExecutor(s.cfg))
case "codex":
@@ -609,13 +606,6 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
}
provider := strings.ToLower(strings.TrimSpace(a.Provider))
compatProviderKey, compatDisplayName, compatDetected := openAICompatInfoFromAuth(a)
if a.Attributes != nil {
if strings.EqualFold(a.Attributes["ws_provider"], "gemini") {
models := mergeGeminiModels()
GlobalModelRegistry().RegisterClient(a.ID, provider, models)
return
}
}
if compatDetected {
provider = "openai-compatibility"
}
@@ -625,6 +615,8 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
models = registry.GetGeminiModels()
case "gemini-cli":
models = registry.GetGeminiCLIModels()
case "aistudio":
models = registry.GetAIStudioModels()
case "claude":
models = registry.GetClaudeModels()
if entry := s.resolveConfigClaudeKey(a); entry != nil && len(entry.Models) > 0 {
@@ -726,27 +718,6 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
}
}
func mergeGeminiModels() []*ModelInfo {
models := make([]*ModelInfo, 0, 16)
seen := make(map[string]struct{})
appendModels := func(items []*ModelInfo) {
for i := range items {
m := items[i]
if m == nil || m.ID == "" {
continue
}
if _, ok := seen[m.ID]; ok {
continue
}
seen[m.ID] = struct{}{}
models = append(models, m)
}
}
appendModels(registry.GetGeminiModels())
appendModels(registry.GetGeminiCLIModels())
return models
}
func (s *Service) resolveConfigClaudeKey(auth *coreauth.Auth) *config.ClaudeKey {
if auth == nil || s.cfg == nil {
return nil