Mirror of https://github.com/router-for-me/CLIProxyAPI.git, synced 2026-02-02 12:30:50 +08:00
Compare commits (24 commits):

- 3e7b645346
- 24446a4dc4
- 475f473dab
- 8dba32a077
- 1bbbd16df6
- 5cb378256b
- 3ac5f05e8c
- 58d30369b4
- 7dd93a4a25
- 2a3ee8d0e3
- 41577bce07
- 3d7aca22c0
- 680b3f5010
- 9d42e4b239
- 97af785aad
- 0defb68c6c
- d6272d3300
- c99d0dfb33
- 663b9b35ab
- 5dced4c0a6
- 5891785125
- ac3d47e8c0
- e5ed2cba4a
- 847c2502a5
README.md (57 changed lines)
````diff
@@ -23,6 +23,7 @@ Chinese providers have now been added: [Qwen Code](https://github.com/QwenLM/qwe
 - Multiple accounts with round-robin load balancing (Gemini, OpenAI, Claude, Qwen and iFlow)
 - Simple CLI authentication flows (Gemini, OpenAI, Claude, Qwen and iFlow)
 - Generative Language API Key support
+- AI Studio Build multi-account load balancing
 - Gemini CLI multi-account load balancing
 - Claude Code multi-account load balancing
 - Qwen Code multi-account load balancing
@@ -68,6 +69,14 @@ brew install cliproxyapi
 brew services start cliproxyapi
 ```
 
+### Installation via CLIProxyAPI Linux Installer
+
+```bash
+curl -fsSL https://raw.githubusercontent.com/brokechubb/cliproxyapi-installer/refs/heads/master/cliproxyapi-installer | bash
+```
+
+Thanks to [brokechubb](https://github.com/brokechubb) for building the Linux installer!
+
 ## Usage
 
 ### GUI Client & Official WebUI
@@ -260,12 +269,16 @@ console.log(await claudeResponse.json());
 - gemini-2.5-flash-lite
+- gemini-2.5-flash-image
 - gemini-2.5-flash-image-preview
+- gemini-pro-latest
+- gemini-flash-latest
+- gemini-flash-lite-latest
 - gpt-5
 - gpt-5-codex
 - claude-opus-4-1-20250805
 - claude-opus-4-20250514
 - claude-sonnet-4-20250514
 - claude-sonnet-4-5-20250929
 - claude-haiku-4-5-20251001
 - claude-3-7-sonnet-20250219
 - claude-3-5-haiku-20241022
 - qwen3-coder-plus
@@ -277,7 +290,6 @@ console.log(await claudeResponse.json());
 - deepseek-r1
 - deepseek-v3
 - kimi-k2
 - glm-4.5
 - glm-4.6
-- tstars2.0
 - And other iFlow-supported models
@@ -415,7 +427,7 @@ openai-compatibility:
     # api-keys:
     #   - "sk-or-v1-...b780"
     #   - "sk-or-v1-...b781"
-    models: # The models supported by the provider.
+    models: # The models supported by the provider. Or you can use a format such as openrouter://moonshotai/kimi-k2:free to request undefined models
       - name: "moonshotai/kimi-k2:free" # The actual model name.
         alias: "kimi-k2" # The alias used in the API.
 ```
@@ -510,28 +522,37 @@ openai-compatibility:
         alias: "kimi-k2"
 ```
 
 Legacy format (still supported):
 
 ```yaml
 openai-compatibility:
   - name: "openrouter"
     base-url: "https://openrouter.ai/api/v1"
     api-keys:
       - "sk-or-v1-...b780"
       - "sk-or-v1-...b781"
     models:
       - name: "moonshotai/kimi-k2:free"
         alias: "kimi-k2"
 ```
 
 Usage:
 
 Call OpenAI's endpoint `/v1/chat/completions` with `model` set to the alias (e.g., `kimi-k2`). The proxy routes to the configured provider/model automatically.
 
 Also, you may call Claude's endpoint `/v1/messages`, Gemini's `/v1beta/models/model-name:streamGenerateContent` or `/v1beta/models/model-name:generateContent`.
 
 And you can always use Gemini CLI with `CODE_ASSIST_ENDPOINT` set to `http://127.0.0.1:8317` for these OpenAI-compatible providers' models.
 
 ### AI Studio Instructions
 
 You can use this service (CLIProxyAPI) as a backend for [this AI Studio App](https://aistudio.google.com/apps/drive/1CPW7FpWGsDZzkaYgYOyXQ_6FWgxieLmL). Follow the steps below to configure it:
 
 1. **Start the CLIProxyAPI Service**: Ensure your CLIProxyAPI instance is running, either locally or remotely.
 2. **Access the AI Studio App**: Log in to your Google account in your browser, then open the following link:
    - [https://aistudio.google.com/apps/drive/1CPW7FpWGsDZzkaYgYOyXQ_6FWgxieLmL](https://aistudio.google.com/apps/drive/1CPW7FpWGsDZzkaYgYOyXQ_6FWgxieLmL)
 
 #### Connection Configuration
 
 By default, the AI Studio App attempts to connect to a local CLIProxyAPI instance at `ws://127.0.0.1:8317`.
 
 - **Connecting to a Remote Service**:
   If you need to connect to a remotely deployed CLIProxyAPI, modify the `config.ts` file in the AI Studio App to update the `WEBSOCKET_PROXY_URL` value.
   - Use the `wss://` protocol if your remote service has SSL enabled.
   - Use the `ws://` protocol if SSL is not enabled.
 
 #### Authentication Configuration
 
 By default, WebSocket connections to CLIProxyAPI do not require authentication.
 
 - **Enable Authentication on the CLIProxyAPI Server**:
   In your `config.yaml` file, set `ws_auth` to `true`.
 - **Configure Authentication on the AI Studio Client**:
   In the `config.ts` file of the AI Studio App, set the `JWT_TOKEN` value to your authentication token.
 
 ### Authentication Directory
````
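The alias-based routing described in the README hunks above can be exercised from any OpenAI-style client. Below is a minimal Go sketch, assuming a local instance on `http://127.0.0.1:8317` and a configured `kimi-k2` alias (both values taken from the README); it is an illustration, not an official example from the repository.

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// The "model" field carries the configured alias; the proxy resolves it
	// to the real provider model (e.g., moonshotai/kimi-k2:free).
	body := []byte(`{"model":"kimi-k2","messages":[{"role":"user","content":"Hello"}]}`)
	resp, err := http.Post("http://127.0.0.1:8317/v1/chat/completions", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	data, _ := io.ReadAll(resp.Body)
	fmt.Println(string(data))
}
```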
README_CN.md (56 changed lines)
The Chinese README receives the mirror-image changes (translated here):

````diff
@@ -43,6 +43,7 @@
 - Multi-account support with round-robin load balancing (Gemini, OpenAI, Claude, Qwen and iFlow)
 - Simple CLI authentication flows (Gemini, OpenAI, Claude, Qwen and iFlow)
 - Gemini AIStudio API key support
+- AI Studio Build multi-account rotation support
 - Gemini CLI multi-account rotation support
 - Claude Code multi-account rotation support
 - Qwen Code multi-account rotation support
@@ -82,6 +83,14 @@ brew install cliproxyapi
 brew services start cliproxyapi
 ```
 
+### Installation via the CLIProxyAPI Linux Installer
+
+```bash
+curl -fsSL https://raw.githubusercontent.com/brokechubb/cliproxyapi-installer/refs/heads/master/cliproxyapi-installer | bash
+```
+
+Thanks to [brokechubb](https://github.com/brokechubb) for building the Linux installer!
+
 ## Usage
 
 ### GUI Client & Official WebUI
@@ -273,12 +282,16 @@ console.log(await claudeResponse.json());
 - gemini-2.5-flash-lite
+- gemini-2.5-flash-image
 - gemini-2.5-flash-image-preview
+- gemini-pro-latest
+- gemini-flash-latest
+- gemini-flash-lite-latest
 - gpt-5
 - gpt-5-codex
 - claude-opus-4-1-20250805
 - claude-opus-4-20250514
 - claude-sonnet-4-20250514
 - claude-sonnet-4-5-20250929
 - claude-haiku-4-5-20251001
 - claude-3-7-sonnet-20250219
 - claude-3-5-haiku-20241022
 - qwen3-coder-plus
@@ -290,7 +303,6 @@ console.log(await claudeResponse.json());
 - deepseek-r1
 - deepseek-v3
 - kimi-k2
 - glm-4.5
 - glm-4.6
-- tstars2.0
 - And other iFlow-supported models
@@ -428,7 +440,7 @@ openai-compatibility:
     # api-keys:
     #   - "sk-or-v1-...b780"
     #   - "sk-or-v1-...b781"
-    models: # The models supported by the provider.
+    models: # The models supported by the provider. Or use a format such as openrouter://moonshotai/kimi-k2:free to request models not defined here
       - name: "moonshotai/kimi-k2:free" # The actual model name.
         alias: "kimi-k2" # The alias used in the API.
 ```
@@ -523,24 +535,36 @@ openai-compatibility:
         alias: "kimi-k2"
 ```
 
 Legacy format (still supported):
 
 ```yaml
 openai-compatibility:
   - name: "openrouter"
     base-url: "https://openrouter.ai/api/v1"
     api-keys:
       - "sk-or-v1-...b780"
       - "sk-or-v1-...b781"
     models:
       - name: "moonshotai/kimi-k2:free"
         alias: "kimi-k2"
 ```
 
 Usage: in `/v1/chat/completions`, set `model` to the alias (e.g., `kimi-k2`); the proxy automatically routes to the corresponding provider and model.
 
 Also, for these OpenAI-compatible providers' models, you can always use the Gemini CLI with `CODE_ASSIST_ENDPOINT` set to `http://127.0.0.1:8317`.
 
 ### AI Studio Instructions
 
 You can use this service (CLIProxyAPI) as a backend for [this AI Studio App](https://aistudio.google.com/apps/drive/1CPW7FpWGsDZzkaYgYOyXQ_6FWgxieLmL). Follow the steps below to configure it:
 
 1. **Start the CLIProxyAPI service**: make sure your CLIProxyAPI instance is running, locally or remotely.
 2. **Open the AI Studio App**: sign in to your Google account in the browser, then open the following link:
    - [https://aistudio.google.com/apps/drive/1CPW7FpWGsDZzkaYgYOyXQ_6FWgxieLmL](https://aistudio.google.com/apps/drive/1CPW7FpWGsDZzkaYgYOyXQ_6FWgxieLmL)
 
 #### Connection Configuration
 
 By default, the AI Studio App tries to connect to a local CLIProxyAPI instance (`ws://127.0.0.1:8317`).
 
 - **Connecting to a remote service**:
   To connect to a remotely deployed CLIProxyAPI, edit the `config.ts` file in the AI Studio App and update the `WEBSOCKET_PROXY_URL` value.
   - Use the `wss://` protocol if your remote service has SSL enabled.
   - Use the `ws://` protocol if SSL is not enabled.
 
 #### Authentication Configuration
 
 By default, WebSocket connections to CLIProxyAPI do not require authentication.
 
 - **Enable authentication on the CLIProxyAPI server**:
   in your `config.yaml`, set `ws_auth` to `true`.
 - **Configure authentication on the AI Studio client**:
   in the AI Studio App's `config.ts`, set `JWT_TOKEN` to your authentication token.
 
 ### Authentication Directory
 
 The `auth-dir` parameter specifies where authentication tokens are stored. When you run the login command, the application creates JSON files in this directory containing the Google account's authentication tokens. Multiple accounts can be used for rotation.
````
```diff
@@ -146,6 +146,10 @@ func (MyExecutor) ExecuteStream(ctx context.Context, a *coreauth.Auth, req clipe
 	return ch, nil
 }
 
+func (MyExecutor) CountTokens(ctx context.Context, a *coreauth.Auth, req clipexec.Request, opts clipexec.Options) (clipexec.Response, error) {
+	return clipexec.Response{}, errors.New("not implemented")
+}
+
 func (MyExecutor) Refresh(ctx context.Context, a *coreauth.Auth) (*coreauth.Auth, error) {
 	return a, nil
 }
```
go.mod (1 changed line)
```diff
@@ -28,6 +28,7 @@ require (
 	cloud.google.com/go/compute/metadata v0.3.0 // indirect
 	github.com/Microsoft/go-winio v0.6.2 // indirect
 	github.com/ProtonMail/go-crypto v1.3.0 // indirect
+	github.com/andybalholm/brotli v1.0.6 // indirect
 	github.com/bytedance/sonic v1.11.6 // indirect
 	github.com/bytedance/sonic/loader v0.1.1 // indirect
 	github.com/cloudflare/circl v1.6.1 // indirect
```
go.sum (2 changed lines)
```diff
@@ -4,6 +4,8 @@ github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERo
 github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
 github.com/ProtonMail/go-crypto v1.3.0 h1:ILq8+Sf5If5DCpHQp4PbZdS1J7HDFRXz/+xKBiRGFrw=
 github.com/ProtonMail/go-crypto v1.3.0/go.mod h1:9whxjD8Rbs29b4XWbB8irEcE8KHMqaR2e7GWU1R+/PE=
+github.com/andybalholm/brotli v1.0.6 h1:Yf9fFpf49Zrxb9NlQaluyE92/+X7UVHlhMNJN2sxfOI=
+github.com/andybalholm/brotli v1.0.6/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
 github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8=
 github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4=
 github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
```
```diff
@@ -225,9 +225,13 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk
 	envManagementSecret := envAdminPasswordSet && envAdminPassword != ""
 
 	// Create server instance
+	providerNames := make([]string, 0, len(cfg.OpenAICompatibility))
+	for _, p := range cfg.OpenAICompatibility {
+		providerNames = append(providerNames, p.Name)
+	}
 	s := &Server{
 		engine:        engine,
-		handlers:      handlers.NewBaseAPIHandlers(&cfg.SDKConfig, authManager),
+		handlers:      handlers.NewBaseAPIHandlers(&cfg.SDKConfig, authManager, providerNames),
 		cfg:           cfg,
 		accessManager: accessManager,
 		requestLogger: requestLogger,
@@ -823,6 +827,13 @@ func (s *Server) UpdateClients(cfg *config.Config) {
 	managementasset.SetCurrentConfig(cfg)
 	// Save YAML snapshot for next comparison
 	s.oldConfigYaml, _ = yaml.Marshal(cfg)
 
+	providerNames := make([]string, 0, len(cfg.OpenAICompatibility))
+	for _, p := range cfg.OpenAICompatibility {
+		providerNames = append(providerNames, p.Name)
+	}
+	s.handlers.OpenAICompatProviders = providerNames
+
 	s.handlers.UpdateClients(&cfg.SDKConfig)
 
 	if !cfg.RemoteManagement.DisableControlPanel {
@@ -903,5 +914,3 @@ func AuthMiddleware(manager *sdkaccess.Manager) gin.HandlerFunc {
 		}
 	}
 }
-
-// legacy clientsToSlice removed; handlers no longer consume legacy client slices
```
```diff
@@ -15,6 +15,10 @@ import (
 	"strings"
 	"time"
 
+	"github.com/andybalholm/brotli"
+	"github.com/klauspost/compress/zstd"
 	log "github.com/sirupsen/logrus"
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 )
@@ -411,6 +415,10 @@ func (l *FileRequestLogger) decompressResponse(responseHeaders map[string][]stri
 		return l.decompressGzip(response)
 	case "deflate":
 		return l.decompressDeflate(response)
+	case "br":
+		return l.decompressBrotli(response)
+	case "zstd":
+		return l.decompressZstd(response)
 	default:
 		// No compression or unsupported compression
 		return response, nil
@@ -431,7 +439,9 @@ func (l *FileRequestLogger) decompressGzip(data []byte) ([]byte, error) {
 		return nil, fmt.Errorf("failed to create gzip reader: %w", err)
 	}
 	defer func() {
-		_ = reader.Close()
+		if errClose := reader.Close(); errClose != nil {
+			log.WithError(errClose).Warn("failed to close gzip reader in request logger")
+		}
 	}()
 
 	decompressed, err := io.ReadAll(reader)
@@ -453,7 +463,9 @@ func (l *FileRequestLogger) decompressGzip(data []byte) ([]byte, error) {
 func (l *FileRequestLogger) decompressDeflate(data []byte) ([]byte, error) {
 	reader := flate.NewReader(bytes.NewReader(data))
 	defer func() {
-		_ = reader.Close()
+		if errClose := reader.Close(); errClose != nil {
+			log.WithError(errClose).Warn("failed to close deflate reader in request logger")
+		}
 	}()
 
 	decompressed, err := io.ReadAll(reader)
@@ -464,6 +476,48 @@ func (l *FileRequestLogger) decompressDeflate(data []byte) ([]byte, error) {
 	return decompressed, nil
 }
 
+// decompressBrotli decompresses brotli-encoded data.
+//
+// Parameters:
+//   - data: The brotli-encoded data to decompress
+//
+// Returns:
+//   - []byte: The decompressed data
+//   - error: An error if decompression fails, nil otherwise
+func (l *FileRequestLogger) decompressBrotli(data []byte) ([]byte, error) {
+	reader := brotli.NewReader(bytes.NewReader(data))
+
+	decompressed, err := io.ReadAll(reader)
+	if err != nil {
+		return nil, fmt.Errorf("failed to decompress brotli data: %w", err)
+	}
+
+	return decompressed, nil
+}
+
+// decompressZstd decompresses zstd-encoded data.
+//
+// Parameters:
+//   - data: The zstd-encoded data to decompress
+//
+// Returns:
+//   - []byte: The decompressed data
+//   - error: An error if decompression fails, nil otherwise
+func (l *FileRequestLogger) decompressZstd(data []byte) ([]byte, error) {
+	decoder, err := zstd.NewReader(bytes.NewReader(data))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create zstd reader: %w", err)
+	}
+	defer decoder.Close()
+
+	decompressed, err := io.ReadAll(decoder)
+	if err != nil {
+		return nil, fmt.Errorf("failed to decompress zstd data: %w", err)
+	}
+
+	return decompressed, nil
+}
+
 // formatRequestInfo creates the request information section of the log.
 //
 // Parameters:
```
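The new `br` and `zstd` branches lean on `github.com/andybalholm/brotli` and `github.com/klauspost/compress/zstd`, matching the go.mod addition above. Here is a self-contained round-trip sketch of the zstd decoder API that `decompressZstd` uses, assuming those modules are on the module path:

```go
package main

import (
	"bytes"
	"fmt"
	"io"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Compress a payload in memory, then decompress it the same way
	// decompressZstd does: NewReader over a bytes.Reader, ReadAll, Close.
	var buf bytes.Buffer
	enc, _ := zstd.NewWriter(&buf)
	enc.Write([]byte("zstd round trip"))
	enc.Close()

	dec, err := zstd.NewReader(bytes.NewReader(buf.Bytes()))
	if err != nil {
		panic(err)
	}
	defer dec.Close()

	out, _ := io.ReadAll(dec)
	fmt.Println(string(out)) // "zstd round trip"
}
```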
```diff
@@ -68,84 +68,8 @@ func GetClaudeModels() []*ModelInfo {
 	}
 }
 
-// GetGeminiModels returns the standard Gemini model definitions
-func GetGeminiModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                         "gemini-2.5-flash",
-			Object:                     "model",
-			Created:                    time.Now().Unix(),
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash",
-			Version:                    "001",
-			DisplayName:                "Gemini 2.5 Flash",
-			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-		},
-		{
-			ID:                         "gemini-2.5-pro",
-			Object:                     "model",
-			Created:                    time.Now().Unix(),
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-pro",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Pro",
-			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-		},
-		{
-			ID:                         "gemini-2.5-flash-lite",
-			Object:                     "model",
-			Created:                    time.Now().Unix(),
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-lite",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Lite",
-			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Flash Lite",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-		},
-		{
-			ID:                         "gemini-2.5-flash-image-preview",
-			Object:                     "model",
-			Created:                    time.Now().Unix(),
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-image-preview",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Image Preview",
-			Description:                "State-of-the-art image generation and editing model.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           8192,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-		},
-		{
-			ID:                         "gemini-2.5-flash-image",
-			Object:                     "model",
-			Created:                    time.Now().Unix(),
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-image",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Image",
-			Description:                "State-of-the-art image generation and editing model.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           8192,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-		},
-	}
-}
-
-// GetGeminiCLIModels returns the standard Gemini model definitions
-func GetGeminiCLIModels() []*ModelInfo {
+// GeminiModels returns the shared base Gemini model set used by multiple providers.
+func GeminiModels() []*ModelInfo {
 	return []*ModelInfo{
 		{
 			ID: "gemini-2.5-flash",
@@ -160,6 +84,7 @@ func GetGeminiCLIModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		{
 			ID: "gemini-2.5-pro",
@@ -174,6 +99,7 @@ func GetGeminiCLIModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
 			ID: "gemini-2.5-flash-lite",
@@ -188,6 +114,7 @@ func GetGeminiCLIModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		{
 			ID: "gemini-2.5-flash-image-preview",
@@ -202,6 +129,7 @@ func GetGeminiCLIModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           8192,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			// image models don't support thinkingConfig; leave Thinking nil
 		},
 		{
 			ID: "gemini-2.5-flash-image",
@@ -216,10 +144,69 @@ func GetGeminiCLIModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           8192,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			// image models don't support thinkingConfig; leave Thinking nil
 		},
 	}
 }
 
+// GetGeminiModels returns the standard Gemini model definitions
+func GetGeminiModels() []*ModelInfo { return GeminiModels() }
+
+// GetGeminiCLIModels returns the standard Gemini model definitions
+func GetGeminiCLIModels() []*ModelInfo { return GeminiModels() }
+
+// GetAIStudioModels returns the Gemini model definitions for AI Studio integrations
+func GetAIStudioModels() []*ModelInfo {
+	base := GeminiModels()
+	return append(base,
+		&ModelInfo{
+			ID:                         "gemini-pro-latest",
+			Object:                     "model",
+			Created:                    time.Now().Unix(),
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-pro-latest",
+			Version:                    "2.5",
+			DisplayName:                "Gemini Pro Latest",
+			Description:                "Latest release of Gemini Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		&ModelInfo{
+			ID:                         "gemini-flash-latest",
+			Object:                     "model",
+			Created:                    time.Now().Unix(),
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-flash-latest",
+			Version:                    "2.5",
+			DisplayName:                "Gemini Flash Latest",
+			Description:                "Latest release of Gemini Flash",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		&ModelInfo{
+			ID:                         "gemini-flash-lite-latest",
+			Object:                     "model",
+			Created:                    time.Now().Unix(),
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-flash-lite-latest",
+			Version:                    "2.5",
+			DisplayName:                "Gemini Flash-Lite Latest",
+			Description:                "Latest release of Gemini Flash-Lite",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+	)
+}
+
 // GetOpenAIModels returns the standard OpenAI model definitions
 func GetOpenAIModels() []*ModelInfo {
 	return []*ModelInfo{
@@ -417,7 +404,6 @@ func GetIFlowModels() []*ModelInfo {
 		{ID: "qwen3-vl-plus", DisplayName: "Qwen3-VL-Plus", Description: "Qwen3 multimodal vision-language"},
 		{ID: "qwen3-max-preview", DisplayName: "Qwen3-Max-Preview", Description: "Qwen3 Max preview build"},
 		{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905"},
 		{ID: "glm-4.5", DisplayName: "GLM-4.5", Description: "Zhipu GLM 4.5 general model"},
 		{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model"},
 		{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model"},
 		{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental"},
```
```diff
@@ -45,6 +45,23 @@ type ModelInfo struct {
 	MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
 	// SupportedParameters lists supported parameters
 	SupportedParameters []string `json:"supported_parameters,omitempty"`
+
+	// Thinking holds provider-specific reasoning/thinking budget capabilities.
+	// This is optional and currently used for Gemini thinking budget normalization.
+	Thinking *ThinkingSupport `json:"thinking,omitempty"`
+}
+
+// ThinkingSupport describes a model family's supported internal reasoning budget range.
+// Values are interpreted in provider-native token units.
+type ThinkingSupport struct {
+	// Min is the minimum allowed thinking budget (inclusive).
+	Min int `json:"min,omitempty"`
+	// Max is the maximum allowed thinking budget (inclusive).
+	Max int `json:"max,omitempty"`
+	// ZeroAllowed indicates whether 0 is a valid value (to disable thinking).
+	ZeroAllowed bool `json:"zero_allowed,omitempty"`
+	// DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget).
+	DynamicAllowed bool `json:"dynamic_allowed,omitempty"`
 }
 
 // ModelRegistration tracks a model's availability
@@ -652,6 +669,17 @@ func (r *ModelRegistry) GetModelProviders(modelID string) []string {
 	return result
 }
 
+// GetModelInfo returns the registered ModelInfo for the given model ID, if present.
+// Returns nil if the model is unknown to the registry.
+func (r *ModelRegistry) GetModelInfo(modelID string) *ModelInfo {
+	r.mutex.RLock()
+	defer r.mutex.RUnlock()
+	if reg, ok := r.models[modelID]; ok && reg != nil {
+		return reg.Info
+	}
+	return nil
+}
+
 // convertModelToMap converts ModelInfo to the appropriate format for different handler types
 func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string) map[string]any {
 	if model == nil {
```
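To make the `ThinkingSupport` contract concrete, here is a hypothetical normalizer showing how the four fields are meant to be read together. This is an illustration of the documented semantics, not the body of the repository's `util.NormalizeThinkingBudget`:

```go
package main

import "fmt"

// ThinkingSupport mirrors the struct added in the diff above.
type ThinkingSupport struct {
	Min, Max                    int
	ZeroAllowed, DynamicAllowed bool
}

// normalizeBudget is a hypothetical reading of the contract: honor -1
// (dynamic) and 0 (disabled) only when allowed, otherwise clamp into the
// inclusive [Min, Max] range.
func normalizeBudget(s *ThinkingSupport, budget int) int {
	if s == nil {
		return budget
	}
	switch {
	case budget == -1 && s.DynamicAllowed:
		return -1
	case budget == 0 && s.ZeroAllowed:
		return 0
	case budget < s.Min:
		return s.Min
	case budget > s.Max:
		return s.Max
	default:
		return budget
	}
}

func main() {
	pro := &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}
	fmt.Println(normalizeBudget(pro, 0))     // 128: zero not allowed for this family
	fmt.Println(normalizeBudget(pro, -1))    // -1: dynamic budget allowed
	fmt.Println(normalizeBudget(pro, 50000)) // 32768: clamped to Max
}
```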
```diff
@@ -19,27 +19,27 @@ import (
 	"github.com/tidwall/sjson"
 )
 
-// AistudioExecutor routes AI Studio requests through a websocket-backed transport.
-type AistudioExecutor struct {
+// AIStudioExecutor routes AI Studio requests through a websocket-backed transport.
+type AIStudioExecutor struct {
 	provider string
 	relay    *wsrelay.Manager
 	cfg      *config.Config
 }
 
-// NewAistudioExecutor constructs a websocket executor for the provider name.
-func NewAistudioExecutor(cfg *config.Config, provider string, relay *wsrelay.Manager) *AistudioExecutor {
-	return &AistudioExecutor{provider: strings.ToLower(provider), relay: relay, cfg: cfg}
+// NewAIStudioExecutor constructs a websocket executor for the provider name.
+func NewAIStudioExecutor(cfg *config.Config, provider string, relay *wsrelay.Manager) *AIStudioExecutor {
+	return &AIStudioExecutor{provider: strings.ToLower(provider), relay: relay, cfg: cfg}
 }
 
-// Identifier returns the provider key served by this executor.
-func (e *AistudioExecutor) Identifier() string { return e.provider }
+// Identifier returns the logical provider key for routing.
+func (e *AIStudioExecutor) Identifier() string { return "aistudio" }
 
 // PrepareRequest is a no-op because websocket transport already injects headers.
-func (e *AistudioExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error {
+func (e *AIStudioExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error {
 	return nil
 }
 
-func (e *AistudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
+func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)
 
@@ -66,14 +66,14 @@ func (e *AistudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
 		Method:  http.MethodPost,
 		Headers: wsReq.Headers.Clone(),
 		Body:    bytes.Clone(body.payload),
-		Provider:  e.provider,
+		Provider:  e.Identifier(),
 		AuthID:    authID,
 		AuthLabel: authLabel,
 		AuthType:  authType,
 		AuthValue: authValue,
 	})
 
-	wsResp, err := e.relay.NonStream(ctx, e.provider, wsReq)
+	wsResp, err := e.relay.NonStream(ctx, authID, wsReq)
 	if err != nil {
 		recordAPIResponseError(ctx, e.cfg, err)
 		return resp, err
@@ -92,7 +92,7 @@ func (e *AistudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
 	return resp, nil
 }
 
-func (e *AistudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
+func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)
 
@@ -118,13 +118,13 @@ func (e *AistudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 		Method:  http.MethodPost,
 		Headers: wsReq.Headers.Clone(),
 		Body:    bytes.Clone(body.payload),
-		Provider:  e.provider,
+		Provider:  e.Identifier(),
 		AuthID:    authID,
 		AuthLabel: authLabel,
 		AuthType:  authType,
 		AuthValue: authValue,
 	})
-	wsStream, err := e.relay.Stream(ctx, e.provider, wsReq)
+	wsStream, err := e.relay.Stream(ctx, authID, wsReq)
 	if err != nil {
 		recordAPIResponseError(ctx, e.cfg, err)
 		return nil, err
@@ -151,7 +151,7 @@ func (e *AistudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 			case wsrelay.MessageTypeStreamChunk:
 				if len(event.Payload) > 0 {
 					appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(event.Payload))
-					filtered := filterAistudioUsageMetadata(event.Payload)
+					filtered := filterAIStudioUsageMetadata(event.Payload)
 					if detail, ok := parseGeminiStreamUsage(filtered); ok {
 						reporter.publish(ctx, detail)
 					}
@@ -188,7 +188,7 @@ func (e *AistudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 	return stream, nil
 }
 
-func (e *AistudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
 	_, body, err := e.translateRequest(req, opts, false)
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
@@ -215,13 +215,13 @@ func (e *AistudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
 		Method:  http.MethodPost,
 		Headers: wsReq.Headers.Clone(),
 		Body:    bytes.Clone(body.payload),
-		Provider:  e.provider,
+		Provider:  e.Identifier(),
 		AuthID:    authID,
 		AuthLabel: authLabel,
 		AuthType:  authType,
 		AuthValue: authValue,
 	})
-	resp, err := e.relay.NonStream(ctx, e.provider, wsReq)
+	resp, err := e.relay.NonStream(ctx, authID, wsReq)
 	if err != nil {
 		recordAPIResponseError(ctx, e.cfg, err)
 		return cliproxyexecutor.Response{}, err
@@ -241,7 +241,7 @@ func (e *AistudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
 	return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
 }
 
-func (e *AistudioExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
+func (e *AIStudioExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
 	_ = ctx
 	return auth, nil
 }
@@ -252,14 +252,18 @@ type translatedPayload struct {
 	toFormat sdktranslator.Format
 }
 
-func (e *AistudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool) ([]byte, translatedPayload, error) {
+func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool) ([]byte, translatedPayload, error) {
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok {
+	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
 		}
 		payload = util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
 	}
-	payload = disableGeminiThinkingConfig(payload, req.Model)
+	payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
+	payload = fixGeminiImageAspectRatio(req.Model, payload)
 	metadataAction := "generateContent"
 	if req.Metadata != nil {
@@ -275,7 +279,7 @@ func (e *AistudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
 	return payload, translatedPayload{payload: payload, action: action, toFormat: to}, nil
 }
 
-func (e *AistudioExecutor) buildEndpoint(model, action, alt string) string {
+func (e *AIStudioExecutor) buildEndpoint(model, action, alt string) string {
 	base := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, model, action)
 	if action == "streamGenerateContent" {
 		if alt == "" {
@@ -289,9 +293,9 @@ func (e *AistudioExecutor) buildEndpoint(model, action, alt string) string {
 	return base
 }
 
-// filterAistudioUsageMetadata removes usageMetadata from intermediate SSE events so that
+// filterAIStudioUsageMetadata removes usageMetadata from intermediate SSE events so that
 // only the terminal chunk retains token statistics.
-func filterAistudioUsageMetadata(payload []byte) []byte {
+func filterAIStudioUsageMetadata(payload []byte) []byte {
 	if len(payload) == 0 {
 		return payload
 	}
```
```diff
@@ -3,6 +3,8 @@ package executor
 import (
 	"bufio"
 	"bytes"
+	"compress/flate"
+	"compress/gzip"
 	"context"
 	"fmt"
 	"io"
@@ -10,6 +12,7 @@
 	"strings"
 	"time"
 
+	"github.com/andybalholm/brotli"
 	"github.com/klauspost/compress/zstd"
 	claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
@@ -89,31 +92,31 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 		recordAPIResponseError(ctx, e.cfg, err)
 		return resp, err
 	}
-	defer func() {
-		if errClose := httpResp.Body.Close(); errClose != nil {
-			log.Errorf("response body close error: %v", errClose)
-		}
-	}()
 	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
 		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, string(b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
+		if errClose := httpResp.Body.Close(); errClose != nil {
+			log.Errorf("response body close error: %v", errClose)
+		}
 		return resp, err
 	}
-	reader := io.Reader(httpResp.Body)
-	var decoder *zstd.Decoder
-	if hasZSTDEcoding(httpResp.Header.Get("Content-Encoding")) {
-		decoder, err = zstd.NewReader(httpResp.Body)
-		if err != nil {
-			recordAPIResponseError(ctx, e.cfg, err)
-			return resp, fmt.Errorf("failed to initialize zstd decoder: %w", err)
-		}
-		reader = decoder
-		defer decoder.Close()
-	}
-	data, err := io.ReadAll(reader)
+	decodedBody, err := decodeResponseBody(httpResp.Body, httpResp.Header.Get("Content-Encoding"))
+	if err != nil {
+		recordAPIResponseError(ctx, e.cfg, err)
+		if errClose := httpResp.Body.Close(); errClose != nil {
+			log.Errorf("response body close error: %v", errClose)
+		}
+		return resp, err
+	}
+	defer func() {
+		if errClose := decodedBody.Close(); errClose != nil {
+			log.Errorf("response body close error: %v", errClose)
+		}
+	}()
+	data, err := io.ReadAll(decodedBody)
 	if err != nil {
 		recordAPIResponseError(ctx, e.cfg, err)
 		return resp, err
@@ -192,19 +195,27 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return nil, err
 	}
+	decodedBody, err := decodeResponseBody(httpResp.Body, httpResp.Header.Get("Content-Encoding"))
+	if err != nil {
+		recordAPIResponseError(ctx, e.cfg, err)
+		if errClose := httpResp.Body.Close(); errClose != nil {
+			log.Errorf("response body close error: %v", errClose)
+		}
+		return nil, err
+	}
 	out := make(chan cliproxyexecutor.StreamChunk)
 	stream = out
 	go func() {
 		defer close(out)
 		defer func() {
-			if errClose := httpResp.Body.Close(); errClose != nil {
+			if errClose := decodedBody.Close(); errClose != nil {
 				log.Errorf("response body close error: %v", errClose)
 			}
 		}()
 
 		// If from == to (Claude → Claude), directly forward the SSE stream without translation
 		if from == to {
-			scanner := bufio.NewScanner(httpResp.Body)
+			scanner := bufio.NewScanner(decodedBody)
 			buf := make([]byte, 20_971_520)
 			scanner.Buffer(buf, 20_971_520)
 			for scanner.Scan() {
@@ -228,7 +239,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 		}
 
 		// For other formats, use translation
-		scanner := bufio.NewScanner(httpResp.Body)
+		scanner := bufio.NewScanner(decodedBody)
 		buf := make([]byte, 20_971_520)
 		scanner.Buffer(buf, 20_971_520)
 		var param any
@@ -304,29 +315,29 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 		recordAPIResponseError(ctx, e.cfg, err)
 		return cliproxyexecutor.Response{}, err
 	}
-	defer func() {
-		if errClose := resp.Body.Close(); errClose != nil {
-			log.Errorf("response body close error: %v", errClose)
-		}
-	}()
 	recordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone())
 	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
 		b, _ := io.ReadAll(resp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("response body close error: %v", errClose)
+		}
 		return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: string(b)}
 	}
-	reader := io.Reader(resp.Body)
-	var decoder *zstd.Decoder
-	if hasZSTDEcoding(resp.Header.Get("Content-Encoding")) {
-		decoder, err = zstd.NewReader(resp.Body)
-		if err != nil {
-			recordAPIResponseError(ctx, e.cfg, err)
-			return cliproxyexecutor.Response{}, fmt.Errorf("failed to initialize zstd decoder: %w", err)
-		}
-		reader = decoder
-		defer decoder.Close()
-	}
-	data, err := io.ReadAll(reader)
+	decodedBody, err := decodeResponseBody(resp.Body, resp.Header.Get("Content-Encoding"))
+	if err != nil {
+		recordAPIResponseError(ctx, e.cfg, err)
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("response body close error: %v", errClose)
+		}
+		return cliproxyexecutor.Response{}, err
+	}
+	defer func() {
+		if errClose := decodedBody.Close(); errClose != nil {
+			log.Errorf("response body close error: %v", errClose)
+		}
+	}()
+	data, err := io.ReadAll(decodedBody)
 	if err != nil {
 		recordAPIResponseError(ctx, e.cfg, err)
 		return cliproxyexecutor.Response{}, err
@@ -419,7 +430,7 @@ func (e *ClaudeExecutor) resolveClaudeConfig(auth *cliproxyauth.Auth) *config.Cl
 			continue
 		}
 		if attrKey != "" && strings.EqualFold(cfgKey, attrKey) {
-			if attrBase == "" || cfgBase == "" || strings.EqualFold(cfgBase, attrBase) {
+			if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) {
 				return entry
 			}
 		}
@@ -438,17 +449,84 @@ func (e *ClaudeExecutor) resolveClaudeConfig(auth *cliproxyauth.Auth) *config.Cl
 	return nil
 }
 
-func hasZSTDEcoding(contentEncoding string) bool {
-	if contentEncoding == "" {
-		return false
-	}
-	parts := strings.Split(contentEncoding, ",")
-	for i := range parts {
-		if strings.EqualFold(strings.TrimSpace(parts[i]), "zstd") {
-			return true
-		}
-	}
-	return false
+type compositeReadCloser struct {
+	io.Reader
+	closers []func() error
+}
+
+func (c *compositeReadCloser) Close() error {
+	var firstErr error
+	for i := range c.closers {
+		if c.closers[i] == nil {
+			continue
+		}
+		if err := c.closers[i](); err != nil && firstErr == nil {
+			firstErr = err
+		}
+	}
+	return firstErr
+}
+
+func decodeResponseBody(body io.ReadCloser, contentEncoding string) (io.ReadCloser, error) {
+	if body == nil {
+		return nil, fmt.Errorf("response body is nil")
+	}
+	if contentEncoding == "" {
+		return body, nil
+	}
+	encodings := strings.Split(contentEncoding, ",")
+	for _, raw := range encodings {
+		encoding := strings.TrimSpace(strings.ToLower(raw))
+		switch encoding {
+		case "", "identity":
+			continue
+		case "gzip":
+			gzipReader, err := gzip.NewReader(body)
+			if err != nil {
+				_ = body.Close()
+				return nil, fmt.Errorf("failed to create gzip reader: %w", err)
+			}
+			return &compositeReadCloser{
+				Reader: gzipReader,
+				closers: []func() error{
+					gzipReader.Close,
+					func() error { return body.Close() },
+				},
+			}, nil
+		case "deflate":
+			deflateReader := flate.NewReader(body)
+			return &compositeReadCloser{
+				Reader: deflateReader,
+				closers: []func() error{
+					deflateReader.Close,
+					func() error { return body.Close() },
+				},
+			}, nil
+		case "br":
+			return &compositeReadCloser{
+				Reader: brotli.NewReader(body),
+				closers: []func() error{
+					func() error { return body.Close() },
+				},
+			}, nil
+		case "zstd":
+			decoder, err := zstd.NewReader(body)
+			if err != nil {
+				_ = body.Close()
+				return nil, fmt.Errorf("failed to create zstd reader: %w", err)
+			}
+			return &compositeReadCloser{
+				Reader: decoder,
+				closers: []func() error{
+					func() error { decoder.Close(); return nil },
+					func() error { return body.Close() },
+				},
+			}, nil
+		default:
+			continue
+		}
+	}
+	return body, nil
 }
 
 func applyClaudeHeaders(r *http.Request, apiKey string, stream bool) {
@@ -473,8 +551,8 @@ func applyClaudeHeaders(r *http.Request, apiKey string, stream bool) {
 	misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Arch", "arm64")
 	misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Os", "MacOS")
 	misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Timeout", "60")
-	misc.EnsureHeader(r.Header, ginHeaders, "User-Agent", "claude-cli/1.0.83 (external, cli)")
 	r.Header.Set("Connection", "keep-alive")
+	r.Header.Set("User-Agent", "claude-cli/1.0.83 (external, cli)")
 	r.Header.Set("Accept-Encoding", "gzip, deflate, br, zstd")
 	if stream {
 		r.Header.Set("Accept", "text/event-stream")
```
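The `compositeReadCloser` introduced above guarantees that both the decompressing reader and the underlying body get closed, whichever encoding is in play. Here is a self-contained sketch of the same pattern over an in-memory gzip stream, standard library only; the names mirror the diff but this is a demonstration, not the repository file:

```go
package main

import (
	"bytes"
	"compress/gzip"
	"fmt"
	"io"
)

// compositeReadCloser reads from a decompressing reader while closing every
// underlying resource, returning the first close error encountered.
type compositeReadCloser struct {
	io.Reader
	closers []func() error
}

func (c *compositeReadCloser) Close() error {
	var firstErr error
	for _, fn := range c.closers {
		if err := fn(); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}

func main() {
	// Build a gzip-compressed payload in memory.
	var buf bytes.Buffer
	zw := gzip.NewWriter(&buf)
	zw.Write([]byte("hello, compressed world"))
	zw.Close()

	body := io.NopCloser(&buf) // stands in for an http.Response.Body
	zr, err := gzip.NewReader(body)
	if err != nil {
		panic(err)
	}
	rc := &compositeReadCloser{
		Reader:  zr,
		closers: []func() error{zr.Close, body.Close},
	}
	defer rc.Close()

	data, _ := io.ReadAll(rc)
	fmt.Println(string(data)) // "hello, compressed world"
}
```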
```diff
@@ -532,6 +532,7 @@ func applyCodexHeaders(r *http.Request, auth *cliproxyauth.Auth, token string) {
 	misc.EnsureHeader(r.Header, ginHeaders, "Version", "0.21.0")
 	misc.EnsureHeader(r.Header, ginHeaders, "Openai-Beta", "responses=experimental")
 	misc.EnsureHeader(r.Header, ginHeaders, "Session_id", uuid.NewString())
+	misc.EnsureHeader(r.Header, ginHeaders, "User-Agent", "codex_cli_rs/0.50.0 (Mac OS 26.0.1; arm64) Apple_Terminal/464")
 
 	r.Header.Set("Accept", "text/event-stream")
 	r.Header.Set("Connection", "Keep-Alive")
```
```diff
@@ -63,9 +63,14 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	to := sdktranslator.FromString("gemini-cli")
 	budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if hasOverride {
+	if hasOverride && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
 		}
 		basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride)
 	}
+	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
+	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
 
 	action := "generateContent"
@@ -92,7 +97,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	var lastStatus int
 	var lastBody []byte
 
-	for _, attemptModel := range models {
+	for idx, attemptModel := range models {
 		payload := append([]byte(nil), basePayload...)
 		if action == "countTokens" {
 			payload = deleteJSONField(payload, "project")
@@ -101,7 +106,6 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 			payload = setJSONField(payload, "project", projectID)
 			payload = setJSONField(payload, "model", attemptModel)
 		}
-		payload = disableGeminiThinkingConfig(payload, attemptModel)
 
 		tok, errTok := tokenSource.Token()
 		if errTok != nil {
@@ -166,7 +170,11 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 			lastBody = append([]byte(nil), data...)
 			log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, string(data))
 			if httpResp.StatusCode == 429 {
-				log.Debugf("gemini cli executor: rate limited, retrying with next model")
+				if idx+1 < len(models) {
+					log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
+				} else {
+					log.Debug("gemini cli executor: rate limited, no additional fallback model")
+				}
 				continue
 			}
@@ -196,9 +204,14 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	to := sdktranslator.FromString("gemini-cli")
 	budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	if hasOverride {
+	if hasOverride && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
 		}
 		basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride)
 	}
+	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
+	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
 
 	projectID := strings.TrimSpace(stringValue(auth.Metadata, "project_id"))
@@ -219,11 +232,10 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	var lastStatus int
 	var lastBody []byte
 
-	for _, attemptModel := range models {
+	for idx, attemptModel := range models {
 		payload := append([]byte(nil), basePayload...)
 		payload = setJSONField(payload, "project", projectID)
 		payload = setJSONField(payload, "model", attemptModel)
-		payload = disableGeminiThinkingConfig(payload, attemptModel)
 
 		tok, errTok := tokenSource.Token()
 		if errTok != nil {
@@ -282,7 +294,11 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 			lastBody = append([]byte(nil), data...)
 			log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, string(data))
 			if httpResp.StatusCode == 429 {
-				log.Debugf("gemini cli executor: rate limited, retrying with next model")
+				if idx+1 < len(models) {
+					log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
+				} else {
+					log.Debug("gemini cli executor: rate limited, no additional fallback model")
+				}
 				continue
 			}
 			err = statusErr{code: httpResp.StatusCode, msg: string(data)}
@@ -393,12 +409,16 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
 	budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
 	for _, attemptModel := range models {
 		payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false)
-		if hasOverride {
+		if hasOverride && util.ModelSupportsThinking(req.Model) {
 			if budgetOverride != nil {
 				norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 				budgetOverride = &norm
 			}
 			payload = util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
 		}
 		payload = deleteJSONField(payload, "project")
 		payload = deleteJSONField(payload, "model")
-		payload = disableGeminiThinkingConfig(payload, attemptModel)
+		payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
+		payload = fixGeminiCLIImageAspectRatio(attemptModel, payload)
 
 		tok, errTok := tokenSource.Token()
@@ -623,29 +643,6 @@ func cliPreviewFallbackOrder(model string) []string {
 	}
 }
 
-func disableGeminiThinkingConfig(body []byte, model string) []byte {
-	if !geminiModelDisallowsThinking(model) {
-		return body
-	}
-
-	updated := deleteJSONField(body, "request.generationConfig.thinkingConfig")
-	updated = deleteJSONField(updated, "generationConfig.thinkingConfig")
-	return updated
-}
-
-func geminiModelDisallowsThinking(model string) bool {
-	if model == "" {
-		return false
-	}
-	lower := strings.ToLower(model)
-	for _, marker := range []string{"gemini-2.5-flash-image-preview", "gemini-2.5-flash-image"} {
-		if strings.Contains(lower, marker) {
-			return true
-		}
-	}
-	return false
-}
-
 // setJSONField sets a top-level JSON field on a byte slice payload via sjson.
 func setJSONField(body []byte, key, value string) []byte {
 	if key == "" {
```
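The retry change above only improves the logging, but the surrounding loop shape is worth seeing in isolation: iterate fallback models and continue on a 429-style signal. Here is a standalone sketch of that control flow with hypothetical `tryModels`/`callModel` stand-ins, not the executor's actual API:

```go
package main

import "fmt"

// tryModels walks candidate models in order, retrying on a 429 status and
// reporting whether another fallback remains, as the executor loop does.
func tryModels(models []string, callModel func(string) int) (string, bool) {
	for idx, m := range models {
		status := callModel(m)
		if status == 429 {
			if idx+1 < len(models) {
				fmt.Printf("rate limited, retrying with next model: %s\n", models[idx+1])
			} else {
				fmt.Println("rate limited, no additional fallback model")
			}
			continue
		}
		if status == 200 {
			return m, true
		}
	}
	return "", false
}

func main() {
	models := []string{"gemini-2.5-pro", "gemini-2.5-flash"}
	calls := 0
	winner, ok := tryModels(models, func(string) int {
		calls++
		if calls == 1 {
			return 429 // first model is rate limited
		}
		return 200
	})
	fmt.Println(winner, ok) // gemini-2.5-flash true
}
```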
```diff
@@ -78,10 +78,14 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok {
+	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
-	body = disableGeminiThinkingConfig(body, req.Model)
+	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
+	body = fixGeminiImageAspectRatio(req.Model, body)
 
 	action := "generateContent"
@@ -166,10 +170,14 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok {
+	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
-	body = disableGeminiThinkingConfig(body, req.Model)
+	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
+	body = fixGeminiImageAspectRatio(req.Model, body)
 
 	url := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, req.Model, "streamGenerateContent")
@@ -269,10 +277,14 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok {
+	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
 		}
 		translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
 	}
-	translatedReq = disableGeminiThinkingConfig(translatedReq, req.Model)
+	translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
+	translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
```
@@ -11,6 +11,7 @@ import (
"strings"

client "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)

@@ -136,7 +137,7 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
}

// Build output Gemini CLI request JSON
out := `{"model":"","request":{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}}`
out := `{"model":"","request":{"contents":[]}}`
out, _ = sjson.Set(out, "model", modelName)
if systemInstruction != nil {
b, _ := json.Marshal(systemInstruction)

@@ -151,21 +152,16 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
out, _ = sjson.SetRaw(out, "request.tools", string(b))
}

// Map reasoning and sampling configs
reasoningEffortResult := gjson.GetBytes(rawJSON, "reasoning_effort")
if reasoningEffortResult.String() == "none" {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", false)
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0)
} else if reasoningEffortResult.String() == "auto" {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
} else if reasoningEffortResult.String() == "low" {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
} else if reasoningEffortResult.String() == "medium" {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
} else if reasoningEffortResult.String() == "high" {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 24576)
} else {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
if t.Get("type").String() == "enabled" {
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
budget := int(b.Int())
budget = util.NormalizeThinkingBudget(modelName, budget)
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
}
}
}
if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number {
out, _ = sjson.Set(out, "request.generationConfig.temperature", v.Num)
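The rewritten mapping keys off Anthropic's `thinking` block instead of the removed `reasoning_effort` chain. A self-contained sketch of that branch; the clamp range 128-32768 is an assumption standing in for `util.NormalizeThinkingBudget`'s registry lookup:

```go
package main

import (
	"fmt"

	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

func main() {
	raw := []byte(`{"thinking":{"type":"enabled","budget_tokens":64}}`)
	out := `{"request":{"contents":[]}}`

	if t := gjson.GetBytes(raw, "thinking"); t.IsObject() && t.Get("type").String() == "enabled" {
		if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
			budget := int(b.Int())
			// Assumed model range; the real bounds come from registry metadata.
			if budget < 128 {
				budget = 128
			} else if budget > 32768 {
				budget = 32768
			}
			out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
			out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
		}
	}
	fmt.Println(out) // budget_tokens 64 is clamped up to 128
}
```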
@@ -26,32 +26,57 @@ import (
// - []byte: The transformed request data in Gemini CLI API format
func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bool) []byte {
rawJSON := bytes.Clone(inputRawJSON)
// Base envelope
out := []byte(`{"project":"","request":{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}},"model":"gemini-2.5-pro"}`)
// Base envelope (no default thinkingConfig)
out := []byte(`{"project":"","request":{"contents":[]},"model":"gemini-2.5-pro"}`)

// Model
out, _ = sjson.SetBytes(out, "model", modelName)

// Reasoning effort -> thinkingBudget/include_thoughts
// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
re := gjson.GetBytes(rawJSON, "reasoning_effort")
if re.Exists() {
hasOfficialThinking := re.Exists()
if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
switch re.String() {
case "none":
out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts")
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0)
case "auto":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
case "low":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
case "medium":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
case "high":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 24576)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
default:
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
}
}

// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
var setBudget bool
var normalized int
if v := tc.Get("thinking_budget"); v.Exists() {
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
setBudget = true
}
if v := tc.Get("include_thoughts"); v.Exists() {
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
} else if setBudget {
if normalized != 0 {
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
}
}
}
} else {
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
}

// Temperature/top_p/top_k
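For reference, a request that reaches the Cherry Studio branch above; the extension is honored only because no official `reasoning_effort` field is present. The payload shape follows the `extra_body.google.thinking_config` path the code reads; the model name is illustrative:

```go
package main

import (
	"fmt"

	"github.com/tidwall/gjson"
)

func main() {
	raw := []byte(`{
		"model": "gemini-2.5-flash",
		"messages": [{"role": "user", "content": "hi"}],
		"extra_body": {"google": {"thinking_config": {"thinking_budget": 512, "include_thoughts": true}}}
	}`)

	// Official field absent, so the extension applies.
	if !gjson.GetBytes(raw, "reasoning_effort").Exists() {
		tc := gjson.GetBytes(raw, "extra_body.google.thinking_config")
		fmt.Println(tc.Get("thinking_budget").Int(), tc.Get("include_thoughts").Bool()) // 512 true
	}
}
```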
@@ -250,8 +275,34 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
if t.Get("type").String() == "function" {
fn := t.Get("function")
if fn.Exists() && fn.IsObject() {
parametersJsonSchema, _ := util.RenameKey(fn.Raw, "parameters", "parametersJsonSchema")
out, _ = sjson.SetRawBytes(out, fdPath+".-1", []byte(parametersJsonSchema))
fnRaw := fn.Raw
if fn.Get("parameters").Exists() {
renamed, errRename := util.RenameKey(fnRaw, "parameters", "parametersJsonSchema")
if errRename != nil {
log.Warnf("Failed to rename parameters for tool '%s': %v", fn.Get("name").String(), errRename)
} else {
fnRaw = renamed
}
} else {
var errSet error
fnRaw, errSet = sjson.Set(fnRaw, "parametersJsonSchema.type", "object")
if errSet != nil {
log.Warnf("Failed to set default schema type for tool '%s': %v", fn.Get("name").String(), errSet)
continue
}
fnRaw, errSet = sjson.Set(fnRaw, "parametersJsonSchema.properties", map[string]interface{}{})
if errSet != nil {
log.Warnf("Failed to set default schema properties for tool '%s': %v", fn.Get("name").String(), errSet)
continue
}
}

tmp, errSet := sjson.SetRawBytes(out, fdPath+".-1", []byte(fnRaw))
if errSet != nil {
log.Warnf("Failed to append tool declaration for '%s': %v", fn.Get("name").String(), errSet)
continue
}
out = tmp
}
}
}
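`util.RenameKey` is internal to the repo; an equivalent rename with gjson/sjson looks roughly like this (hypothetical helper for illustration, not the repo's implementation):

```go
package main

import (
	"fmt"

	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

// renameKey copies the value at oldKey to newKey and deletes oldKey.
func renameKey(src, oldKey, newKey string) (string, error) {
	v := gjson.Get(src, oldKey)
	if !v.Exists() {
		return src, nil
	}
	out, err := sjson.SetRaw(src, newKey, v.Raw)
	if err != nil {
		return "", err
	}
	return sjson.Delete(out, oldKey)
}

func main() {
	fn := `{"name":"get_weather","parameters":{"type":"object","properties":{}}}`
	out, _ := renameKey(fn, "parameters", "parametersJsonSchema")
	fmt.Println(out) // {"name":"get_weather","parametersJsonSchema":{"type":"object","properties":{}}}
}
```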
@@ -11,6 +11,7 @@ import (
"strings"

client "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)

@@ -129,7 +130,7 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
}

// Build output Gemini request JSON
out := `{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}`
out := `{"contents":[]}`
out, _ = sjson.Set(out, "model", modelName)
if systemInstruction != nil {
b, _ := json.Marshal(systemInstruction)

@@ -144,21 +145,16 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
out, _ = sjson.SetRaw(out, "tools", string(b))
}

// Map reasoning and sampling configs
reasoningEffortResult := gjson.GetBytes(rawJSON, "reasoning_effort")
if reasoningEffortResult.String() == "none" {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", false)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
} else if reasoningEffortResult.String() == "auto" {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
} else if reasoningEffortResult.String() == "low" {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
} else if reasoningEffortResult.String() == "medium" {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
} else if reasoningEffortResult.String() == "high" {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 24576)
} else {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when enabled
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
if t.Get("type").String() == "enabled" {
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
budget := int(b.Int())
budget = util.NormalizeThinkingBudget(modelName, budget)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}
}
if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number {
out, _ = sjson.Set(out, "generationConfig.temperature", v.Num)
@@ -26,32 +26,58 @@ import (
// - []byte: The transformed request data in Gemini API format
func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) []byte {
rawJSON := bytes.Clone(inputRawJSON)
// Base envelope
out := []byte(`{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}`)
// Base envelope (no default thinkingConfig)
out := []byte(`{"contents":[]}`)

// Model
out, _ = sjson.SetBytes(out, "model", modelName)

// Reasoning effort -> thinkingBudget/include_thoughts
// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
re := gjson.GetBytes(rawJSON, "reasoning_effort")
if re.Exists() {
hasOfficialThinking := re.Exists()
if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
switch re.String() {
case "none":
out, _ = sjson.DeleteBytes(out, "generationConfig.thinkingConfig.include_thoughts")
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
case "auto":
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "low":
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "medium":
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "high":
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 24576)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
default:
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}

// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
var setBudget bool
var normalized int
if v := tc.Get("thinking_budget"); v.Exists() {
// Normalize budget to model range
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
setBudget = true
}
if v := tc.Get("include_thoughts"); v.Exists() {
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
} else if setBudget {
if normalized != 0 {
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}
}
} else {
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
}

// Temperature/top_p/top_k
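The effort-to-budget seeding is now identical across the converters. A compact mirror of the switch (the seed values match the hunks above; actual clamping to the model's range is left to `NormalizeThinkingBudget`):

```go
package main

import "fmt"

// effortToBudget mirrors the switch above: named efforts map to seed budgets,
// "auto" and unknown values map to dynamic (-1), "none" disables thinking.
func effortToBudget(effort string) int {
	switch effort {
	case "none":
		return 0
	case "low":
		return 1024
	case "medium":
		return 8192
	case "high":
		return 32768
	default: // "auto" or anything else
		return -1
	}
}

func main() {
	for _, e := range []string{"none", "low", "medium", "high", "auto"} {
		fmt.Printf("%-6s -> %d\n", e, effortToBudget(e))
	}
}
```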
@@ -4,6 +4,7 @@ import (
"bytes"
"strings"

"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)

@@ -15,8 +16,8 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
_ = modelName // Unused but required by interface
_ = stream // Unused but required by interface

// Base Gemini API template
out := `{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}`
// Base Gemini API template (do not include thinkingConfig by default)
out := `{"contents":[]}`

root := gjson.ParseBytes(rawJSON)
@@ -242,23 +243,52 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
out, _ = sjson.Set(out, "generationConfig.stopSequences", sequences)
}

if reasoningEffort := root.Get("reasoning.effort"); reasoningEffort.Exists() {
// OpenAI official reasoning fields take precedence
hasOfficialThinking := root.Get("reasoning.effort").Exists()
if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
reasoningEffort := root.Get("reasoning.effort")
switch reasoningEffort.String() {
case "none":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", false)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
case "auto":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "minimal":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "low":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 4096)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 4096))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "medium":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "high":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 24576)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
default:
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}

// Cherry Studio extension (applies only when official fields are missing)
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
var setBudget bool
var normalized int
if v := tc.Get("thinking_budget"); v.Exists() {
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
setBudget = true
}
if v := tc.Get("include_thoughts"); v.Exists() {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
} else if setBudget {
if normalized != 0 {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}
}
}
return []byte(out)
@@ -4,6 +4,7 @@ package chat_completions

import (
"bytes"
"github.com/tidwall/sjson"
)

// ConvertOpenAIRequestToOpenAI converts an OpenAI Chat Completions request (raw JSON)

@@ -17,5 +18,14 @@ import (
// Returns:
// - []byte: The request payload with the "model" field overridden
func ConvertOpenAIRequestToOpenAI(modelName string, inputRawJSON []byte, _ bool) []byte {
return bytes.Clone(inputRawJSON)
// Override the "model" field with the provided modelName; sjson.SetBytes
// returns a new byte slice with the updated JSON.
updatedJSON, err := sjson.SetBytes(inputRawJSON, "model", modelName)
if err != nil {
// On error, fall back to the original payload unchanged.
return bytes.Clone(inputRawJSON)
}
return updatedJSON
}
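This passthrough change matters for alias routing: the provider's real model name replaces the client-supplied alias before the request is forwarded. A minimal illustration with sjson (the alias and upstream name are borrowed from the OpenRouter configuration example):

```go
package main

import (
	"fmt"

	"github.com/tidwall/sjson"
)

func main() {
	// Client sends the alias; the proxy rewrites it to the provider's model name.
	in := []byte(`{"model":"kimi-k2","messages":[]}`)
	out, err := sjson.SetBytes(in, "model", "moonshotai/kimi-k2:free")
	if err != nil {
		out = in // fall back to the original payload, as the converter does
	}
	fmt.Println(string(out)) // {"model":"moonshotai/kimi-k2:free","messages":[]}
}
```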
@@ -179,3 +179,19 @@ func GeminiThinkingFromMetadata(metadata map[string]any) (*int, *bool, bool) {
}
return budgetPtr, includePtr, matched
}

// StripThinkingConfigIfUnsupported removes thinkingConfig from the request body
// when the target model does not advertise Thinking capability. It cleans both
// standard Gemini and Gemini CLI JSON envelopes. This acts as a final safety net
// in case upstream injected thinking for an unsupported model.
func StripThinkingConfigIfUnsupported(model string, body []byte) []byte {
if ModelSupportsThinking(model) || len(body) == 0 {
return body
}
updated := body
// Gemini CLI path
updated, _ = sjson.DeleteBytes(updated, "request.generationConfig.thinkingConfig")
// Standard Gemini path
updated, _ = sjson.DeleteBytes(updated, "generationConfig.thinkingConfig")
return updated
}
internal/util/thinking.go (new file, 69 lines)
@@ -0,0 +1,69 @@
package util

import (
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
)

// ModelSupportsThinking reports whether the given model has Thinking capability
// according to the model registry metadata (provider-agnostic).
func ModelSupportsThinking(model string) bool {
if model == "" {
return false
}
if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil {
return info.Thinking != nil
}
return false
}

// NormalizeThinkingBudget clamps the requested thinking budget to the
// supported range for the specified model using registry metadata only.
// If the model is unknown or has no Thinking metadata, returns the original budget.
// For dynamic (-1), returns -1 if DynamicAllowed; otherwise approximates mid-range
// or min (0 if zero is allowed and mid <= 0).
func NormalizeThinkingBudget(model string, budget int) int {
if budget == -1 { // dynamic
if found, min, max, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found {
if dynamicAllowed {
return -1
}
mid := (min + max) / 2
if mid <= 0 && zeroAllowed {
return 0
}
if mid <= 0 {
return min
}
return mid
}
return -1
}
if found, min, max, zeroAllowed, _ := thinkingRangeFromRegistry(model); found {
if budget == 0 {
if zeroAllowed {
return 0
}
return min
}
if budget < min {
return min
}
if budget > max {
return max
}
return budget
}
return budget
}

// thinkingRangeFromRegistry attempts to read thinking ranges from the model registry.
func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zeroAllowed bool, dynamicAllowed bool) {
if model == "" {
return false, 0, 0, false, false
}
info := registry.GetGlobalRegistry().GetModelInfo(model)
if info == nil || info.Thinking == nil {
return false, 0, 0, false, false
}
return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
}
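Behavior sketch for `NormalizeThinkingBudget`, assuming a model whose registry metadata reports min=128, max=32768, zero allowed, and dynamic allowed; this is a standalone re-implementation for illustration, not the repo's code:

```go
package main

import "fmt"

// normalize mirrors NormalizeThinkingBudget for one assumed model range:
// min=128, max=32768, zero allowed, dynamic (-1) allowed.
func normalize(budget int) int {
	const lo, hi = 128, 32768
	switch {
	case budget == -1: // dynamic is allowed, pass through
		return -1
	case budget == 0: // zero is allowed, keep thinking disabled
		return 0
	case budget < lo:
		return lo
	case budget > hi:
		return hi
	default:
		return budget
	}
}

func main() {
	for _, b := range []int{-1, 0, 50, 2048, 999999} {
		fmt.Printf("%7d -> %d\n", b, normalize(b))
	}
	// -1 -> -1, 0 -> 0, 50 -> 128, 2048 -> 2048, 999999 -> 32768
}
```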
@@ -6,6 +6,7 @@ package handlers
import (
"fmt"
"net/http"
"strings"

"github.com/gin-gonic/gin"
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"

@@ -46,6 +47,9 @@ type BaseAPIHandler struct {

// Cfg holds the current application configuration.
Cfg *config.SDKConfig

// OpenAICompatProviders is a list of provider names for OpenAI compatibility.
OpenAICompatProviders []string
}

// NewBaseAPIHandlers creates a new API handlers instance.
@@ -57,10 +61,11 @@ type BaseAPIHandler struct {
//
// Returns:
// - *BaseAPIHandler: A new API handlers instance
func NewBaseAPIHandlers(cfg *config.SDKConfig, authManager *coreauth.Manager) *BaseAPIHandler {
func NewBaseAPIHandlers(cfg *config.SDKConfig, authManager *coreauth.Manager, openAICompatProviders []string) *BaseAPIHandler {
return &BaseAPIHandler{
Cfg: cfg,
AuthManager: authManager,
Cfg: cfg,
AuthManager: authManager,
OpenAICompatProviders: openAICompatProviders,
}
}
@@ -133,10 +138,9 @@ func (h *BaseAPIHandler) GetContextWithCancel(handler interfaces.APIHandler, c *
// ExecuteWithAuthManager executes a non-streaming request via the core auth manager.
// This path is the only supported execution route.
func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) {
normalizedModel, metadata := normalizeModelMetadata(modelName)
providers := util.GetProviderName(normalizedModel)
if len(providers) == 0 {
return nil, &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)}
providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName)
if errMsg != nil {
return nil, errMsg
}
req := coreexecutor.Request{
Model: normalizedModel,

@@ -176,10 +180,9 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
// ExecuteCountWithAuthManager executes a non-streaming request via the core auth manager.
// This path is the only supported execution route.
func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) {
normalizedModel, metadata := normalizeModelMetadata(modelName)
providers := util.GetProviderName(normalizedModel)
if len(providers) == 0 {
return nil, &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)}
providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName)
if errMsg != nil {
return nil, errMsg
}
req := coreexecutor.Request{
Model: normalizedModel,

@@ -219,11 +222,10 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
// ExecuteStreamWithAuthManager executes a streaming request via the core auth manager.
// This path is the only supported execution route.
func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) (<-chan []byte, <-chan *interfaces.ErrorMessage) {
normalizedModel, metadata := normalizeModelMetadata(modelName)
providers := util.GetProviderName(normalizedModel)
if len(providers) == 0 {
providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName)
if errMsg != nil {
errChan := make(chan *interfaces.ErrorMessage, 1)
errChan <- &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)}
errChan <- errMsg
close(errChan)
return nil, errChan
}
@@ -292,6 +294,58 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
return dataChan, errChan
}

func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string, normalizedModel string, metadata map[string]any, err *interfaces.ErrorMessage) {
providerName, extractedModelName, isDynamic := h.parseDynamicModel(modelName)

// First, normalize the model name to handle suffixes like "-thinking-128"
// This needs to happen before determining the provider for non-dynamic models.
normalizedModel, metadata = normalizeModelMetadata(modelName)

if isDynamic {
providers = []string{providerName}
// For dynamic models, the extractedModelName is already normalized by parseDynamicModel
// so we use it as the final normalizedModel.
normalizedModel = extractedModelName
} else {
// For non-dynamic models, use the normalizedModel to get the provider name.
providers = util.GetProviderName(normalizedModel)
}

if len(providers) == 0 {
return nil, "", nil, &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)}
}

// If it's a dynamic model, the normalizedModel was already set to extractedModelName.
// If it's a non-dynamic model, normalizedModel was set by normalizeModelMetadata.
// So, normalizedModel is already correctly set at this point.

return providers, normalizedModel, metadata, nil
}

func (h *BaseAPIHandler) parseDynamicModel(modelName string) (providerName, model string, isDynamic bool) {
var providerPart, modelPart string
for _, sep := range []string{"://"} {
if parts := strings.SplitN(modelName, sep, 2); len(parts) == 2 {
providerPart = parts[0]
modelPart = parts[1]
break
}
}

if providerPart == "" {
return "", modelName, false
}

// Check if the provider is a configured openai-compatibility provider
for _, pName := range h.OpenAICompatProviders {
if pName == providerPart {
return providerPart, modelPart, true
}
}

return "", modelName, false
}
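This is the routing behind dynamic model names: a prefix before `://` is accepted only if it matches a configured openai-compatibility provider, so `openrouter://moonshotai/kimi-k2:free` routes directly to the `openrouter` provider with the remainder as the model. A standalone sketch of the split (the provider slice stands in for `h.OpenAICompatProviders`):

```go
package main

import (
	"fmt"
	"strings"
)

// parseDynamicModel splits "provider://model" and accepts the provider only
// if it matches a configured openai-compatibility provider name.
func parseDynamicModel(modelName string, compatProviders []string) (provider, model string, isDynamic bool) {
	parts := strings.SplitN(modelName, "://", 2)
	if len(parts) != 2 {
		return "", modelName, false
	}
	for _, p := range compatProviders {
		if p == parts[0] {
			return parts[0], parts[1], true
		}
	}
	return "", modelName, false
}

func main() {
	providers := []string{"openrouter"} // stand-in for h.OpenAICompatProviders
	fmt.Println(parseDynamicModel("openrouter://moonshotai/kimi-k2:free", providers))
	// openrouter moonshotai/kimi-k2:free true
	fmt.Println(parseDynamicModel("gemini-2.5-pro", providers))
	//  gemini-2.5-pro false
}
```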
func cloneBytes(src []byte) []byte {
if len(src) == 0 {
return nil
@@ -872,6 +872,11 @@ func (m *Manager) persist(ctx context.Context, auth *Auth) error {
if m.store == nil || auth == nil {
return nil
}
if auth.Attributes != nil {
if v := strings.ToLower(strings.TrimSpace(auth.Attributes["runtime_only"])); v == "true" {
return nil
}
}
// Skip persistence when metadata is absent (e.g., runtime-only auths).
if auth.Metadata == nil {
return nil
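Combined with the `runtime_only` attribute set by `wsOnConnected` below, this guard keeps websocket-backed AI Studio auths out of the on-disk store. A simplified stand-in for the check; the `Auth` struct here is pared down to just the fields the guard reads:

```go
package main

import (
	"fmt"
	"strings"
)

// Auth is a pared-down stand-in for the manager's auth record.
type Auth struct {
	ID         string
	Attributes map[string]string
	Metadata   map[string]any
}

// shouldPersist mirrors the guard added to Manager.persist above.
func shouldPersist(a *Auth) bool {
	if a == nil {
		return false
	}
	if v := strings.ToLower(strings.TrimSpace(a.Attributes["runtime_only"])); v == "true" {
		return false
	}
	return a.Metadata != nil // absent metadata also skips persistence
}

func main() {
	ws := &Auth{ID: "aistudio-1", Attributes: map[string]string{"runtime_only": "true"}}
	fmt.Println(shouldPersist(ws)) // false: websocket-backed auths never hit disk
}
```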
@@ -157,15 +157,6 @@ func (a *Auth) AccountInfo() (string, string) {
return "oauth", v
}
}
if strings.HasPrefix(strings.ToLower(strings.TrimSpace(a.Provider)), "aistudio-") {
if label := strings.TrimSpace(a.Label); label != "" {
return "oauth", label
}
if id := strings.TrimSpace(a.ID); id != "" {
return "oauth", id
}
return "oauth", "aistudio"
}
if a.Attributes != nil {
if v := a.Attributes["api_key"]; v != "" {
return "api_key", v
@@ -194,15 +194,15 @@ func (s *Service) ensureWebsocketGateway() {
s.wsGateway = wsrelay.NewManager(opts)
}

func (s *Service) wsOnConnected(provider string) {
if s == nil || provider == "" {
func (s *Service) wsOnConnected(channelID string) {
if s == nil || channelID == "" {
return
}
if !strings.HasPrefix(strings.ToLower(provider), "aistudio-") {
if !strings.HasPrefix(strings.ToLower(channelID), "aistudio-") {
return
}
if s.coreManager != nil {
if existing, ok := s.coreManager.GetByID(provider); ok && existing != nil {
if existing, ok := s.coreManager.GetByID(channelID); ok && existing != nil {
if !existing.Disabled && existing.Status == coreauth.StatusActive {
return
}

@@ -210,36 +210,34 @@ func (s *Service) wsOnConnected(provider string) {
}
now := time.Now().UTC()
auth := &coreauth.Auth{
ID: provider,
Provider: provider,
Label: provider,
ID: channelID, // keep channel identifier as ID
Provider: "aistudio", // logical provider for switch routing
Label: channelID, // display original channel id
Status: coreauth.StatusActive,
CreatedAt: now,
UpdatedAt: now,
Attributes: map[string]string{"ws_provider": "gemini"},
Attributes: map[string]string{"runtime_only": "true"},
Metadata: map[string]any{"email": channelID}, // metadata drives logging and usage tracking
}
log.Infof("websocket provider connected: %s", provider)
log.Infof("websocket provider connected: %s", channelID)
s.applyCoreAuthAddOrUpdate(context.Background(), auth)
}

func (s *Service) wsOnDisconnected(provider string, reason error) {
if s == nil || provider == "" {
func (s *Service) wsOnDisconnected(channelID string, reason error) {
if s == nil || channelID == "" {
return
}
if reason != nil {
if strings.Contains(reason.Error(), "replaced by new connection") {
log.Infof("websocket provider replaced: %s", provider)
log.Infof("websocket provider replaced: %s", channelID)
return
}
log.Warnf("websocket provider disconnected: %s (%v)", provider, reason)
log.Warnf("websocket provider disconnected: %s (%v)", channelID, reason)
} else {
log.Infof("websocket provider disconnected: %s", provider)
log.Infof("websocket provider disconnected: %s", channelID)
}
ctx := context.Background()
s.applyCoreAuthRemoval(ctx, provider)
if s.coreManager != nil {
s.coreManager.UnregisterExecutor(provider)
}
s.applyCoreAuthRemoval(ctx, channelID)
}
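The key change in `wsOnConnected`: the websocket channel id (an `aistudio-` prefixed string) stays as the auth ID and Label, while Provider becomes the logical `aistudio`, which the executor switch and model registration below key on. A minimal stand-in illustrating the split; the struct is a simplified assumption and the channel id is hypothetical:

```go
package main

import "fmt"

// wsAuth is a simplified stand-in for coreauth.Auth with the fields set above.
type wsAuth struct {
	ID, Provider, Label string
	Attributes          map[string]string
	Metadata            map[string]any
}

func newChannelAuth(channelID string) wsAuth {
	return wsAuth{
		ID:         channelID,  // unique per websocket channel
		Provider:   "aistudio", // logical provider; routing switches on this
		Label:      channelID,  // what logs and account info display
		Attributes: map[string]string{"runtime_only": "true"}, // never persisted
		Metadata:   map[string]any{"email": channelID},        // drives usage tracking
	}
}

func main() {
	a := newChannelAuth("aistudio-3f2a") // hypothetical channel id
	fmt.Printf("route by %q, display %q\n", a.Provider, a.Label)
}
```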
func (s *Service) applyCoreAuthAddOrUpdate(ctx context.Context, auth *coreauth.Auth) {

@@ -317,17 +315,16 @@ func (s *Service) ensureExecutorsForAuth(a *coreauth.Auth) {
s.coreManager.RegisterExecutor(executor.NewOpenAICompatExecutor(compatProviderKey, s.cfg))
return
}
if strings.HasPrefix(strings.ToLower(strings.TrimSpace(a.Provider)), "aistudio-") {
if s.wsGateway != nil {
s.coreManager.RegisterExecutor(executor.NewAistudioExecutor(s.cfg, a.Provider, s.wsGateway))
}
return
}
switch strings.ToLower(a.Provider) {
case "gemini":
s.coreManager.RegisterExecutor(executor.NewGeminiExecutor(s.cfg))
case "gemini-cli":
s.coreManager.RegisterExecutor(executor.NewGeminiCLIExecutor(s.cfg))
case "aistudio":
if s.wsGateway != nil {
s.coreManager.RegisterExecutor(executor.NewAIStudioExecutor(s.cfg, a.ID, s.wsGateway))
}
return
case "claude":
s.coreManager.RegisterExecutor(executor.NewClaudeExecutor(s.cfg))
case "codex":
@@ -609,13 +606,6 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
}
provider := strings.ToLower(strings.TrimSpace(a.Provider))
compatProviderKey, compatDisplayName, compatDetected := openAICompatInfoFromAuth(a)
if a.Attributes != nil {
if strings.EqualFold(a.Attributes["ws_provider"], "gemini") {
models := mergeGeminiModels()
GlobalModelRegistry().RegisterClient(a.ID, provider, models)
return
}
}
if compatDetected {
provider = "openai-compatibility"
}

@@ -625,6 +615,8 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
models = registry.GetGeminiModels()
case "gemini-cli":
models = registry.GetGeminiCLIModels()
case "aistudio":
models = registry.GetAIStudioModels()
case "claude":
models = registry.GetClaudeModels()
if entry := s.resolveConfigClaudeKey(a); entry != nil && len(entry.Models) > 0 {

@@ -726,27 +718,6 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
}
}

func mergeGeminiModels() []*ModelInfo {
models := make([]*ModelInfo, 0, 16)
seen := make(map[string]struct{})
appendModels := func(items []*ModelInfo) {
for i := range items {
m := items[i]
if m == nil || m.ID == "" {
continue
}
if _, ok := seen[m.ID]; ok {
continue
}
seen[m.ID] = struct{}{}
models = append(models, m)
}
}
appendModels(registry.GetGeminiModels())
appendModels(registry.GetGeminiCLIModels())
return models
}

func (s *Service) resolveConfigClaudeKey(auth *coreauth.Auth) *config.ClaudeKey {
if auth == nil || s.cfg == nil {
return nil