mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-02-02 20:40:52 +08:00
Merge pull request #582 from ben-vargas/fix-gemini-3-thinking-level
feat: use thinkingLevel for Gemini 3 models per Google documentation
This commit is contained in:
2
.github/pull.yml
vendored
Normal file
2
.github/pull.yml
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
version: "1"
|
||||||
|
rules: []
|
||||||
155
USING_WITH_FACTORY_AND_AMP.md
Normal file
155
USING_WITH_FACTORY_AND_AMP.md
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
# Using Factory CLI (Droid) and Amp CLI with CLIProxyAPI
|
||||||
|
|
||||||
|
## ⚠️ Important Update
|
||||||
|
|
||||||
|
**This fork has been merged upstream!** All Amp CLI integration features developed in this fork have been accepted and merged into the official [router-for-me/CLIProxyAPI](https://github.com/router-for-me/CLIProxyAPI) repository.
|
||||||
|
|
||||||
|
**Please use the upstream repository for the latest features, updates, and support:**
|
||||||
|
|
||||||
|
👉 **[github.com/router-for-me/CLIProxyAPI](https://github.com/router-for-me/CLIProxyAPI)**
|
||||||
|
|
||||||
|
This document is maintained solely for legacy link preservation from previous social media posts and shared documentation.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Official Documentation
|
||||||
|
|
||||||
|
### Amp CLI Integration
|
||||||
|
|
||||||
|
For complete instructions on using Amp CLI with CLIProxyAPI, see the official documentation:
|
||||||
|
|
||||||
|
📖 **[Amp CLI Integration Guide](https://github.com/router-for-me/CLIProxyAPI/blob/main/docs/amp-cli-integration.md)**
|
||||||
|
|
||||||
|
This guide covers:
|
||||||
|
- OAuth setup for Gemini Pro/Ultra, ChatGPT Plus/Pro, and Claude Pro/Max subscriptions
|
||||||
|
- Configuration for Amp CLI and Amp IDE extensions
|
||||||
|
- Provider routing and management endpoints
|
||||||
|
- Troubleshooting and best practices
|
||||||
|
|
||||||
|
### Factory CLI (Droid) Integration
|
||||||
|
|
||||||
|
For instructions on using Factory AI's Droid CLI with CLIProxyAPI, see:
|
||||||
|
|
||||||
|
📖 **[Factory Droid Documentation](https://help.router-for.me/agent-client/droid.html)**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Quick Reference: Factory CLI Custom Models
|
||||||
|
|
||||||
|
For quick reference, here's an example `~/.factory/config.json` configuration for using CLIProxyAPI with Factory CLI:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"custom_models": [
|
||||||
|
{
|
||||||
|
"model_display_name": "Claude Haiku 4.5 [Proxy]",
|
||||||
|
"model": "claude-haiku-4-5-20251001",
|
||||||
|
"base_url": "http://localhost:8317",
|
||||||
|
"api_key": "dummy-not-used",
|
||||||
|
"provider": "anthropic"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_display_name": "Claude Sonnet 4.5 [Proxy]",
|
||||||
|
"model": "claude-sonnet-4-5-20250929",
|
||||||
|
"base_url": "http://localhost:8317",
|
||||||
|
"api_key": "dummy-not-used",
|
||||||
|
"provider": "anthropic"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_display_name": "Claude Opus 4.1 [Proxy]",
|
||||||
|
"model": "claude-opus-4-1-20250805",
|
||||||
|
"base_url": "http://localhost:8317",
|
||||||
|
"api_key": "dummy-not-used",
|
||||||
|
"provider": "anthropic"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_display_name": "GPT-5.1 Low [Proxy]",
|
||||||
|
"model": "gpt-5.1-low",
|
||||||
|
"base_url": "http://localhost:8317/v1",
|
||||||
|
"api_key": "dummy-not-used",
|
||||||
|
"provider": "openai"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_display_name": "GPT-5.1 Medium [Proxy]",
|
||||||
|
"model": "gpt-5.1-medium",
|
||||||
|
"base_url": "http://localhost:8317/v1",
|
||||||
|
"api_key": "dummy-not-used",
|
||||||
|
"provider": "openai"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_display_name": "GPT-5.1 High [Proxy]",
|
||||||
|
"model": "gpt-5.1-high",
|
||||||
|
"base_url": "http://localhost:8317/v1",
|
||||||
|
"api_key": "dummy-not-used",
|
||||||
|
"provider": "openai"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_display_name": "GPT-5.1 Codex Low [Proxy]",
|
||||||
|
"model": "gpt-5.1-codex-low",
|
||||||
|
"base_url": "http://localhost:8317/v1",
|
||||||
|
"api_key": "dummy-not-used",
|
||||||
|
"provider": "openai"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_display_name": "GPT-5.1 Codex Medium [Proxy]",
|
||||||
|
"model": "gpt-5.1-codex-medium",
|
||||||
|
"base_url": "http://localhost:8317/v1",
|
||||||
|
"api_key": "dummy-not-used",
|
||||||
|
"provider": "openai"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_display_name": "GPT-5.1 Codex High [Proxy]",
|
||||||
|
"model": "gpt-5.1-codex-high",
|
||||||
|
"base_url": "http://localhost:8317/v1",
|
||||||
|
"api_key": "dummy-not-used",
|
||||||
|
"provider": "openai"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_display_name": "GPT-5.1 Codex Mini Medium [Proxy]",
|
||||||
|
"model": "gpt-5.1-codex-mini-medium",
|
||||||
|
"base_url": "http://localhost:8317/v1",
|
||||||
|
"api_key": "dummy-not-used",
|
||||||
|
"provider": "openai"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_display_name": "GPT-5.1 Codex Mini High [Proxy]",
|
||||||
|
"model": "gpt-5.1-codex-mini-high",
|
||||||
|
"base_url": "http://localhost:8317/v1",
|
||||||
|
"api_key": "dummy-not-used",
|
||||||
|
"provider": "openai"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_display_name": "Gemini 3 Pro Preview [Proxy]",
|
||||||
|
"model": "gemini-3-pro-preview",
|
||||||
|
"base_url": "http://localhost:8317/v1",
|
||||||
|
"api_key": "dummy-not-used",
|
||||||
|
"provider": "openai"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Key Points
|
||||||
|
|
||||||
|
- **`base_url`**: Use `http://localhost:8317` for Anthropic models, `http://localhost:8317/v1` for OpenAI/generic models
|
||||||
|
- **`api_key`**: Use `"dummy-not-used"` when OAuth is configured via CLIProxyAPI
|
||||||
|
- **`provider`**: Set to `"anthropic"` for Claude models, `"openai"` for GPT/Gemini models
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
Install the official CLIProxyAPI from the upstream repository:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git clone https://github.com/router-for-me/CLIProxyAPI.git
|
||||||
|
cd CLIProxyAPI
|
||||||
|
go build -o cli-proxy-api ./cmd/server
|
||||||
|
```
|
||||||
|
|
||||||
|
Or via Homebrew (macOS/Linux):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
brew install cliproxyapi
|
||||||
|
brew services start cliproxyapi
|
||||||
|
```
|
||||||
@@ -160,7 +160,7 @@ func GetGeminiModels() []*ModelInfo {
|
|||||||
InputTokenLimit: 1048576,
|
InputTokenLimit: 1048576,
|
||||||
OutputTokenLimit: 65536,
|
OutputTokenLimit: 65536,
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "gemini-3-pro-image-preview",
|
ID: "gemini-3-pro-image-preview",
|
||||||
@@ -175,7 +175,7 @@ func GetGeminiModels() []*ModelInfo {
|
|||||||
InputTokenLimit: 1048576,
|
InputTokenLimit: 1048576,
|
||||||
OutputTokenLimit: 65536,
|
OutputTokenLimit: 65536,
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -240,7 +240,7 @@ func GetGeminiVertexModels() []*ModelInfo {
|
|||||||
InputTokenLimit: 1048576,
|
InputTokenLimit: 1048576,
|
||||||
OutputTokenLimit: 65536,
|
OutputTokenLimit: 65536,
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "gemini-3-flash-preview",
|
ID: "gemini-3-flash-preview",
|
||||||
@@ -255,7 +255,7 @@ func GetGeminiVertexModels() []*ModelInfo {
|
|||||||
InputTokenLimit: 1048576,
|
InputTokenLimit: 1048576,
|
||||||
OutputTokenLimit: 65536,
|
OutputTokenLimit: 65536,
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "gemini-3-pro-image-preview",
|
ID: "gemini-3-pro-image-preview",
|
||||||
@@ -270,7 +270,7 @@ func GetGeminiVertexModels() []*ModelInfo {
|
|||||||
InputTokenLimit: 1048576,
|
InputTokenLimit: 1048576,
|
||||||
OutputTokenLimit: 65536,
|
OutputTokenLimit: 65536,
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -336,7 +336,7 @@ func GetGeminiCLIModels() []*ModelInfo {
|
|||||||
InputTokenLimit: 1048576,
|
InputTokenLimit: 1048576,
|
||||||
OutputTokenLimit: 65536,
|
OutputTokenLimit: 65536,
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "gemini-3-flash-preview",
|
ID: "gemini-3-flash-preview",
|
||||||
@@ -351,7 +351,7 @@ func GetGeminiCLIModels() []*ModelInfo {
|
|||||||
InputTokenLimit: 1048576,
|
InputTokenLimit: 1048576,
|
||||||
OutputTokenLimit: 65536,
|
OutputTokenLimit: 65536,
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -417,7 +417,7 @@ func GetAIStudioModels() []*ModelInfo {
|
|||||||
InputTokenLimit: 1048576,
|
InputTokenLimit: 1048576,
|
||||||
OutputTokenLimit: 65536,
|
OutputTokenLimit: 65536,
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "gemini-3-flash-preview",
|
ID: "gemini-3-flash-preview",
|
||||||
@@ -432,7 +432,7 @@ func GetAIStudioModels() []*ModelInfo {
|
|||||||
InputTokenLimit: 1048576,
|
InputTokenLimit: 1048576,
|
||||||
OutputTokenLimit: 65536,
|
OutputTokenLimit: 65536,
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "gemini-pro-latest",
|
ID: "gemini-pro-latest",
|
||||||
@@ -743,8 +743,9 @@ func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
|
|||||||
"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"},
|
"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"},
|
||||||
"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"},
|
"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"},
|
||||||
"gemini-2.5-computer-use-preview-10-2025": {Name: "models/gemini-2.5-computer-use-preview-10-2025"},
|
"gemini-2.5-computer-use-preview-10-2025": {Name: "models/gemini-2.5-computer-use-preview-10-2025"},
|
||||||
"gemini-3-pro-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-3-pro-preview"},
|
"gemini-3-pro-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-preview"},
|
||||||
"gemini-3-pro-image-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-3-pro-image-preview"},
|
"gemini-3-pro-image-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image-preview"},
|
||||||
|
"gemini-3-flash-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash-preview"},
|
||||||
"gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
"gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
||||||
"gemini-claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
"gemini-claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -323,8 +323,9 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
|
|||||||
to := sdktranslator.FromString("gemini")
|
to := sdktranslator.FromString("gemini")
|
||||||
payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
|
payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
|
||||||
payload = ApplyThinkingMetadata(payload, req.Metadata, req.Model)
|
payload = ApplyThinkingMetadata(payload, req.Metadata, req.Model)
|
||||||
|
payload = util.ApplyGemini3ThinkingLevelFromMetadata(req.Model, req.Metadata, payload)
|
||||||
payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload)
|
payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload)
|
||||||
payload = util.ConvertThinkingLevelToBudget(payload)
|
payload = util.ConvertThinkingLevelToBudget(payload, req.Model)
|
||||||
payload = util.NormalizeGeminiThinkingBudget(req.Model, payload)
|
payload = util.NormalizeGeminiThinkingBudget(req.Model, payload)
|
||||||
payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
|
payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
|
||||||
payload = fixGeminiImageAspectRatio(req.Model, payload)
|
payload = fixGeminiImageAspectRatio(req.Model, payload)
|
||||||
|
|||||||
@@ -90,6 +90,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
|
|||||||
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||||
|
|
||||||
translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
||||||
|
translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
|
||||||
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
|
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
|
||||||
translated = normalizeAntigravityThinking(req.Model, translated)
|
translated = normalizeAntigravityThinking(req.Model, translated)
|
||||||
|
|
||||||
@@ -183,6 +184,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
|
|||||||
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||||
|
|
||||||
translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
||||||
|
translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
|
||||||
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
|
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
|
||||||
translated = normalizeAntigravityThinking(req.Model, translated)
|
translated = normalizeAntigravityThinking(req.Model, translated)
|
||||||
|
|
||||||
@@ -515,6 +517,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
|
|||||||
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||||
|
|
||||||
translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
||||||
|
translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
|
||||||
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
|
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
|
||||||
translated = normalizeAntigravityThinking(req.Model, translated)
|
translated = normalizeAntigravityThinking(req.Model, translated)
|
||||||
|
|
||||||
|
|||||||
@@ -79,6 +79,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
|
|||||||
to := sdktranslator.FromString("gemini-cli")
|
to := sdktranslator.FromString("gemini-cli")
|
||||||
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||||
basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
|
basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
|
||||||
|
basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
|
||||||
basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
|
basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
|
||||||
basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
|
basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
|
||||||
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
|
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
|
||||||
@@ -217,6 +218,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
|
|||||||
to := sdktranslator.FromString("gemini-cli")
|
to := sdktranslator.FromString("gemini-cli")
|
||||||
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||||
basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
|
basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
|
||||||
|
basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
|
||||||
basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
|
basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
|
||||||
basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
|
basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
|
||||||
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
|
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
|
||||||
@@ -418,6 +420,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
|
|||||||
for _, attemptModel := range models {
|
for _, attemptModel := range models {
|
||||||
payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false)
|
payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false)
|
||||||
payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model)
|
payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model)
|
||||||
|
payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, payload)
|
||||||
payload = deleteJSONField(payload, "project")
|
payload = deleteJSONField(payload, "project")
|
||||||
payload = deleteJSONField(payload, "model")
|
payload = deleteJSONField(payload, "model")
|
||||||
payload = deleteJSONField(payload, "request.safetySettings")
|
payload = deleteJSONField(payload, "request.safetySettings")
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package util
|
package util
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/tidwall/gjson"
|
"github.com/tidwall/gjson"
|
||||||
@@ -13,6 +14,44 @@ const (
|
|||||||
GeminiOriginalModelMetadataKey = "gemini_original_model"
|
GeminiOriginalModelMetadataKey = "gemini_original_model"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Gemini model family detection patterns
|
||||||
|
var (
|
||||||
|
gemini3Pattern = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]`)
|
||||||
|
gemini3ProPattern = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]pro`)
|
||||||
|
gemini3FlashPattern = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]flash`)
|
||||||
|
gemini25Pattern = regexp.MustCompile(`(?i)^gemini[_-]?2\.5[_-]`)
|
||||||
|
)
|
||||||
|
|
||||||
|
// IsGemini3Model returns true if the model is a Gemini 3 family model.
|
||||||
|
// Gemini 3 models should use thinkingLevel (string) instead of thinkingBudget (number).
|
||||||
|
func IsGemini3Model(model string) bool {
|
||||||
|
return gemini3Pattern.MatchString(model)
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsGemini3ProModel returns true if the model is a Gemini 3 Pro variant.
|
||||||
|
// Gemini 3 Pro supports thinkingLevel: "low", "high" (default: "high")
|
||||||
|
func IsGemini3ProModel(model string) bool {
|
||||||
|
return gemini3ProPattern.MatchString(model)
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsGemini3FlashModel returns true if the model is a Gemini 3 Flash variant.
|
||||||
|
// Gemini 3 Flash supports thinkingLevel: "minimal", "low", "medium", "high" (default: "high")
|
||||||
|
func IsGemini3FlashModel(model string) bool {
|
||||||
|
return gemini3FlashPattern.MatchString(model)
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsGemini25Model returns true if the model is a Gemini 2.5 family model.
|
||||||
|
// Gemini 2.5 models should use thinkingBudget (number).
|
||||||
|
func IsGemini25Model(model string) bool {
|
||||||
|
return gemini25Pattern.MatchString(model)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Gemini3ProThinkingLevels are the valid thinkingLevel values for Gemini 3 Pro models.
|
||||||
|
var Gemini3ProThinkingLevels = []string{"low", "high"}
|
||||||
|
|
||||||
|
// Gemini3FlashThinkingLevels are the valid thinkingLevel values for Gemini 3 Flash models.
|
||||||
|
var Gemini3FlashThinkingLevels = []string{"minimal", "low", "medium", "high"}
|
||||||
|
|
||||||
func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool) []byte {
|
func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool) []byte {
|
||||||
if budget == nil && includeThoughts == nil {
|
if budget == nil && includeThoughts == nil {
|
||||||
return body
|
return body
|
||||||
@@ -69,10 +108,141 @@ func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *boo
|
|||||||
return updated
|
return updated
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ApplyGeminiThinkingLevel applies thinkingLevel config for Gemini 3 models.
|
||||||
|
// For standard Gemini API format (generationConfig.thinkingConfig path).
|
||||||
|
// Per Google's documentation, Gemini 3 models should use thinkingLevel instead of thinkingBudget.
|
||||||
|
func ApplyGeminiThinkingLevel(body []byte, level string, includeThoughts *bool) []byte {
|
||||||
|
if level == "" && includeThoughts == nil {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
updated := body
|
||||||
|
if level != "" {
|
||||||
|
valuePath := "generationConfig.thinkingConfig.thinkingLevel"
|
||||||
|
rewritten, err := sjson.SetBytes(updated, valuePath, level)
|
||||||
|
if err == nil {
|
||||||
|
updated = rewritten
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Default to including thoughts when a level is set but no explicit include flag is provided.
|
||||||
|
incl := includeThoughts
|
||||||
|
if incl == nil && level != "" {
|
||||||
|
defaultInclude := true
|
||||||
|
incl = &defaultInclude
|
||||||
|
}
|
||||||
|
if incl != nil {
|
||||||
|
valuePath := "generationConfig.thinkingConfig.includeThoughts"
|
||||||
|
rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
|
||||||
|
if err == nil {
|
||||||
|
updated = rewritten
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return updated
|
||||||
|
}
|
||||||
|
|
||||||
|
// ApplyGeminiCLIThinkingLevel applies thinkingLevel config for Gemini 3 models.
|
||||||
|
// For Gemini CLI API format (request.generationConfig.thinkingConfig path).
|
||||||
|
// Per Google's documentation, Gemini 3 models should use thinkingLevel instead of thinkingBudget.
|
||||||
|
func ApplyGeminiCLIThinkingLevel(body []byte, level string, includeThoughts *bool) []byte {
|
||||||
|
if level == "" && includeThoughts == nil {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
updated := body
|
||||||
|
if level != "" {
|
||||||
|
valuePath := "request.generationConfig.thinkingConfig.thinkingLevel"
|
||||||
|
rewritten, err := sjson.SetBytes(updated, valuePath, level)
|
||||||
|
if err == nil {
|
||||||
|
updated = rewritten
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Default to including thoughts when a level is set but no explicit include flag is provided.
|
||||||
|
incl := includeThoughts
|
||||||
|
if incl == nil && level != "" {
|
||||||
|
defaultInclude := true
|
||||||
|
incl = &defaultInclude
|
||||||
|
}
|
||||||
|
if incl != nil {
|
||||||
|
valuePath := "request.generationConfig.thinkingConfig.includeThoughts"
|
||||||
|
rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
|
||||||
|
if err == nil {
|
||||||
|
updated = rewritten
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return updated
|
||||||
|
}
|
||||||
|
|
||||||
|
// ValidateGemini3ThinkingLevel validates that the thinkingLevel is valid for the Gemini 3 model variant.
|
||||||
|
// Returns the validated level (normalized to lowercase) and true if valid, or empty string and false if invalid.
|
||||||
|
func ValidateGemini3ThinkingLevel(model, level string) (string, bool) {
|
||||||
|
if level == "" {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
normalized := strings.ToLower(strings.TrimSpace(level))
|
||||||
|
|
||||||
|
var validLevels []string
|
||||||
|
if IsGemini3ProModel(model) {
|
||||||
|
validLevels = Gemini3ProThinkingLevels
|
||||||
|
} else if IsGemini3FlashModel(model) {
|
||||||
|
validLevels = Gemini3FlashThinkingLevels
|
||||||
|
} else if IsGemini3Model(model) {
|
||||||
|
// Unknown Gemini 3 variant - allow all levels as fallback
|
||||||
|
validLevels = Gemini3FlashThinkingLevels
|
||||||
|
} else {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, valid := range validLevels {
|
||||||
|
if normalized == valid {
|
||||||
|
return normalized, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
// ThinkingBudgetToGemini3Level converts a thinkingBudget to a thinkingLevel for Gemini 3 models.
|
||||||
|
// This provides backward compatibility when thinkingBudget is provided for Gemini 3 models.
|
||||||
|
// Returns the appropriate thinkingLevel and true if conversion is possible.
|
||||||
|
func ThinkingBudgetToGemini3Level(model string, budget int) (string, bool) {
|
||||||
|
if !IsGemini3Model(model) {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Map budget to level based on Google's documentation
|
||||||
|
// Gemini 3 Pro: "low", "high" (default: "high")
|
||||||
|
// Gemini 3 Flash: "minimal", "low", "medium", "high" (default: "high")
|
||||||
|
switch {
|
||||||
|
case budget == -1:
|
||||||
|
// Dynamic budget maps to "high" (API default)
|
||||||
|
return "high", true
|
||||||
|
case budget == 0:
|
||||||
|
// Zero budget - Gemini 3 doesn't support disabling thinking
|
||||||
|
// Map to lowest available level
|
||||||
|
if IsGemini3FlashModel(model) {
|
||||||
|
return "minimal", true
|
||||||
|
}
|
||||||
|
return "low", true
|
||||||
|
case budget > 0 && budget <= 512:
|
||||||
|
if IsGemini3FlashModel(model) {
|
||||||
|
return "minimal", true
|
||||||
|
}
|
||||||
|
return "low", true
|
||||||
|
case budget <= 1024:
|
||||||
|
return "low", true
|
||||||
|
case budget <= 8192:
|
||||||
|
if IsGemini3FlashModel(model) {
|
||||||
|
return "medium", true
|
||||||
|
}
|
||||||
|
return "low", true // Pro doesn't have medium, use low
|
||||||
|
default:
|
||||||
|
return "high", true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// modelsWithDefaultThinking enumerates the models for which thinking should
// be enabled by default when the request does not supply an explicit
// thinkingConfig.
var modelsWithDefaultThinking = map[string]bool{
	"gemini-3-pro-preview":       true,
	"gemini-3-pro-image-preview": true,
	"gemini-3-flash-preview":     true,
}
|
||||||
|
|
||||||
// ModelHasDefaultThinking returns true if the model should have thinking enabled by default.
|
// ModelHasDefaultThinking returns true if the model should have thinking enabled by default.
|
||||||
@@ -83,6 +253,7 @@ func ModelHasDefaultThinking(model string) bool {
|
|||||||
// ApplyDefaultThinkingIfNeeded injects default thinkingConfig for models that require it.
|
// ApplyDefaultThinkingIfNeeded injects default thinkingConfig for models that require it.
|
||||||
// For standard Gemini API format (generationConfig.thinkingConfig path).
|
// For standard Gemini API format (generationConfig.thinkingConfig path).
|
||||||
// Returns the modified body if thinkingConfig was added, otherwise returns the original.
|
// Returns the modified body if thinkingConfig was added, otherwise returns the original.
|
||||||
|
// For Gemini 3 models, uses thinkingLevel instead of thinkingBudget per Google's documentation.
|
||||||
func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte {
|
func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte {
|
||||||
if !ModelHasDefaultThinking(model) {
|
if !ModelHasDefaultThinking(model) {
|
||||||
return body
|
return body
|
||||||
@@ -90,14 +261,59 @@ func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte {
|
|||||||
if gjson.GetBytes(body, "generationConfig.thinkingConfig").Exists() {
|
if gjson.GetBytes(body, "generationConfig.thinkingConfig").Exists() {
|
||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
// Gemini 3 models use thinkingLevel instead of thinkingBudget
|
||||||
|
if IsGemini3Model(model) {
|
||||||
|
// Don't set a default - let the API use its dynamic default ("high")
|
||||||
|
// Only set includeThoughts
|
||||||
|
updated, _ := sjson.SetBytes(body, "generationConfig.thinkingConfig.includeThoughts", true)
|
||||||
|
return updated
|
||||||
|
}
|
||||||
|
// Gemini 2.5 and other models use thinkingBudget
|
||||||
updated, _ := sjson.SetBytes(body, "generationConfig.thinkingConfig.thinkingBudget", -1)
|
updated, _ := sjson.SetBytes(body, "generationConfig.thinkingConfig.thinkingBudget", -1)
|
||||||
updated, _ = sjson.SetBytes(updated, "generationConfig.thinkingConfig.include_thoughts", true)
|
updated, _ = sjson.SetBytes(updated, "generationConfig.thinkingConfig.include_thoughts", true)
|
||||||
return updated
|
return updated
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ApplyGemini3ThinkingLevelFromMetadata applies thinkingLevel from metadata for Gemini 3 models.
|
||||||
|
// For standard Gemini API format (generationConfig.thinkingConfig path).
|
||||||
|
// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)).
|
||||||
|
func ApplyGemini3ThinkingLevelFromMetadata(model string, metadata map[string]any, body []byte) []byte {
|
||||||
|
if !IsGemini3Model(model) {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
effort, ok := ReasoningEffortFromMetadata(metadata)
|
||||||
|
if !ok || effort == "" {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
// Validate and apply the thinkingLevel
|
||||||
|
if level, valid := ValidateGemini3ThinkingLevel(model, effort); valid {
|
||||||
|
return ApplyGeminiThinkingLevel(body, level, nil)
|
||||||
|
}
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
// ApplyGemini3ThinkingLevelFromMetadataCLI applies thinkingLevel from metadata for Gemini 3 models.
|
||||||
|
// For Gemini CLI API format (request.generationConfig.thinkingConfig path).
|
||||||
|
// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)).
|
||||||
|
func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string]any, body []byte) []byte {
|
||||||
|
if !IsGemini3Model(model) {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
effort, ok := ReasoningEffortFromMetadata(metadata)
|
||||||
|
if !ok || effort == "" {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
// Validate and apply the thinkingLevel
|
||||||
|
if level, valid := ValidateGemini3ThinkingLevel(model, effort); valid {
|
||||||
|
return ApplyGeminiCLIThinkingLevel(body, level, nil)
|
||||||
|
}
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
// ApplyDefaultThinkingIfNeededCLI injects default thinkingConfig for models that require it.
|
// ApplyDefaultThinkingIfNeededCLI injects default thinkingConfig for models that require it.
|
||||||
// For Gemini CLI API format (request.generationConfig.thinkingConfig path).
|
// For Gemini CLI API format (request.generationConfig.thinkingConfig path).
|
||||||
// Returns the modified body if thinkingConfig was added, otherwise returns the original.
|
// Returns the modified body if thinkingConfig was added, otherwise returns the original.
|
||||||
|
// For Gemini 3 models, uses thinkingLevel instead of thinkingBudget per Google's documentation.
|
||||||
func ApplyDefaultThinkingIfNeededCLI(model string, body []byte) []byte {
|
func ApplyDefaultThinkingIfNeededCLI(model string, body []byte) []byte {
|
||||||
if !ModelHasDefaultThinking(model) {
|
if !ModelHasDefaultThinking(model) {
|
||||||
return body
|
return body
|
||||||
@@ -105,6 +321,14 @@ func ApplyDefaultThinkingIfNeededCLI(model string, body []byte) []byte {
|
|||||||
if gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() {
|
if gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() {
|
||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
// Gemini 3 models use thinkingLevel instead of thinkingBudget
|
||||||
|
if IsGemini3Model(model) {
|
||||||
|
// Don't set a default - let the API use its dynamic default ("high")
|
||||||
|
// Only set includeThoughts
|
||||||
|
updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts", true)
|
||||||
|
return updated
|
||||||
|
}
|
||||||
|
// Gemini 2.5 and other models use thinkingBudget
|
||||||
updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
|
updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
|
||||||
updated, _ = sjson.SetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
updated, _ = sjson.SetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||||
return updated
|
return updated
|
||||||
@@ -128,12 +352,29 @@ func StripThinkingConfigIfUnsupported(model string, body []byte) []byte {
|
|||||||
|
|
||||||
// NormalizeGeminiThinkingBudget normalizes the thinkingBudget value in a standard Gemini
|
// NormalizeGeminiThinkingBudget normalizes the thinkingBudget value in a standard Gemini
|
||||||
// request body (generationConfig.thinkingConfig.thinkingBudget path).
|
// request body (generationConfig.thinkingConfig.thinkingBudget path).
|
||||||
|
// For Gemini 3 models, converts thinkingBudget to thinkingLevel per Google's documentation.
|
||||||
func NormalizeGeminiThinkingBudget(model string, body []byte) []byte {
|
func NormalizeGeminiThinkingBudget(model string, body []byte) []byte {
|
||||||
const budgetPath = "generationConfig.thinkingConfig.thinkingBudget"
|
const budgetPath = "generationConfig.thinkingConfig.thinkingBudget"
|
||||||
|
const levelPath = "generationConfig.thinkingConfig.thinkingLevel"
|
||||||
|
|
||||||
budget := gjson.GetBytes(body, budgetPath)
|
budget := gjson.GetBytes(body, budgetPath)
|
||||||
if !budget.Exists() {
|
if !budget.Exists() {
|
||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For Gemini 3 models, convert thinkingBudget to thinkingLevel
|
||||||
|
if IsGemini3Model(model) {
|
||||||
|
if level, ok := ThinkingBudgetToGemini3Level(model, int(budget.Int())); ok {
|
||||||
|
updated, _ := sjson.SetBytes(body, levelPath, level)
|
||||||
|
updated, _ = sjson.DeleteBytes(updated, budgetPath)
|
||||||
|
return updated
|
||||||
|
}
|
||||||
|
// If conversion fails, just remove the budget (let API use default)
|
||||||
|
updated, _ := sjson.DeleteBytes(body, budgetPath)
|
||||||
|
return updated
|
||||||
|
}
|
||||||
|
|
||||||
|
// For Gemini 2.5 and other models, normalize the budget value
|
||||||
normalized := NormalizeThinkingBudget(model, int(budget.Int()))
|
normalized := NormalizeThinkingBudget(model, int(budget.Int()))
|
||||||
updated, _ := sjson.SetBytes(body, budgetPath, normalized)
|
updated, _ := sjson.SetBytes(body, budgetPath, normalized)
|
||||||
return updated
|
return updated
|
||||||
@@ -141,12 +382,29 @@ func NormalizeGeminiThinkingBudget(model string, body []byte) []byte {
|
|||||||
|
|
||||||
// NormalizeGeminiCLIThinkingBudget normalizes the thinkingBudget value in a Gemini CLI
|
// NormalizeGeminiCLIThinkingBudget normalizes the thinkingBudget value in a Gemini CLI
|
||||||
// request body (request.generationConfig.thinkingConfig.thinkingBudget path).
|
// request body (request.generationConfig.thinkingConfig.thinkingBudget path).
|
||||||
|
// For Gemini 3 models, converts thinkingBudget to thinkingLevel per Google's documentation.
|
||||||
func NormalizeGeminiCLIThinkingBudget(model string, body []byte) []byte {
|
func NormalizeGeminiCLIThinkingBudget(model string, body []byte) []byte {
|
||||||
const budgetPath = "request.generationConfig.thinkingConfig.thinkingBudget"
|
const budgetPath = "request.generationConfig.thinkingConfig.thinkingBudget"
|
||||||
|
const levelPath = "request.generationConfig.thinkingConfig.thinkingLevel"
|
||||||
|
|
||||||
budget := gjson.GetBytes(body, budgetPath)
|
budget := gjson.GetBytes(body, budgetPath)
|
||||||
if !budget.Exists() {
|
if !budget.Exists() {
|
||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For Gemini 3 models, convert thinkingBudget to thinkingLevel
|
||||||
|
if IsGemini3Model(model) {
|
||||||
|
if level, ok := ThinkingBudgetToGemini3Level(model, int(budget.Int())); ok {
|
||||||
|
updated, _ := sjson.SetBytes(body, levelPath, level)
|
||||||
|
updated, _ = sjson.DeleteBytes(updated, budgetPath)
|
||||||
|
return updated
|
||||||
|
}
|
||||||
|
// If conversion fails, just remove the budget (let API use default)
|
||||||
|
updated, _ := sjson.DeleteBytes(body, budgetPath)
|
||||||
|
return updated
|
||||||
|
}
|
||||||
|
|
||||||
|
// For Gemini 2.5 and other models, normalize the budget value
|
||||||
normalized := NormalizeThinkingBudget(model, int(budget.Int()))
|
normalized := NormalizeThinkingBudget(model, int(budget.Int()))
|
||||||
updated, _ := sjson.SetBytes(body, budgetPath, normalized)
|
updated, _ := sjson.SetBytes(body, budgetPath, normalized)
|
||||||
return updated
|
return updated
|
||||||
@@ -218,34 +476,42 @@ func ApplyReasoningEffortToGeminiCLI(body []byte, effort string) []byte {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ConvertThinkingLevelToBudget checks for "generationConfig.thinkingConfig.thinkingLevel"
|
// ConvertThinkingLevelToBudget checks for "generationConfig.thinkingConfig.thinkingLevel"
|
||||||
// and converts it to "thinkingBudget".
|
// and converts it to "thinkingBudget" for Gemini 2.5 models.
|
||||||
// "high" -> 32768
|
// For Gemini 3 models, preserves thinkingLevel as-is (does not convert).
|
||||||
// "low" -> 128
|
// Mappings for Gemini 2.5:
|
||||||
// It removes "thinkingLevel" after conversion.
|
// - "high" -> 32768
|
||||||
func ConvertThinkingLevelToBudget(body []byte) []byte {
|
// - "medium" -> 8192
|
||||||
|
// - "low" -> 1024
|
||||||
|
// - "minimal" -> 512
|
||||||
|
//
|
||||||
|
// It removes "thinkingLevel" after conversion (for Gemini 2.5 only).
|
||||||
|
func ConvertThinkingLevelToBudget(body []byte, model string) []byte {
|
||||||
levelPath := "generationConfig.thinkingConfig.thinkingLevel"
|
levelPath := "generationConfig.thinkingConfig.thinkingLevel"
|
||||||
res := gjson.GetBytes(body, levelPath)
|
res := gjson.GetBytes(body, levelPath)
|
||||||
if !res.Exists() {
|
if !res.Exists() {
|
||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For Gemini 3 models, preserve thinkingLevel - don't convert to budget
|
||||||
|
if IsGemini3Model(model) {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
level := strings.ToLower(res.String())
|
level := strings.ToLower(res.String())
|
||||||
var budget int
|
var budget int
|
||||||
switch level {
|
switch level {
|
||||||
case "high":
|
case "high":
|
||||||
budget = 32768
|
budget = 32768
|
||||||
|
case "medium":
|
||||||
|
budget = 8192
|
||||||
case "low":
|
case "low":
|
||||||
budget = 128
|
budget = 1024
|
||||||
|
case "minimal":
|
||||||
|
budget = 512
|
||||||
default:
|
default:
|
||||||
// If unknown level, we might just leave it or default.
|
// Unknown level - remove it and let the API use defaults
|
||||||
// User only specified high and low. We'll assume we shouldn't touch it if it's something else,
|
updated, _ := sjson.DeleteBytes(body, levelPath)
|
||||||
// or maybe we should just remove the invalid level?
|
return updated
|
||||||
// For safety adhering to strict instructions: "If high... if low...".
|
|
||||||
// If it's something else, the upstream might fail anyway if we leave it,
|
|
||||||
// but let's just delete the level if we processed it.
|
|
||||||
// Actually, let's check if we need to do anything for other values.
|
|
||||||
// For now, only handle high/low.
|
|
||||||
return body
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set budget
|
// Set budget
|
||||||
@@ -262,3 +528,50 @@ func ConvertThinkingLevelToBudget(body []byte) []byte {
|
|||||||
}
|
}
|
||||||
return updated
|
return updated
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ConvertThinkingLevelToBudgetCLI checks for "request.generationConfig.thinkingConfig.thinkingLevel"
|
||||||
|
// and converts it to "thinkingBudget" for Gemini 2.5 models.
|
||||||
|
// For Gemini 3 models, preserves thinkingLevel as-is (does not convert).
|
||||||
|
func ConvertThinkingLevelToBudgetCLI(body []byte, model string) []byte {
|
||||||
|
levelPath := "request.generationConfig.thinkingConfig.thinkingLevel"
|
||||||
|
res := gjson.GetBytes(body, levelPath)
|
||||||
|
if !res.Exists() {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
// For Gemini 3 models, preserve thinkingLevel - don't convert to budget
|
||||||
|
if IsGemini3Model(model) {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
level := strings.ToLower(res.String())
|
||||||
|
var budget int
|
||||||
|
switch level {
|
||||||
|
case "high":
|
||||||
|
budget = 32768
|
||||||
|
case "medium":
|
||||||
|
budget = 8192
|
||||||
|
case "low":
|
||||||
|
budget = 1024
|
||||||
|
case "minimal":
|
||||||
|
budget = 512
|
||||||
|
default:
|
||||||
|
// Unknown level - remove it and let the API use defaults
|
||||||
|
updated, _ := sjson.DeleteBytes(body, levelPath)
|
||||||
|
return updated
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set budget
|
||||||
|
budgetPath := "request.generationConfig.thinkingConfig.thinkingBudget"
|
||||||
|
updated, err := sjson.SetBytes(body, budgetPath, budget)
|
||||||
|
if err != nil {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove level
|
||||||
|
updated, err = sjson.DeleteBytes(updated, levelPath)
|
||||||
|
if err != nil {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
return updated
|
||||||
|
}
|
||||||
|
|||||||
423
test/gemini3_thinking_level_test.go
Normal file
423
test/gemini3_thinking_level_test.go
Normal file
@@ -0,0 +1,423 @@
|
|||||||
|
package test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||||
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||||
|
"github.com/tidwall/gjson"
|
||||||
|
)
|
||||||
|
|
||||||
|
// registerGemini3Models loads Gemini 3 models into the registry for testing.
|
||||||
|
func registerGemini3Models(t *testing.T) func() {
|
||||||
|
t.Helper()
|
||||||
|
reg := registry.GetGlobalRegistry()
|
||||||
|
uid := fmt.Sprintf("gemini3-test-%d", time.Now().UnixNano())
|
||||||
|
reg.RegisterClient(uid+"-gemini", "gemini", registry.GetGeminiModels())
|
||||||
|
reg.RegisterClient(uid+"-aistudio", "aistudio", registry.GetAIStudioModels())
|
||||||
|
return func() {
|
||||||
|
reg.UnregisterClient(uid + "-gemini")
|
||||||
|
reg.UnregisterClient(uid + "-aistudio")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIsGemini3Model(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
model string
|
||||||
|
expected bool
|
||||||
|
}{
|
||||||
|
{"gemini-3-pro-preview", true},
|
||||||
|
{"gemini-3-flash-preview", true},
|
||||||
|
{"gemini_3_pro_preview", true},
|
||||||
|
{"gemini-3-pro", true},
|
||||||
|
{"gemini-3-flash", true},
|
||||||
|
{"GEMINI-3-PRO-PREVIEW", true},
|
||||||
|
{"gemini-2.5-pro", false},
|
||||||
|
{"gemini-2.5-flash", false},
|
||||||
|
{"gpt-5", false},
|
||||||
|
{"claude-sonnet-4-5", false},
|
||||||
|
{"", false},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, cs := range cases {
|
||||||
|
t.Run(cs.model, func(t *testing.T) {
|
||||||
|
got := util.IsGemini3Model(cs.model)
|
||||||
|
if got != cs.expected {
|
||||||
|
t.Fatalf("IsGemini3Model(%q) = %v, want %v", cs.model, got, cs.expected)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIsGemini3ProModel(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
model string
|
||||||
|
expected bool
|
||||||
|
}{
|
||||||
|
{"gemini-3-pro-preview", true},
|
||||||
|
{"gemini_3_pro_preview", true},
|
||||||
|
{"gemini-3-pro", true},
|
||||||
|
{"GEMINI-3-PRO-PREVIEW", true},
|
||||||
|
{"gemini-3-flash-preview", false},
|
||||||
|
{"gemini-3-flash", false},
|
||||||
|
{"gemini-2.5-pro", false},
|
||||||
|
{"", false},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, cs := range cases {
|
||||||
|
t.Run(cs.model, func(t *testing.T) {
|
||||||
|
got := util.IsGemini3ProModel(cs.model)
|
||||||
|
if got != cs.expected {
|
||||||
|
t.Fatalf("IsGemini3ProModel(%q) = %v, want %v", cs.model, got, cs.expected)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIsGemini3FlashModel(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
model string
|
||||||
|
expected bool
|
||||||
|
}{
|
||||||
|
{"gemini-3-flash-preview", true},
|
||||||
|
{"gemini_3_flash_preview", true},
|
||||||
|
{"gemini-3-flash", true},
|
||||||
|
{"GEMINI-3-FLASH-PREVIEW", true},
|
||||||
|
{"gemini-3-pro-preview", false},
|
||||||
|
{"gemini-3-pro", false},
|
||||||
|
{"gemini-2.5-flash", false},
|
||||||
|
{"", false},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, cs := range cases {
|
||||||
|
t.Run(cs.model, func(t *testing.T) {
|
||||||
|
got := util.IsGemini3FlashModel(cs.model)
|
||||||
|
if got != cs.expected {
|
||||||
|
t.Fatalf("IsGemini3FlashModel(%q) = %v, want %v", cs.model, got, cs.expected)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValidateGemini3ThinkingLevel(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
model string
|
||||||
|
level string
|
||||||
|
wantOK bool
|
||||||
|
wantVal string
|
||||||
|
}{
|
||||||
|
// Gemini 3 Pro: supports "low", "high"
|
||||||
|
{"pro-low", "gemini-3-pro-preview", "low", true, "low"},
|
||||||
|
{"pro-high", "gemini-3-pro-preview", "high", true, "high"},
|
||||||
|
{"pro-minimal-invalid", "gemini-3-pro-preview", "minimal", false, ""},
|
||||||
|
{"pro-medium-invalid", "gemini-3-pro-preview", "medium", false, ""},
|
||||||
|
|
||||||
|
// Gemini 3 Flash: supports "minimal", "low", "medium", "high"
|
||||||
|
{"flash-minimal", "gemini-3-flash-preview", "minimal", true, "minimal"},
|
||||||
|
{"flash-low", "gemini-3-flash-preview", "low", true, "low"},
|
||||||
|
{"flash-medium", "gemini-3-flash-preview", "medium", true, "medium"},
|
||||||
|
{"flash-high", "gemini-3-flash-preview", "high", true, "high"},
|
||||||
|
|
||||||
|
// Case insensitivity
|
||||||
|
{"flash-LOW-case", "gemini-3-flash-preview", "LOW", true, "low"},
|
||||||
|
{"flash-High-case", "gemini-3-flash-preview", "High", true, "high"},
|
||||||
|
{"pro-HIGH-case", "gemini-3-pro-preview", "HIGH", true, "high"},
|
||||||
|
|
||||||
|
// Invalid levels
|
||||||
|
{"flash-invalid", "gemini-3-flash-preview", "xhigh", false, ""},
|
||||||
|
{"flash-invalid-auto", "gemini-3-flash-preview", "auto", false, ""},
|
||||||
|
{"flash-empty", "gemini-3-flash-preview", "", false, ""},
|
||||||
|
|
||||||
|
// Non-Gemini 3 models
|
||||||
|
{"non-gemini3", "gemini-2.5-pro", "high", false, ""},
|
||||||
|
{"gpt5", "gpt-5", "high", false, ""},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, cs := range cases {
|
||||||
|
t.Run(cs.name, func(t *testing.T) {
|
||||||
|
got, ok := util.ValidateGemini3ThinkingLevel(cs.model, cs.level)
|
||||||
|
if ok != cs.wantOK {
|
||||||
|
t.Fatalf("ValidateGemini3ThinkingLevel(%q, %q) ok = %v, want %v", cs.model, cs.level, ok, cs.wantOK)
|
||||||
|
}
|
||||||
|
if got != cs.wantVal {
|
||||||
|
t.Fatalf("ValidateGemini3ThinkingLevel(%q, %q) = %q, want %q", cs.model, cs.level, got, cs.wantVal)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestThinkingBudgetToGemini3Level(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
model string
|
||||||
|
budget int
|
||||||
|
wantOK bool
|
||||||
|
wantVal string
|
||||||
|
}{
|
||||||
|
// Gemini 3 Pro: maps to "low" or "high"
|
||||||
|
{"pro-dynamic", "gemini-3-pro-preview", -1, true, "high"},
|
||||||
|
{"pro-zero", "gemini-3-pro-preview", 0, true, "low"},
|
||||||
|
{"pro-small", "gemini-3-pro-preview", 1000, true, "low"},
|
||||||
|
{"pro-medium", "gemini-3-pro-preview", 8000, true, "low"},
|
||||||
|
{"pro-large", "gemini-3-pro-preview", 20000, true, "high"},
|
||||||
|
{"pro-huge", "gemini-3-pro-preview", 50000, true, "high"},
|
||||||
|
|
||||||
|
// Gemini 3 Flash: maps to "minimal", "low", "medium", "high"
|
||||||
|
{"flash-dynamic", "gemini-3-flash-preview", -1, true, "high"},
|
||||||
|
{"flash-zero", "gemini-3-flash-preview", 0, true, "minimal"},
|
||||||
|
{"flash-tiny", "gemini-3-flash-preview", 500, true, "minimal"},
|
||||||
|
{"flash-small", "gemini-3-flash-preview", 1000, true, "low"},
|
||||||
|
{"flash-medium-val", "gemini-3-flash-preview", 8000, true, "medium"},
|
||||||
|
{"flash-large", "gemini-3-flash-preview", 20000, true, "high"},
|
||||||
|
{"flash-huge", "gemini-3-flash-preview", 50000, true, "high"},
|
||||||
|
|
||||||
|
// Non-Gemini 3 models should return false
|
||||||
|
{"gemini25-budget", "gemini-2.5-pro", 8000, false, ""},
|
||||||
|
{"gpt5-budget", "gpt-5", 8000, false, ""},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, cs := range cases {
|
||||||
|
t.Run(cs.name, func(t *testing.T) {
|
||||||
|
got, ok := util.ThinkingBudgetToGemini3Level(cs.model, cs.budget)
|
||||||
|
if ok != cs.wantOK {
|
||||||
|
t.Fatalf("ThinkingBudgetToGemini3Level(%q, %d) ok = %v, want %v", cs.model, cs.budget, ok, cs.wantOK)
|
||||||
|
}
|
||||||
|
if got != cs.wantVal {
|
||||||
|
t.Fatalf("ThinkingBudgetToGemini3Level(%q, %d) = %q, want %q", cs.model, cs.budget, got, cs.wantVal)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyGemini3ThinkingLevelFromMetadata(t *testing.T) {
|
||||||
|
cleanup := registerGemini3Models(t)
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
model string
|
||||||
|
metadata map[string]any
|
||||||
|
inputBody string
|
||||||
|
wantLevel string
|
||||||
|
wantInclude bool
|
||||||
|
wantNoChange bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "flash-minimal-from-suffix",
|
||||||
|
model: "gemini-3-flash-preview",
|
||||||
|
metadata: map[string]any{"reasoning_effort": "minimal"},
|
||||||
|
inputBody: `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`,
|
||||||
|
wantLevel: "minimal",
|
||||||
|
wantInclude: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "flash-medium-from-suffix",
|
||||||
|
model: "gemini-3-flash-preview",
|
||||||
|
metadata: map[string]any{"reasoning_effort": "medium"},
|
||||||
|
inputBody: `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`,
|
||||||
|
wantLevel: "medium",
|
||||||
|
wantInclude: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "pro-high-from-suffix",
|
||||||
|
model: "gemini-3-pro-preview",
|
||||||
|
metadata: map[string]any{"reasoning_effort": "high"},
|
||||||
|
inputBody: `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`,
|
||||||
|
wantLevel: "high",
|
||||||
|
wantInclude: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no-metadata-no-change",
|
||||||
|
model: "gemini-3-flash-preview",
|
||||||
|
metadata: nil,
|
||||||
|
inputBody: `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`,
|
||||||
|
wantNoChange: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "non-gemini3-no-change",
|
||||||
|
model: "gemini-2.5-pro",
|
||||||
|
metadata: map[string]any{"reasoning_effort": "high"},
|
||||||
|
inputBody: `{"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}`,
|
||||||
|
wantNoChange: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid-level-no-change",
|
||||||
|
model: "gemini-3-flash-preview",
|
||||||
|
metadata: map[string]any{"reasoning_effort": "xhigh"},
|
||||||
|
inputBody: `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`,
|
||||||
|
wantNoChange: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, cs := range cases {
|
||||||
|
t.Run(cs.name, func(t *testing.T) {
|
||||||
|
input := []byte(cs.inputBody)
|
||||||
|
result := util.ApplyGemini3ThinkingLevelFromMetadata(cs.model, cs.metadata, input)
|
||||||
|
|
||||||
|
if cs.wantNoChange {
|
||||||
|
if string(result) != cs.inputBody {
|
||||||
|
t.Fatalf("expected no change, but got: %s", string(result))
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
level := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingLevel")
|
||||||
|
if !level.Exists() {
|
||||||
|
t.Fatalf("thinkingLevel not set in result: %s", string(result))
|
||||||
|
}
|
||||||
|
if level.String() != cs.wantLevel {
|
||||||
|
t.Fatalf("thinkingLevel = %q, want %q", level.String(), cs.wantLevel)
|
||||||
|
}
|
||||||
|
|
||||||
|
include := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts")
|
||||||
|
if cs.wantInclude && (!include.Exists() || !include.Bool()) {
|
||||||
|
t.Fatalf("includeThoughts should be true, got: %s", string(result))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyGemini3ThinkingLevelFromMetadataCLI(t *testing.T) {
|
||||||
|
cleanup := registerGemini3Models(t)
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
model string
|
||||||
|
metadata map[string]any
|
||||||
|
inputBody string
|
||||||
|
wantLevel string
|
||||||
|
wantInclude bool
|
||||||
|
wantNoChange bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "flash-minimal-from-suffix-cli",
|
||||||
|
model: "gemini-3-flash-preview",
|
||||||
|
metadata: map[string]any{"reasoning_effort": "minimal"},
|
||||||
|
inputBody: `{"request":{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}}`,
|
||||||
|
wantLevel: "minimal",
|
||||||
|
wantInclude: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "flash-low-from-suffix-cli",
|
||||||
|
model: "gemini-3-flash-preview",
|
||||||
|
metadata: map[string]any{"reasoning_effort": "low"},
|
||||||
|
inputBody: `{"request":{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}}`,
|
||||||
|
wantLevel: "low",
|
||||||
|
wantInclude: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "pro-low-from-suffix-cli",
|
||||||
|
model: "gemini-3-pro-preview",
|
||||||
|
metadata: map[string]any{"reasoning_effort": "low"},
|
||||||
|
inputBody: `{"request":{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}}`,
|
||||||
|
wantLevel: "low",
|
||||||
|
wantInclude: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no-metadata-no-change-cli",
|
||||||
|
model: "gemini-3-flash-preview",
|
||||||
|
metadata: nil,
|
||||||
|
inputBody: `{"request":{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}}`,
|
||||||
|
wantNoChange: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "non-gemini3-no-change-cli",
|
||||||
|
model: "gemini-2.5-pro",
|
||||||
|
metadata: map[string]any{"reasoning_effort": "high"},
|
||||||
|
inputBody: `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}}`,
|
||||||
|
wantNoChange: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, cs := range cases {
|
||||||
|
t.Run(cs.name, func(t *testing.T) {
|
||||||
|
input := []byte(cs.inputBody)
|
||||||
|
result := util.ApplyGemini3ThinkingLevelFromMetadataCLI(cs.model, cs.metadata, input)
|
||||||
|
|
||||||
|
if cs.wantNoChange {
|
||||||
|
if string(result) != cs.inputBody {
|
||||||
|
t.Fatalf("expected no change, but got: %s", string(result))
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
level := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel")
|
||||||
|
if !level.Exists() {
|
||||||
|
t.Fatalf("thinkingLevel not set in result: %s", string(result))
|
||||||
|
}
|
||||||
|
if level.String() != cs.wantLevel {
|
||||||
|
t.Fatalf("thinkingLevel = %q, want %q", level.String(), cs.wantLevel)
|
||||||
|
}
|
||||||
|
|
||||||
|
include := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts")
|
||||||
|
if cs.wantInclude && (!include.Exists() || !include.Bool()) {
|
||||||
|
t.Fatalf("includeThoughts should be true, got: %s", string(result))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeGeminiThinkingBudget_Gemini3Conversion(t *testing.T) {
|
||||||
|
cleanup := registerGemini3Models(t)
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
model string
|
||||||
|
inputBody string
|
||||||
|
wantLevel string
|
||||||
|
wantBudget bool // if true, expect thinkingBudget instead of thinkingLevel
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "gemini3-flash-budget-to-level",
|
||||||
|
model: "gemini-3-flash-preview",
|
||||||
|
inputBody: `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8000}}}`,
|
||||||
|
wantLevel: "medium",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "gemini3-pro-budget-to-level",
|
||||||
|
model: "gemini-3-pro-preview",
|
||||||
|
inputBody: `{"generationConfig":{"thinkingConfig":{"thinkingBudget":20000}}}`,
|
||||||
|
wantLevel: "high",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "gemini25-keeps-budget",
|
||||||
|
model: "gemini-2.5-pro",
|
||||||
|
inputBody: `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8000}}}`,
|
||||||
|
wantBudget: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, cs := range cases {
|
||||||
|
t.Run(cs.name, func(t *testing.T) {
|
||||||
|
result := util.NormalizeGeminiThinkingBudget(cs.model, []byte(cs.inputBody))
|
||||||
|
|
||||||
|
if cs.wantBudget {
|
||||||
|
budget := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget")
|
||||||
|
if !budget.Exists() {
|
||||||
|
t.Fatalf("thinkingBudget should exist for non-Gemini3 model: %s", string(result))
|
||||||
|
}
|
||||||
|
level := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingLevel")
|
||||||
|
if level.Exists() {
|
||||||
|
t.Fatalf("thinkingLevel should not exist for non-Gemini3 model: %s", string(result))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
level := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingLevel")
|
||||||
|
if !level.Exists() {
|
||||||
|
t.Fatalf("thinkingLevel should exist for Gemini3 model: %s", string(result))
|
||||||
|
}
|
||||||
|
if level.String() != cs.wantLevel {
|
||||||
|
t.Fatalf("thinkingLevel = %q, want %q", level.String(), cs.wantLevel)
|
||||||
|
}
|
||||||
|
budget := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget")
|
||||||
|
if budget.Exists() {
|
||||||
|
t.Fatalf("thinkingBudget should be removed for Gemini3 model: %s", string(result))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user