Compare commits

..

17 Commits

Author SHA1 Message Date
Luis Pater
9e5b1d24e8 Merge pull request #1276 from router-for-me/thinking
feat(thinking): enable thinking toggle for qwen3 and deepseek models
2026-01-28 11:16:54 +08:00
Luis Pater
a7dae6ad52 Merge remote-tracking branch 'origin/dev' into dev 2026-01-28 10:59:00 +08:00
Luis Pater
e93e05ae25 refactor: consolidate channel send logic with context-safe handlers
Optimize channel operations by introducing reusable context-aware send functions (`send` and `sendErr`) across `wsrelay`, `handlers`, and `cliproxy`. Ensure graceful handling of canceled contexts during stream operations.
2026-01-28 10:58:35 +08:00
hkfires
c8c27325dc feat(thinking): enable thinking toggle for qwen3 and deepseek models
Fix #1245
2026-01-28 09:54:05 +08:00
hkfires
c3b6f3918c chore(git): stop ignoring .idea and data directories 2026-01-28 09:52:44 +08:00
Luis Pater
bbb55a8ab4 Merge pull request #1170 from BianBianY/main
feat: optimization enable/disable auth files
2026-01-28 09:34:35 +08:00
Luis Pater
7583193c2a Merge pull request #1257 from router-for-me/model
feat(api): add management model definitions endpoint
2026-01-27 20:32:04 +08:00
hkfires
7cc3bd4ba0 chore(deps): mark golang.org/x/text as indirect 2026-01-27 19:19:52 +08:00
hkfires
88a0f095e8 chore(registry): disable gemini 2.5 flash image preview model 2026-01-27 18:33:13 +08:00
hkfires
c65f64dce0 chore(registry): comment out rev19-uic3-1p model config 2026-01-27 18:33:13 +08:00
hkfires
d18cd217e1 feat(api): add management model definitions endpoint 2026-01-27 18:33:12 +08:00
Luis Pater
ba4a1ab433 Merge pull request #1261 from Darley-Wey/fix/gemini_scheme
fix(gemini): force type to string for enum fields to fix Antigravity Gemini API error
2026-01-27 17:02:25 +08:00
Darley
decddb521e fix(gemini): force type to string for enum fields to fix Antigravity Gemini API error (Relates to #1260) 2026-01-27 11:14:08 +03:30
Luis Pater
70897247b2 feat(auth): add support for request_retry and disable_cooling overrides
Implement `request_retry` and `disable_cooling` metadata overrides for authentication management. Update retry and cooling logic accordingly across `Manager`, Antigravity executor, and file synthesizer. Add tests to validate new behaviors.
2026-01-26 21:59:08 +08:00
Luis Pater
9c341f5aa5 feat(auth): add skip persistence context key for file watcher events
Introduce `WithSkipPersist` to disable persistence during Manager Update/Register calls, preventing write-back loops caused by redundant file writes. Add corresponding tests and integrate with existing file store and conductor logic.
2026-01-26 18:20:19 +08:00
Yang Bian
f7bfa8a05c Merge branch 'upstream-main' 2026-01-24 16:28:08 +08:00
Yang Bian
c8620d1633 feat: optimization enable/disable auth files 2026-01-23 18:03:09 +08:00
20 changed files with 1451 additions and 964 deletions

2
go.mod
View File

@@ -21,7 +21,6 @@ require (
golang.org/x/crypto v0.45.0
golang.org/x/net v0.47.0
golang.org/x/oauth2 v0.30.0
golang.org/x/text v0.31.0
gopkg.in/natefinch/lumberjack.v2 v2.2.1
gopkg.in/yaml.v3 v3.0.1
)
@@ -71,6 +70,7 @@ require (
golang.org/x/arch v0.8.0 // indirect
golang.org/x/sync v0.18.0 // indirect
golang.org/x/sys v0.38.0 // indirect
golang.org/x/text v0.31.0 // indirect
google.golang.org/protobuf v1.34.1 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
)

View File

@@ -0,0 +1,33 @@
package management
import (
"net/http"
"strings"
"github.com/gin-gonic/gin"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
)
// GetStaticModelDefinitions returns static model metadata for a given channel.
//
// The channel is read from the path parameter (:channel) first and, when that
// is empty, from the query parameter (?channel=...). Surrounding whitespace is
// ignored. Responses:
//   - 400 {"error": "channel is required"} when no channel was supplied;
//   - 400 {"error": "unknown channel", "channel": <trimmed input>} when the
//     registry returns nil for the channel;
//   - 200 {"channel": <lower-cased channel>, "models": [...]} otherwise.
func (h *Handler) GetStaticModelDefinitions(c *gin.Context) {
	channel := strings.TrimSpace(c.Param("channel"))
	if channel == "" {
		channel = strings.TrimSpace(c.Query("channel"))
	}
	if channel == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "channel is required"})
		return
	}

	models := registry.GetStaticModelDefinitionsByChannel(channel)
	if models == nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "unknown channel", "channel": channel})
		return
	}

	// channel was trimmed when it was read, so only case-folding is left to do.
	c.JSON(http.StatusOK, gin.H{
		"channel": strings.ToLower(channel),
		"models":  models,
	})
}

View File

@@ -607,6 +607,7 @@ func (s *Server) registerManagementRoutes() {
mgmt.GET("/auth-files", s.mgmt.ListAuthFiles)
mgmt.GET("/auth-files/models", s.mgmt.GetAuthFileModels)
mgmt.GET("/model-definitions/:channel", s.mgmt.GetStaticModelDefinitions)
mgmt.GET("/auth-files/download", s.mgmt.DownloadAuthFile)
mgmt.POST("/auth-files", s.mgmt.UploadAuthFile)
mgmt.DELETE("/auth-files", s.mgmt.DeleteAuthFile)

View File

@@ -1,848 +1,69 @@
// Package registry provides model definitions for various AI service providers.
// This file contains static model definitions that can be used by clients
// when registering their supported models.
// Package registry provides model definitions and lookup helpers for various AI providers.
// Static model metadata is stored in model_definitions_static_data.go.
package registry
// GetClaudeModels returns the standard Claude model definitions
func GetClaudeModels() []*ModelInfo {
return []*ModelInfo{
import (
"sort"
"strings"
)
{
ID: "claude-haiku-4-5-20251001",
Object: "model",
Created: 1759276800, // 2025-10-01
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4.5 Haiku",
ContextLength: 200000,
MaxCompletionTokens: 64000,
// Thinking: not supported for Haiku models
},
{
ID: "claude-sonnet-4-5-20250929",
Object: "model",
Created: 1759104000, // 2025-09-29
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4.5 Sonnet",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
},
{
ID: "claude-opus-4-5-20251101",
Object: "model",
Created: 1761955200, // 2025-11-01
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4.5 Opus",
Description: "Premium model combining maximum intelligence with practical performance",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
},
{
ID: "claude-opus-4-1-20250805",
Object: "model",
Created: 1722945600, // 2025-08-05
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4.1 Opus",
ContextLength: 200000,
MaxCompletionTokens: 32000,
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
},
{
ID: "claude-opus-4-20250514",
Object: "model",
Created: 1715644800, // 2025-05-14
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4 Opus",
ContextLength: 200000,
MaxCompletionTokens: 32000,
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
},
{
ID: "claude-sonnet-4-20250514",
Object: "model",
Created: 1715644800, // 2025-05-14
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4 Sonnet",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
},
{
ID: "claude-3-7-sonnet-20250219",
Object: "model",
Created: 1708300800, // 2025-02-19
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 3.7 Sonnet",
ContextLength: 128000,
MaxCompletionTokens: 8192,
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
},
{
ID: "claude-3-5-haiku-20241022",
Object: "model",
Created: 1729555200, // 2024-10-22
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 3.5 Haiku",
ContextLength: 128000,
MaxCompletionTokens: 8192,
// Thinking: not supported for Haiku models
},
}
}
// GetGeminiModels returns the static Gemini model definitions.
// A new slice with newly allocated entries is built on every call.
func GetGeminiModels() []*ModelInfo {
	catalog := []*ModelInfo{
		// Gemini 2.5 family.
		{
			ID: "gemini-2.5-pro",
			Object: "model",
			Created: 1750118400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-2.5-pro",
			Version: "2.5",
			DisplayName: "Gemini 2.5 Pro",
			Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			ID: "gemini-2.5-flash",
			Object: "model",
			Created: 1750118400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-2.5-flash",
			Version: "001",
			DisplayName: "Gemini 2.5 Flash",
			Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			ID: "gemini-2.5-flash-lite",
			Object: "model",
			Created: 1753142400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-2.5-flash-lite",
			Version: "2.5",
			DisplayName: "Gemini 2.5 Flash Lite",
			Description: "Our smallest and most cost effective model, built for at scale usage.",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		// Gemini 3 previews; these also declare named thinking levels.
		{
			ID: "gemini-3-pro-preview",
			Object: "model",
			Created: 1737158400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-3-pro-preview",
			Version: "3.0",
			DisplayName: "Gemini 3 Pro Preview",
			Description: "Gemini 3 Pro Preview",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
		},
		{
			ID: "gemini-3-flash-preview",
			Object: "model",
			Created: 1765929600,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-3-flash-preview",
			Version: "3.0",
			DisplayName: "Gemini 3 Flash Preview",
			Description: "Gemini 3 Flash Preview",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
		},
		{
			ID: "gemini-3-pro-image-preview",
			Object: "model",
			Created: 1737158400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-3-pro-image-preview",
			Version: "3.0",
			DisplayName: "Gemini 3 Pro Image Preview",
			Description: "Gemini 3 Pro Image Preview",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
		},
	}
	return catalog
}
// GetGeminiVertexModels returns the static model definitions for the Vertex
// variant of Gemini: the Gemini chat models followed by the Imagen image
// generation models. A new slice is built on every call.
func GetGeminiVertexModels() []*ModelInfo {
	catalog := []*ModelInfo{
		// Gemini 2.5 family.
		{
			ID: "gemini-2.5-pro",
			Object: "model",
			Created: 1750118400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-2.5-pro",
			Version: "2.5",
			DisplayName: "Gemini 2.5 Pro",
			Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			ID: "gemini-2.5-flash",
			Object: "model",
			Created: 1750118400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-2.5-flash",
			Version: "001",
			DisplayName: "Gemini 2.5 Flash",
			Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			ID: "gemini-2.5-flash-lite",
			Object: "model",
			Created: 1753142400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-2.5-flash-lite",
			Version: "2.5",
			DisplayName: "Gemini 2.5 Flash Lite",
			Description: "Our smallest and most cost effective model, built for at scale usage.",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		// Gemini 3 previews; these also declare named thinking levels.
		{
			ID: "gemini-3-pro-preview",
			Object: "model",
			Created: 1737158400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-3-pro-preview",
			Version: "3.0",
			DisplayName: "Gemini 3 Pro Preview",
			Description: "Gemini 3 Pro Preview",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
		},
		{
			ID: "gemini-3-flash-preview",
			Object: "model",
			Created: 1765929600,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-3-flash-preview",
			Version: "3.0",
			DisplayName: "Gemini 3 Flash Preview",
			Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
		},
		{
			ID: "gemini-3-pro-image-preview",
			Object: "model",
			Created: 1737158400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-3-pro-image-preview",
			Version: "3.0",
			DisplayName: "Gemini 3 Pro Image Preview",
			Description: "Gemini 3 Pro Image Preview",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
		},
		// Imagen image generation models - use :predict action
		{
			ID: "imagen-4.0-generate-001",
			Object: "model",
			Created: 1750000000,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/imagen-4.0-generate-001",
			Version: "4.0",
			DisplayName: "Imagen 4.0 Generate",
			Description: "Imagen 4.0 image generation model",
			SupportedGenerationMethods: []string{"predict"},
		},
		{
			ID: "imagen-4.0-ultra-generate-001",
			Object: "model",
			Created: 1750000000,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/imagen-4.0-ultra-generate-001",
			Version: "4.0",
			DisplayName: "Imagen 4.0 Ultra Generate",
			Description: "Imagen 4.0 Ultra high-quality image generation model",
			SupportedGenerationMethods: []string{"predict"},
		},
		{
			ID: "imagen-3.0-generate-002",
			Object: "model",
			Created: 1740000000,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/imagen-3.0-generate-002",
			Version: "3.0",
			DisplayName: "Imagen 3.0 Generate",
			Description: "Imagen 3.0 image generation model",
			SupportedGenerationMethods: []string{"predict"},
		},
		{
			ID: "imagen-3.0-fast-generate-001",
			Object: "model",
			Created: 1740000000,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/imagen-3.0-fast-generate-001",
			Version: "3.0",
			DisplayName: "Imagen 3.0 Fast Generate",
			Description: "Imagen 3.0 fast image generation model",
			SupportedGenerationMethods: []string{"predict"},
		},
		{
			ID: "imagen-4.0-fast-generate-001",
			Object: "model",
			Created: 1750000000,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/imagen-4.0-fast-generate-001",
			Version: "4.0",
			DisplayName: "Imagen 4.0 Fast Generate",
			Description: "Imagen 4.0 fast image generation model",
			SupportedGenerationMethods: []string{"predict"},
		},
	}
	return catalog
}
// GetGeminiCLIModels returns the static Gemini model definitions used for the
// Gemini CLI variant. A new slice is built on every call.
func GetGeminiCLIModels() []*ModelInfo {
	catalog := []*ModelInfo{
		// Gemini 2.5 family.
		{
			ID: "gemini-2.5-pro",
			Object: "model",
			Created: 1750118400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-2.5-pro",
			Version: "2.5",
			DisplayName: "Gemini 2.5 Pro",
			Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			ID: "gemini-2.5-flash",
			Object: "model",
			Created: 1750118400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-2.5-flash",
			Version: "001",
			DisplayName: "Gemini 2.5 Flash",
			Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			ID: "gemini-2.5-flash-lite",
			Object: "model",
			Created: 1753142400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-2.5-flash-lite",
			Version: "2.5",
			DisplayName: "Gemini 2.5 Flash Lite",
			Description: "Our smallest and most cost effective model, built for at scale usage.",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		// Gemini 3 previews; these also declare named thinking levels.
		{
			ID: "gemini-3-pro-preview",
			Object: "model",
			Created: 1737158400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-3-pro-preview",
			Version: "3.0",
			DisplayName: "Gemini 3 Pro Preview",
			Description: "Our most intelligent model with SOTA reasoning and multimodal understanding, and powerful agentic and vibe coding capabilities",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
		},
		{
			ID: "gemini-3-flash-preview",
			Object: "model",
			Created: 1765929600,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-3-flash-preview",
			Version: "3.0",
			DisplayName: "Gemini 3 Flash Preview",
			Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
		},
	}
	return catalog
}
// GetAIStudioModels returns the static Gemini model definitions for AI Studio
// integrations, including the "-latest" aliases and the image models.
// A new slice is built on every call.
func GetAIStudioModels() []*ModelInfo {
	catalog := []*ModelInfo{
		// Gemini 2.5 family.
		{
			ID: "gemini-2.5-pro",
			Object: "model",
			Created: 1750118400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-2.5-pro",
			Version: "2.5",
			DisplayName: "Gemini 2.5 Pro",
			Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			ID: "gemini-2.5-flash",
			Object: "model",
			Created: 1750118400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-2.5-flash",
			Version: "001",
			DisplayName: "Gemini 2.5 Flash",
			Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			ID: "gemini-2.5-flash-lite",
			Object: "model",
			Created: 1753142400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-2.5-flash-lite",
			Version: "2.5",
			DisplayName: "Gemini 2.5 Flash Lite",
			Description: "Our smallest and most cost effective model, built for at scale usage.",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		// Gemini 3 previews.
		{
			ID: "gemini-3-pro-preview",
			Object: "model",
			Created: 1737158400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-3-pro-preview",
			Version: "3.0",
			DisplayName: "Gemini 3 Pro Preview",
			Description: "Gemini 3 Pro Preview",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			ID: "gemini-3-flash-preview",
			Object: "model",
			Created: 1765929600,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-3-flash-preview",
			Version: "3.0",
			DisplayName: "Gemini 3 Flash Preview",
			Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		// "-latest" aliases.
		{
			ID: "gemini-pro-latest",
			Object: "model",
			Created: 1750118400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-pro-latest",
			Version: "2.5",
			DisplayName: "Gemini Pro Latest",
			Description: "Latest release of Gemini Pro",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			ID: "gemini-flash-latest",
			Object: "model",
			Created: 1750118400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-flash-latest",
			Version: "2.5",
			DisplayName: "Gemini Flash Latest",
			Description: "Latest release of Gemini Flash",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			ID: "gemini-flash-lite-latest",
			Object: "model",
			Created: 1753142400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-flash-lite-latest",
			Version: "2.5",
			DisplayName: "Gemini Flash-Lite Latest",
			Description: "Latest release of Gemini Flash-Lite",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		// Image models.
		{
			ID: "gemini-2.5-flash-image-preview",
			Object: "model",
			Created: 1756166400,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-2.5-flash-image-preview",
			Version: "2.5",
			DisplayName: "Gemini 2.5 Flash Image Preview",
			Description: "State-of-the-art image generation and editing model.",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 8192,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			// image models don't support thinkingConfig; leave Thinking nil
		},
		{
			ID: "gemini-2.5-flash-image",
			Object: "model",
			Created: 1759363200,
			OwnedBy: "google",
			Type: "gemini",
			Name: "models/gemini-2.5-flash-image",
			Version: "2.5",
			DisplayName: "Gemini 2.5 Flash Image",
			Description: "State-of-the-art image generation and editing model.",
			InputTokenLimit: 1048576,
			OutputTokenLimit: 8192,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			// image models don't support thinkingConfig; leave Thinking nil
		},
	}
	return catalog
}
// GetOpenAIModels returns the static OpenAI (GPT 5.x) model definitions.
// A new slice with newly allocated entries is built on every call.
//
// Thinking support is expressed purely as named levels; the accepted levels
// differ per model (for example only some entries accept "none", "minimal"
// or "xhigh").
func GetOpenAIModels() []*ModelInfo {
	return []*ModelInfo{
		// GPT 5 family.
		{
			ID: "gpt-5",
			Object: "model",
			Created: 1754524800,
			OwnedBy: "openai",
			Type: "openai",
			Version: "gpt-5-2025-08-07",
			DisplayName: "GPT 5",
			Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
			ContextLength: 400000,
			MaxCompletionTokens: 128000,
			SupportedParameters: []string{"tools"},
			Thinking: &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
		},
		{
			ID: "gpt-5-codex",
			Object: "model",
			Created: 1757894400,
			OwnedBy: "openai",
			Type: "openai",
			Version: "gpt-5-2025-09-15",
			DisplayName: "GPT 5 Codex",
			Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
			ContextLength: 400000,
			MaxCompletionTokens: 128000,
			SupportedParameters: []string{"tools"},
			Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
		},
		{
			ID: "gpt-5-codex-mini",
			Object: "model",
			Created: 1762473600,
			OwnedBy: "openai",
			Type: "openai",
			Version: "gpt-5-2025-11-07",
			DisplayName: "GPT 5 Codex Mini",
			Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
			ContextLength: 400000,
			MaxCompletionTokens: 128000,
			SupportedParameters: []string{"tools"},
			Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
		},
		// GPT 5.1 family.
		{
			ID: "gpt-5.1",
			Object: "model",
			Created: 1762905600,
			OwnedBy: "openai",
			Type: "openai",
			Version: "gpt-5.1-2025-11-12",
			// Name and description must identify 5.1; they previously repeated
			// the "GPT 5" text of the gpt-5 entry.
			DisplayName: "GPT 5.1",
			Description: "Stable version of GPT 5.1, The best model for coding and agentic tasks across domains.",
			ContextLength: 400000,
			MaxCompletionTokens: 128000,
			SupportedParameters: []string{"tools"},
			Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
		},
		{
			ID: "gpt-5.1-codex",
			Object: "model",
			Created: 1762905600,
			OwnedBy: "openai",
			Type: "openai",
			Version: "gpt-5.1-2025-11-12",
			DisplayName: "GPT 5.1 Codex",
			Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
			ContextLength: 400000,
			MaxCompletionTokens: 128000,
			SupportedParameters: []string{"tools"},
			Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
		},
		{
			ID: "gpt-5.1-codex-mini",
			Object: "model",
			Created: 1762905600,
			OwnedBy: "openai",
			Type: "openai",
			Version: "gpt-5.1-2025-11-12",
			DisplayName: "GPT 5.1 Codex Mini",
			Description: "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
			ContextLength: 400000,
			MaxCompletionTokens: 128000,
			SupportedParameters: []string{"tools"},
			Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
		},
		{
			ID: "gpt-5.1-codex-max",
			Object: "model",
			Created: 1763424000,
			OwnedBy: "openai",
			Type: "openai",
			Version: "gpt-5.1-max",
			DisplayName: "GPT 5.1 Codex Max",
			Description: "Stable version of GPT 5.1 Codex Max",
			ContextLength: 400000,
			MaxCompletionTokens: 128000,
			SupportedParameters: []string{"tools"},
			Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
		},
		// GPT 5.2 family.
		{
			ID: "gpt-5.2",
			Object: "model",
			Created: 1765440000,
			OwnedBy: "openai",
			Type: "openai",
			Version: "gpt-5.2",
			DisplayName: "GPT 5.2",
			Description: "Stable version of GPT 5.2",
			ContextLength: 400000,
			MaxCompletionTokens: 128000,
			SupportedParameters: []string{"tools"},
			Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
		},
		{
			ID: "gpt-5.2-codex",
			Object: "model",
			Created: 1765440000,
			OwnedBy: "openai",
			Type: "openai",
			Version: "gpt-5.2",
			DisplayName: "GPT 5.2 Codex",
			Description: "Stable version of GPT 5.2 Codex, The best model for coding and agentic tasks across domains.",
			ContextLength: 400000,
			MaxCompletionTokens: 128000,
			SupportedParameters: []string{"tools"},
			Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
		},
	}
}
// GetQwenModels returns the static Qwen model definitions.
// A new slice with newly allocated entries is built on every call.
// None of these entries declares thinking support (Thinking is left nil).
func GetQwenModels() []*ModelInfo {
	return []*ModelInfo{
		{
			ID: "qwen3-coder-plus",
			Object: "model",
			Created: 1753228800,
			OwnedBy: "qwen",
			Type: "qwen",
			Version: "3.0",
			DisplayName: "Qwen3 Coder Plus",
			Description: "Advanced code generation and understanding model",
			ContextLength: 32768,
			MaxCompletionTokens: 8192,
			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
		},
		{
			ID: "qwen3-coder-flash",
			Object: "model",
			Created: 1753228800,
			OwnedBy: "qwen",
			Type: "qwen",
			Version: "3.0",
			DisplayName: "Qwen3 Coder Flash",
			Description: "Fast code generation model",
			ContextLength: 8192,
			MaxCompletionTokens: 2048,
			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
		},
		{
			ID: "vision-model",
			Object: "model",
			Created: 1758672000,
			OwnedBy: "qwen",
			Type: "qwen",
			Version: "3.0",
			DisplayName: "Qwen3 Vision Model",
			// The description previously read "Vision model model" (doubled word).
			Description: "Vision model",
			ContextLength: 32768,
			MaxCompletionTokens: 2048,
			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
		},
	}
}
// iFlowThinkingSupport is the single ThinkingSupport value shared by the iFlow
// models whose thinking mode is a boolean toggle
// (chat_template_kwargs.enable_thinking). It is declared as a list of levels so
// that the standard level normalization applies before the conversion to the
// toggle.
var iFlowThinkingSupport = &ThinkingSupport{
	Levels: []string{
		"none",
		"auto",
		"minimal",
		"low",
		"medium",
		"high",
		"xhigh",
	},
}
// GetIFlowModels returns supported models for iFlow OAuth accounts.
func GetIFlowModels() []*ModelInfo {
entries := []struct {
ID string
DisplayName string
Description string
Created int64
Thinking *ThinkingSupport
}{
{ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
{ID: "qwen3-max", DisplayName: "Qwen3-Max", Description: "Qwen3 flagship model", Created: 1758672000},
{ID: "qwen3-vl-plus", DisplayName: "Qwen3-VL-Plus", Description: "Qwen3 multimodal vision-language", Created: 1758672000},
{ID: "qwen3-max-preview", DisplayName: "Qwen3-Max-Preview", Description: "Qwen3 Max preview build", Created: 1757030400},
{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400, Thinking: iFlowThinkingSupport},
{ID: "glm-4.7", DisplayName: "GLM-4.7", Description: "Zhipu GLM 4.7 general model", Created: 1766448000, Thinking: iFlowThinkingSupport},
{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200},
{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Chat", Created: 1764576000},
{ID: "deepseek-v3.2-reasoner", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Reasoner", Created: 1764576000},
{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
{ID: "iflow-rome-30ba3b", DisplayName: "iFlow-ROME", Description: "iFlow Rome 30BA3B model", Created: 1736899200},
}
models := make([]*ModelInfo, 0, len(entries))
for _, entry := range entries {
models = append(models, &ModelInfo{
ID: entry.ID,
Object: "model",
Created: entry.Created,
OwnedBy: "iflow",
Type: "iflow",
DisplayName: entry.DisplayName,
Description: entry.Description,
Thinking: entry.Thinking,
// GetStaticModelDefinitionsByChannel returns static model definitions for a given channel/provider.
// It returns nil when the channel is unknown.
//
// Supported channels:
// - claude
// - gemini
// - vertex
// - gemini-cli
// - aistudio
// - codex
// - qwen
// - iflow
// - antigravity (returns static overrides only)
func GetStaticModelDefinitionsByChannel(channel string) []*ModelInfo {
key := strings.ToLower(strings.TrimSpace(channel))
switch key {
case "claude":
return GetClaudeModels()
case "gemini":
return GetGeminiModels()
case "vertex":
return GetGeminiVertexModels()
case "gemini-cli":
return GetGeminiCLIModels()
case "aistudio":
return GetAIStudioModels()
case "codex":
return GetOpenAIModels()
case "qwen":
return GetQwenModels()
case "iflow":
return GetIFlowModels()
case "antigravity":
cfg := GetAntigravityModelConfig()
if len(cfg) == 0 {
return nil
}
models := make([]*ModelInfo, 0, len(cfg))
for modelID, entry := range cfg {
if modelID == "" || entry == nil {
continue
}
models = append(models, &ModelInfo{
ID: modelID,
Object: "model",
OwnedBy: "antigravity",
Type: "antigravity",
Thinking: entry.Thinking,
MaxCompletionTokens: entry.MaxCompletionTokens,
})
}
sort.Slice(models, func(i, j int) bool {
return strings.ToLower(models[i].ID) < strings.ToLower(models[j].ID)
})
}
return models
}
// AntigravityModelConfig captures static antigravity model overrides, including
// Thinking budget limits and provider max completion tokens.
type AntigravityModelConfig struct {
Thinking *ThinkingSupport
MaxCompletionTokens int
}
// GetAntigravityModelConfig returns static configuration for antigravity models.
// Keys use upstream model names returned by the Antigravity models endpoint.
func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
return map[string]*AntigravityModelConfig{
"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
"rev19-uic3-1p": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
"gemini-3-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
"gemini-3-pro-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
"gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
"claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
"claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
"claude-sonnet-4-5": {MaxCompletionTokens: 64000},
"gpt-oss-120b-medium": {},
"tab_flash_lite_preview": {},
return models
default:
return nil
}
}

View File

@@ -0,0 +1,846 @@
// Package registry provides model definitions for various AI service providers.
// This file stores the static model metadata catalog.
package registry
// GetClaudeModels returns the static catalog of Claude models.
//
// Every call builds a new slice with newly allocated entries. Created is the
// Unix timestamp (seconds, 00:00 UTC) of the snapshot date embedded in the
// model ID. Thinking is left nil for models without thinking support.
func GetClaudeModels() []*ModelInfo {
	return []*ModelInfo{
		{
			ID:                  "claude-haiku-4-5-20251001",
			Object:              "model",
			Created:             1759276800, // 2025-10-01
			OwnedBy:             "anthropic",
			Type:                "claude",
			DisplayName:         "Claude 4.5 Haiku",
			ContextLength:       200000,
			MaxCompletionTokens: 64000,
			// Thinking: not supported for Haiku models
		},
		{
			ID:                  "claude-sonnet-4-5-20250929",
			Object:              "model",
			Created:             1759104000, // 2025-09-29
			OwnedBy:             "anthropic",
			Type:                "claude",
			DisplayName:         "Claude 4.5 Sonnet",
			ContextLength:       200000,
			MaxCompletionTokens: 64000,
			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
		},
		{
			ID:                  "claude-opus-4-5-20251101",
			Object:              "model",
			Created:             1761955200, // 2025-11-01
			OwnedBy:             "anthropic",
			Type:                "claude",
			DisplayName:         "Claude 4.5 Opus",
			Description:         "Premium model combining maximum intelligence with practical performance",
			ContextLength:       200000,
			MaxCompletionTokens: 64000,
			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
		},
		{
			ID:                  "claude-opus-4-1-20250805",
			Object:              "model",
			Created:             1754352000, // 2025-08-05 (previously a 2024 timestamp)
			OwnedBy:             "anthropic",
			Type:                "claude",
			DisplayName:         "Claude 4.1 Opus",
			ContextLength:       200000,
			MaxCompletionTokens: 32000,
			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
		},
		{
			ID:                  "claude-opus-4-20250514",
			Object:              "model",
			Created:             1747180800, // 2025-05-14 (previously a 2024 timestamp)
			OwnedBy:             "anthropic",
			Type:                "claude",
			DisplayName:         "Claude 4 Opus",
			ContextLength:       200000,
			MaxCompletionTokens: 32000,
			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
		},
		{
			ID:                  "claude-sonnet-4-20250514",
			Object:              "model",
			Created:             1747180800, // 2025-05-14 (previously a 2024 timestamp)
			OwnedBy:             "anthropic",
			Type:                "claude",
			DisplayName:         "Claude 4 Sonnet",
			ContextLength:       200000,
			MaxCompletionTokens: 64000,
			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
		},
		{
			ID:                  "claude-3-7-sonnet-20250219",
			Object:              "model",
			Created:             1739923200, // 2025-02-19 (previously a 2024 timestamp)
			OwnedBy:             "anthropic",
			Type:                "claude",
			DisplayName:         "Claude 3.7 Sonnet",
			ContextLength:       128000,
			MaxCompletionTokens: 8192,
			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
		},
		{
			ID:                  "claude-3-5-haiku-20241022",
			Object:              "model",
			Created:             1729555200, // 2024-10-22
			OwnedBy:             "anthropic",
			Type:                "claude",
			DisplayName:         "Claude 3.5 Haiku",
			ContextLength:       128000,
			MaxCompletionTokens: 8192,
			// Thinking: not supported for Haiku models
		},
	}
}
// GetGeminiModels returns the static catalog of Gemini models.
//
// The per-model data lives in a compact table; the fields shared by every entry
// (object kind, owner, type, token limits, generation methods) are filled in by
// the loop below. Each call allocates fresh entries and fresh slices.
func GetGeminiModels() []*ModelInfo {
	type row struct {
		id       string
		version  string
		display  string
		about    string
		created  int64
		thinking *ThinkingSupport
	}

	table := []row{
		{
			id:       "gemini-2.5-pro",
			version:  "2.5",
			created:  1750118400,
			display:  "Gemini 2.5 Pro",
			about:    "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
			thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			id:       "gemini-2.5-flash",
			version:  "001",
			created:  1750118400,
			display:  "Gemini 2.5 Flash",
			about:    "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
			thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			id:       "gemini-2.5-flash-lite",
			version:  "2.5",
			created:  1753142400,
			display:  "Gemini 2.5 Flash Lite",
			about:    "Our smallest and most cost effective model, built for at scale usage.",
			thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			id:       "gemini-3-pro-preview",
			version:  "3.0",
			created:  1737158400,
			display:  "Gemini 3 Pro Preview",
			about:    "Gemini 3 Pro Preview",
			thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
		},
		{
			id:       "gemini-3-flash-preview",
			version:  "3.0",
			created:  1765929600,
			display:  "Gemini 3 Flash Preview",
			about:    "Gemini 3 Flash Preview",
			thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
		},
		{
			id:       "gemini-3-pro-image-preview",
			version:  "3.0",
			created:  1737158400,
			display:  "Gemini 3 Pro Image Preview",
			about:    "Gemini 3 Pro Image Preview",
			thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
		},
	}

	catalog := make([]*ModelInfo, 0, len(table))
	for _, r := range table {
		catalog = append(catalog, &ModelInfo{
			ID:                         r.id,
			Object:                     "model",
			Created:                    r.created,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/" + r.id, // resource name is always the ID under "models/"
			Version:                    r.version,
			DisplayName:                r.display,
			Description:                r.about,
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   r.thinking,
		})
	}
	return catalog
}
// GetGeminiVertexModels returns the static catalog of models for the Vertex
// channel: the Gemini text models followed by the Imagen image generation
// models.
//
// Gemini entries carry token limits, the content-generation methods and a
// thinking configuration. Imagen entries only advertise the "predict" method
// and leave token limits and Thinking unset. Each call allocates fresh entries.
func GetGeminiVertexModels() []*ModelInfo {
	type chatRow struct {
		id       string
		version  string
		display  string
		about    string
		created  int64
		thinking *ThinkingSupport
	}
	type imagenRow struct {
		id      string
		version string
		display string
		about   string
		created int64
	}

	chat := []chatRow{
		{
			id:       "gemini-2.5-pro",
			version:  "2.5",
			created:  1750118400,
			display:  "Gemini 2.5 Pro",
			about:    "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
			thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			id:       "gemini-2.5-flash",
			version:  "001",
			created:  1750118400,
			display:  "Gemini 2.5 Flash",
			about:    "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
			thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			id:       "gemini-2.5-flash-lite",
			version:  "2.5",
			created:  1753142400,
			display:  "Gemini 2.5 Flash Lite",
			about:    "Our smallest and most cost effective model, built for at scale usage.",
			thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			id:       "gemini-3-pro-preview",
			version:  "3.0",
			created:  1737158400,
			display:  "Gemini 3 Pro Preview",
			about:    "Gemini 3 Pro Preview",
			thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
		},
		{
			id:       "gemini-3-flash-preview",
			version:  "3.0",
			created:  1765929600,
			display:  "Gemini 3 Flash Preview",
			about:    "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
			thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
		},
		{
			id:       "gemini-3-pro-image-preview",
			version:  "3.0",
			created:  1737158400,
			display:  "Gemini 3 Pro Image Preview",
			about:    "Gemini 3 Pro Image Preview",
			thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
		},
	}

	// Imagen image generation models - use :predict action
	imagen := []imagenRow{
		{
			id:      "imagen-4.0-generate-001",
			version: "4.0",
			created: 1750000000,
			display: "Imagen 4.0 Generate",
			about:   "Imagen 4.0 image generation model",
		},
		{
			id:      "imagen-4.0-ultra-generate-001",
			version: "4.0",
			created: 1750000000,
			display: "Imagen 4.0 Ultra Generate",
			about:   "Imagen 4.0 Ultra high-quality image generation model",
		},
		{
			id:      "imagen-3.0-generate-002",
			version: "3.0",
			created: 1740000000,
			display: "Imagen 3.0 Generate",
			about:   "Imagen 3.0 image generation model",
		},
		{
			id:      "imagen-3.0-fast-generate-001",
			version: "3.0",
			created: 1740000000,
			display: "Imagen 3.0 Fast Generate",
			about:   "Imagen 3.0 fast image generation model",
		},
		{
			id:      "imagen-4.0-fast-generate-001",
			version: "4.0",
			created: 1750000000,
			display: "Imagen 4.0 Fast Generate",
			about:   "Imagen 4.0 fast image generation model",
		},
	}

	catalog := make([]*ModelInfo, 0, len(chat)+len(imagen))
	for _, r := range chat {
		catalog = append(catalog, &ModelInfo{
			ID:                         r.id,
			Object:                     "model",
			Created:                    r.created,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/" + r.id,
			Version:                    r.version,
			DisplayName:                r.display,
			Description:                r.about,
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   r.thinking,
		})
	}
	for _, r := range imagen {
		catalog = append(catalog, &ModelInfo{
			ID:                         r.id,
			Object:                     "model",
			Created:                    r.created,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/" + r.id,
			Version:                    r.version,
			DisplayName:                r.display,
			Description:                r.about,
			SupportedGenerationMethods: []string{"predict"},
		})
	}
	return catalog
}
// GetGeminiCLIModels returns the static catalog of Gemini models for the
// gemini-cli channel.
//
// Only the per-model data is listed in the table; the loop adds the fields that
// are identical for every entry. Each call allocates fresh entries.
func GetGeminiCLIModels() []*ModelInfo {
	type row struct {
		id       string
		version  string
		display  string
		about    string
		created  int64
		thinking *ThinkingSupport
	}

	table := []row{
		{
			id:       "gemini-2.5-pro",
			version:  "2.5",
			created:  1750118400,
			display:  "Gemini 2.5 Pro",
			about:    "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
			thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			id:       "gemini-2.5-flash",
			version:  "001",
			created:  1750118400,
			display:  "Gemini 2.5 Flash",
			about:    "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
			thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			id:       "gemini-2.5-flash-lite",
			version:  "2.5",
			created:  1753142400,
			display:  "Gemini 2.5 Flash Lite",
			about:    "Our smallest and most cost effective model, built for at scale usage.",
			thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			id:       "gemini-3-pro-preview",
			version:  "3.0",
			created:  1737158400,
			display:  "Gemini 3 Pro Preview",
			about:    "Our most intelligent model with SOTA reasoning and multimodal understanding, and powerful agentic and vibe coding capabilities",
			thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
		},
		{
			id:       "gemini-3-flash-preview",
			version:  "3.0",
			created:  1765929600,
			display:  "Gemini 3 Flash Preview",
			about:    "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
			thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
		},
	}

	catalog := make([]*ModelInfo, 0, len(table))
	for _, r := range table {
		catalog = append(catalog, &ModelInfo{
			ID:                         r.id,
			Object:                     "model",
			Created:                    r.created,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/" + r.id,
			Version:                    r.version,
			DisplayName:                r.display,
			Description:                r.about,
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   r.thinking,
		})
	}
	return catalog
}
// GetAIStudioModels returns the static catalog of Gemini models for AI Studio
// integrations.
//
// The table lists what differs per model; the loop fills in the shared fields.
// Rows flagged imageModel get an output limit of 8192 tokens instead of 65536
// and carry no thinking configuration. Each call allocates fresh entries.
//
// NOTE(review): unlike the other Gemini catalogs in this file, the Gemini 3
// entries here declare no thinking Levels - confirm this is intentional.
func GetAIStudioModels() []*ModelInfo {
	type row struct {
		id         string
		version    string
		display    string
		about      string
		created    int64
		thinking   *ThinkingSupport
		imageModel bool
	}

	table := []row{
		{
			id:       "gemini-2.5-pro",
			version:  "2.5",
			created:  1750118400,
			display:  "Gemini 2.5 Pro",
			about:    "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
			thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			id:       "gemini-2.5-flash",
			version:  "001",
			created:  1750118400,
			display:  "Gemini 2.5 Flash",
			about:    "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
			thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			id:       "gemini-2.5-flash-lite",
			version:  "2.5",
			created:  1753142400,
			display:  "Gemini 2.5 Flash Lite",
			about:    "Our smallest and most cost effective model, built for at scale usage.",
			thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			id:       "gemini-3-pro-preview",
			version:  "3.0",
			created:  1737158400,
			display:  "Gemini 3 Pro Preview",
			about:    "Gemini 3 Pro Preview",
			thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			id:       "gemini-3-flash-preview",
			version:  "3.0",
			created:  1765929600,
			display:  "Gemini 3 Flash Preview",
			about:    "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
			thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			id:       "gemini-pro-latest",
			version:  "2.5",
			created:  1750118400,
			display:  "Gemini Pro Latest",
			about:    "Latest release of Gemini Pro",
			thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			id:       "gemini-flash-latest",
			version:  "2.5",
			created:  1750118400,
			display:  "Gemini Flash Latest",
			about:    "Latest release of Gemini Flash",
			thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			id:       "gemini-flash-lite-latest",
			version:  "2.5",
			created:  1753142400,
			display:  "Gemini Flash-Lite Latest",
			about:    "Latest release of Gemini Flash-Lite",
			thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		// Disabled entry, kept for reference:
		// {
		// 	id:         "gemini-2.5-flash-image-preview",
		// 	version:    "2.5",
		// 	created:    1756166400,
		// 	display:    "Gemini 2.5 Flash Image Preview",
		// 	about:      "State-of-the-art image generation and editing model.",
		// 	imageModel: true,
		// },
		{
			id:         "gemini-2.5-flash-image",
			version:    "2.5",
			created:    1759363200,
			display:    "Gemini 2.5 Flash Image",
			about:      "State-of-the-art image generation and editing model.",
			imageModel: true, // image models don't support thinkingConfig; thinking stays nil
		},
	}

	catalog := make([]*ModelInfo, 0, len(table))
	for _, r := range table {
		entry := &ModelInfo{
			ID:                         r.id,
			Object:                     "model",
			Created:                    r.created,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/" + r.id,
			Version:                    r.version,
			DisplayName:                r.display,
			Description:                r.about,
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   r.thinking,
		}
		if r.imageModel {
			entry.OutputTokenLimit = 8192
		}
		catalog = append(catalog, entry)
	}
	return catalog
}
// GetOpenAIModels returns the static catalog of OpenAI GPT-5 family models.
//
// Every entry shares a 400000-token context window, a 128000-token completion
// limit and the "tools" parameter; entries differ in identity, version and the
// reasoning levels listed in Thinking.Levels. Each call allocates fresh entries.
func GetOpenAIModels() []*ModelInfo {
	return []*ModelInfo{
		{
			ID:                  "gpt-5",
			Object:              "model",
			Created:             1754524800,
			OwnedBy:             "openai",
			Type:                "openai",
			Version:             "gpt-5-2025-08-07",
			DisplayName:         "GPT 5",
			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
			ContextLength:       400000,
			MaxCompletionTokens: 128000,
			SupportedParameters: []string{"tools"},
			Thinking:            &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
		},
		{
			ID:                  "gpt-5-codex",
			Object:              "model",
			Created:             1757894400,
			OwnedBy:             "openai",
			Type:                "openai",
			Version:             "gpt-5-2025-09-15",
			DisplayName:         "GPT 5 Codex",
			Description:         "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
			ContextLength:       400000,
			MaxCompletionTokens: 128000,
			SupportedParameters: []string{"tools"},
			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
		},
		{
			ID:                  "gpt-5-codex-mini",
			Object:              "model",
			Created:             1762473600,
			OwnedBy:             "openai",
			Type:                "openai",
			Version:             "gpt-5-2025-11-07",
			DisplayName:         "GPT 5 Codex Mini",
			Description:         "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
			ContextLength:       400000,
			MaxCompletionTokens: 128000,
			SupportedParameters: []string{"tools"},
			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
		},
		{
			ID:      "gpt-5.1",
			Object:  "model",
			Created: 1762905600,
			OwnedBy: "openai",
			Type:    "openai",
			Version: "gpt-5.1-2025-11-12",
			// Display name and description previously duplicated the gpt-5
			// entry, making the two models indistinguishable in listings.
			DisplayName:         "GPT 5.1",
			Description:         "Stable version of GPT 5.1, The best model for coding and agentic tasks across domains.",
			ContextLength:       400000,
			MaxCompletionTokens: 128000,
			SupportedParameters: []string{"tools"},
			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
		},
		{
			ID:                  "gpt-5.1-codex",
			Object:              "model",
			Created:             1762905600,
			OwnedBy:             "openai",
			Type:                "openai",
			Version:             "gpt-5.1-2025-11-12",
			DisplayName:         "GPT 5.1 Codex",
			Description:         "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
			ContextLength:       400000,
			MaxCompletionTokens: 128000,
			SupportedParameters: []string{"tools"},
			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
		},
		{
			ID:                  "gpt-5.1-codex-mini",
			Object:              "model",
			Created:             1762905600,
			OwnedBy:             "openai",
			Type:                "openai",
			Version:             "gpt-5.1-2025-11-12",
			DisplayName:         "GPT 5.1 Codex Mini",
			Description:         "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
			ContextLength:       400000,
			MaxCompletionTokens: 128000,
			SupportedParameters: []string{"tools"},
			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
		},
		{
			ID:                  "gpt-5.1-codex-max",
			Object:              "model",
			Created:             1763424000,
			OwnedBy:             "openai",
			Type:                "openai",
			Version:             "gpt-5.1-max",
			DisplayName:         "GPT 5.1 Codex Max",
			Description:         "Stable version of GPT 5.1 Codex Max",
			ContextLength:       400000,
			MaxCompletionTokens: 128000,
			SupportedParameters: []string{"tools"},
			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
		},
		{
			ID:                  "gpt-5.2",
			Object:              "model",
			Created:             1765440000,
			OwnedBy:             "openai",
			Type:                "openai",
			Version:             "gpt-5.2",
			DisplayName:         "GPT 5.2",
			Description:         "Stable version of GPT 5.2",
			ContextLength:       400000,
			MaxCompletionTokens: 128000,
			SupportedParameters: []string{"tools"},
			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
		},
		{
			ID:                  "gpt-5.2-codex",
			Object:              "model",
			Created:             1765440000,
			OwnedBy:             "openai",
			Type:                "openai",
			Version:             "gpt-5.2",
			DisplayName:         "GPT 5.2 Codex",
			Description:         "Stable version of GPT 5.2 Codex, The best model for coding and agentic tasks across domains.",
			ContextLength:       400000,
			MaxCompletionTokens: 128000,
			SupportedParameters: []string{"tools"},
			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
		},
	}
}
// GetQwenModels returns the static catalog of Qwen models.
//
// None of the entries declare a thinking configuration. Each call allocates
// fresh entries and fresh SupportedParameters slices.
func GetQwenModels() []*ModelInfo {
	return []*ModelInfo{
		{
			ID:                  "qwen3-coder-plus",
			Object:              "model",
			Created:             1753228800,
			OwnedBy:             "qwen",
			Type:                "qwen",
			Version:             "3.0",
			DisplayName:         "Qwen3 Coder Plus",
			Description:         "Advanced code generation and understanding model",
			ContextLength:       32768,
			MaxCompletionTokens: 8192,
			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
		},
		{
			ID:                  "qwen3-coder-flash",
			Object:              "model",
			Created:             1753228800,
			OwnedBy:             "qwen",
			Type:                "qwen",
			Version:             "3.0",
			DisplayName:         "Qwen3 Coder Flash",
			Description:         "Fast code generation model",
			ContextLength:       8192,
			MaxCompletionTokens: 2048,
			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
		},
		{
			ID:                  "vision-model",
			Object:              "model",
			Created:             1758672000,
			OwnedBy:             "qwen",
			Type:                "qwen",
			Version:             "3.0",
			DisplayName:         "Qwen3 Vision Model",
			Description:         "Vision model", // was "Vision model model" (duplicated word)
			ContextLength:       32768,
			MaxCompletionTokens: 2048,
			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
		},
	}
}
// iFlowThinkingSupport is the single ThinkingSupport value referenced by every
// iFlow model that can toggle thinking through
// chat_template_kwargs.enable_thinking (a boolean switch).
//
// It is expressed as a list of levels rather than a numeric budget so that the
// standard level normalization runs before the value is converted to the toggle.
var iFlowThinkingSupport = &ThinkingSupport{Levels: []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"}}
// GetIFlowModels returns the static catalog of models available to iFlow OAuth
// accounts.
//
// Rows marked as thinking-capable all point at the shared iFlowThinkingSupport
// value; the remaining rows leave Thinking nil. Each call allocates a fresh
// slice and fresh ModelInfo entries.
func GetIFlowModels() []*ModelInfo {
	type catalogRow struct {
		id       string
		display  string
		about    string
		created  int64
		thinking *ThinkingSupport
	}

	toggle := iFlowThinkingSupport
	rows := []catalogRow{
		{id: "tstars2.0", display: "TStars-2.0", about: "iFlow TStars-2.0 multimodal assistant", created: 1746489600},
		{id: "qwen3-coder-plus", display: "Qwen3-Coder-Plus", about: "Qwen3 Coder Plus code generation", created: 1753228800},
		{id: "qwen3-max", display: "Qwen3-Max", about: "Qwen3 flagship model", created: 1758672000},
		{id: "qwen3-vl-plus", display: "Qwen3-VL-Plus", about: "Qwen3 multimodal vision-language", created: 1758672000},
		{id: "qwen3-max-preview", display: "Qwen3-Max-Preview", about: "Qwen3 Max preview build", created: 1757030400, thinking: toggle},
		{id: "kimi-k2-0905", display: "Kimi-K2-Instruct-0905", about: "Moonshot Kimi K2 instruct 0905", created: 1757030400},
		{id: "glm-4.6", display: "GLM-4.6", about: "Zhipu GLM 4.6 general model", created: 1759190400, thinking: toggle},
		{id: "glm-4.7", display: "GLM-4.7", about: "Zhipu GLM 4.7 general model", created: 1766448000, thinking: toggle},
		{id: "kimi-k2", display: "Kimi-K2", about: "Moonshot Kimi K2 general model", created: 1752192000},
		{id: "kimi-k2-thinking", display: "Kimi-K2-Thinking", about: "Moonshot Kimi K2 thinking model", created: 1762387200},
		{id: "deepseek-v3.2-chat", display: "DeepSeek-V3.2", about: "DeepSeek V3.2 Chat", created: 1764576000},
		{id: "deepseek-v3.2-reasoner", display: "DeepSeek-V3.2", about: "DeepSeek V3.2 Reasoner", created: 1764576000},
		{id: "deepseek-v3.2", display: "DeepSeek-V3.2-Exp", about: "DeepSeek V3.2 experimental", created: 1759104000, thinking: toggle},
		{id: "deepseek-v3.1", display: "DeepSeek-V3.1-Terminus", about: "DeepSeek V3.1 Terminus", created: 1756339200, thinking: toggle},
		{id: "deepseek-r1", display: "DeepSeek-R1", about: "DeepSeek reasoning model R1", created: 1737331200},
		{id: "deepseek-v3", display: "DeepSeek-V3-671B", about: "DeepSeek V3 671B", created: 1734307200},
		{id: "qwen3-32b", display: "Qwen3-32B", about: "Qwen3 32B", created: 1747094400},
		{id: "qwen3-235b-a22b-thinking-2507", display: "Qwen3-235B-A22B-Thinking", about: "Qwen3 235B A22B Thinking (2507)", created: 1753401600},
		{id: "qwen3-235b-a22b-instruct", display: "Qwen3-235B-A22B-Instruct", about: "Qwen3 235B A22B Instruct", created: 1753401600},
		{id: "qwen3-235b", display: "Qwen3-235B-A22B", about: "Qwen3 235B A22B", created: 1753401600},
		{id: "minimax-m2", display: "MiniMax-M2", about: "MiniMax M2", created: 1758672000, thinking: toggle},
		{id: "minimax-m2.1", display: "MiniMax-M2.1", about: "MiniMax M2.1", created: 1766448000, thinking: toggle},
		{id: "iflow-rome-30ba3b", display: "iFlow-ROME", about: "iFlow Rome 30BA3B model", created: 1736899200},
	}

	catalog := make([]*ModelInfo, len(rows))
	for i := range rows {
		r := rows[i]
		catalog[i] = &ModelInfo{
			ID:          r.id,
			Object:      "model",
			Created:     r.created,
			OwnedBy:     "iflow",
			Type:        "iflow",
			DisplayName: r.display,
			Description: r.about,
			Thinking:    r.thinking,
		}
	}
	return catalog
}
// AntigravityModelConfig captures static antigravity model overrides, including
// Thinking budget limits and provider max completion tokens.
//
// Both fields are optional: the static configuration in this file leaves
// Thinking nil and MaxCompletionTokens zero for models that have no override.
type AntigravityModelConfig struct {
// Thinking holds the thinking budget limits (and optional levels) for the
// model; nil when the model has no thinking override.
Thinking *ThinkingSupport
// MaxCompletionTokens is the provider's max completion tokens for the model;
// zero when no override is configured.
MaxCompletionTokens int
}
// GetAntigravityModelConfig returns the static per-model overrides for
// antigravity models, keyed by the upstream model name returned by the
// Antigravity models endpoint.
//
// A fresh map with freshly allocated values is built on every call. Models that
// share identical settings are registered in groups; each model still receives
// its own AntigravityModelConfig and ThinkingSupport value.
func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
	overrides := make(map[string]*AntigravityModelConfig, 10)

	// Disabled entry, kept for reference:
	// "rev19-uic3-1p": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},

	// Gemini 2.5 Flash family: thinking may be switched off entirely.
	for _, id := range []string{"gemini-2.5-flash", "gemini-2.5-flash-lite"} {
		overrides[id] = &AntigravityModelConfig{
			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		}
	}

	// Gemini 3 Pro variants: budget plus the two-step level scale.
	for _, id := range []string{"gemini-3-pro-high", "gemini-3-pro-image"} {
		overrides[id] = &AntigravityModelConfig{
			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
		}
	}

	// Gemini 3 Flash: same budget, four-step level scale.
	overrides["gemini-3-flash"] = &AntigravityModelConfig{
		Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
	}

	// Claude thinking models: thinking budget plus a completion-token cap.
	for _, id := range []string{"claude-sonnet-4-5-thinking", "claude-opus-4-5-thinking"} {
		overrides[id] = &AntigravityModelConfig{
			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true},
			MaxCompletionTokens: 64000,
		}
	}

	// Claude without thinking: completion-token cap only.
	overrides["claude-sonnet-4-5"] = &AntigravityModelConfig{MaxCompletionTokens: 64000}

	// Known models with no overrides; present so lookups find an empty config.
	for _, id := range []string{"gpt-oss-120b-medium", "tab_flash_lite_preview"} {
		overrides[id] = &AntigravityModelConfig{}
	}

	return overrides
}

View File

@@ -148,7 +148,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
baseURLs := antigravityBaseURLFallbackOrder(auth)
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
attempts := antigravityRetryAttempts(e.cfg)
attempts := antigravityRetryAttempts(auth, e.cfg)
attemptLoop:
for attempt := 0; attempt < attempts; attempt++ {
@@ -289,7 +289,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
baseURLs := antigravityBaseURLFallbackOrder(auth)
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
attempts := antigravityRetryAttempts(e.cfg)
attempts := antigravityRetryAttempts(auth, e.cfg)
attemptLoop:
for attempt := 0; attempt < attempts; attempt++ {
@@ -677,7 +677,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
baseURLs := antigravityBaseURLFallbackOrder(auth)
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
attempts := antigravityRetryAttempts(e.cfg)
attempts := antigravityRetryAttempts(auth, e.cfg)
attemptLoop:
for attempt := 0; attempt < attempts; attempt++ {
@@ -1447,11 +1447,16 @@ func resolveUserAgent(auth *cliproxyauth.Auth) string {
return defaultAntigravityAgent
}
func antigravityRetryAttempts(cfg *config.Config) int {
if cfg == nil {
return 1
func antigravityRetryAttempts(auth *cliproxyauth.Auth, cfg *config.Config) int {
retry := 0
if cfg != nil {
retry = cfg.RequestRetry
}
if auth != nil {
if override, ok := auth.RequestRetryOverride(); ok {
retry = override
}
}
retry := cfg.RequestRetry
if retry < 0 {
retry = 0
}

View File

@@ -1,7 +1,7 @@
// Package iflow implements thinking configuration for iFlow models (GLM, MiniMax).
// Package iflow implements thinking configuration for iFlow models.
//
// iFlow models use boolean toggle semantics:
// - GLM models: chat_template_kwargs.enable_thinking (boolean)
// - Models using chat_template_kwargs.enable_thinking (boolean toggle)
// - MiniMax models: reasoning_split (boolean)
//
// Level values are converted to boolean: none=false, all others=true
@@ -20,6 +20,7 @@ import (
// Applier implements thinking.ProviderApplier for iFlow models.
//
// iFlow-specific behavior:
// - enable_thinking toggle models: enable_thinking boolean
// - GLM models: enable_thinking boolean + clear_thinking=false
// - MiniMax models: reasoning_split boolean
// - Level to boolean: none=false, others=true
@@ -61,8 +62,8 @@ func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *
return body, nil
}
if isGLMModel(modelInfo.ID) {
return applyGLM(body, config), nil
if isEnableThinkingModel(modelInfo.ID) {
return applyEnableThinking(body, config, isGLMModel(modelInfo.ID)), nil
}
if isMiniMaxModel(modelInfo.ID) {
@@ -97,7 +98,8 @@ func configToBoolean(config thinking.ThinkingConfig) bool {
}
}
// applyGLM applies thinking configuration for GLM models.
// applyEnableThinking applies thinking configuration for models that use
// chat_template_kwargs.enable_thinking format.
//
// Output format when enabled:
//
@@ -107,9 +109,8 @@ func configToBoolean(config thinking.ThinkingConfig) bool {
//
// {"chat_template_kwargs": {"enable_thinking": false}}
//
// Note: clear_thinking is only set when thinking is enabled, to preserve
// thinking output in the response.
func applyGLM(body []byte, config thinking.ThinkingConfig) []byte {
// Note: clear_thinking is only set for GLM models when thinking is enabled.
func applyEnableThinking(body []byte, config thinking.ThinkingConfig, setClearThinking bool) []byte {
enableThinking := configToBoolean(config)
if len(body) == 0 || !gjson.ValidBytes(body) {
@@ -118,8 +119,11 @@ func applyGLM(body []byte, config thinking.ThinkingConfig) []byte {
result, _ := sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
// clear_thinking is a GLM-only knob, strip it for other models.
result, _ = sjson.DeleteBytes(result, "chat_template_kwargs.clear_thinking")
// clear_thinking only needed when thinking is enabled
if enableThinking {
if enableThinking && setClearThinking {
result, _ = sjson.SetBytes(result, "chat_template_kwargs.clear_thinking", false)
}
@@ -143,8 +147,21 @@ func applyMiniMax(body []byte, config thinking.ThinkingConfig) []byte {
return result
}
// isEnableThinkingModel reports whether the model is configured through the
// chat_template_kwargs.enable_thinking toggle.
//
// Every GLM model (as decided by isGLMModel) qualifies; beyond those, only the
// explicitly listed model IDs do. The ID comparison is case-insensitive.
func isEnableThinkingModel(modelID string) bool {
	if isGLMModel(modelID) {
		return true
	}
	switch strings.ToLower(modelID) {
	case "qwen3-max-preview", "deepseek-v3.2", "deepseek-v3.1":
		return true
	}
	return false
}
// isGLMModel reports whether modelID names a GLM series model, i.e. whether its
// lower-cased form starts with "glm".
// GLM models use chat_template_kwargs.enable_thinking format.
func isGLMModel(modelID string) bool {
	const glmPrefix = "glm"
	lowered := strings.ToLower(modelID)
	return strings.HasPrefix(lowered, glmPrefix)
}

View File

@@ -175,7 +175,7 @@ func convertConstToEnum(jsonStr string) string {
return jsonStr
}
// convertEnumValuesToStrings ensures all enum values are strings.
// convertEnumValuesToStrings ensures all enum values are strings and the schema type is set to string.
// Gemini API requires enum values to be of type string, not numbers or booleans.
func convertEnumValuesToStrings(jsonStr string) string {
for _, p := range findPaths(jsonStr, "enum") {
@@ -185,19 +185,15 @@ func convertEnumValuesToStrings(jsonStr string) string {
}
var stringVals []string
needsConversion := false
for _, item := range arr.Array() {
// Check if any value is not a string
if item.Type != gjson.String {
needsConversion = true
}
stringVals = append(stringVals, item.String())
}
// Only update if we found non-string values
if needsConversion {
jsonStr, _ = sjson.Set(jsonStr, p, stringVals)
}
// Always update enum values to strings and set type to "string"
// This ensures compatibility with Antigravity Gemini which only allows enum for STRING type
jsonStr, _ = sjson.Set(jsonStr, p, stringVals)
parentPath := trimSuffix(p, ".enum")
jsonStr, _ = sjson.Set(jsonStr, joinPath(parentPath, "type"), "string")
}
return jsonStr
}

View File

@@ -86,12 +86,19 @@ func (s *FileSynthesizer) Synthesize(ctx *SynthesisContext) ([]*coreauth.Auth, e
}
}
disabled, _ := metadata["disabled"].(bool)
status := coreauth.StatusActive
if disabled {
status = coreauth.StatusDisabled
}
a := &coreauth.Auth{
ID: id,
Provider: provider,
Label: label,
Prefix: prefix,
Status: coreauth.StatusActive,
Status: status,
Disabled: disabled,
Attributes: map[string]string{
"source": full,
"path": full,
@@ -167,6 +174,16 @@ func SynthesizeGeminiVirtualAuths(primary *coreauth.Auth, metadata map[string]an
"virtual_parent_id": primary.ID,
"type": metadata["type"],
}
if v, ok := metadata["disable_cooling"]; ok {
metadataCopy["disable_cooling"] = v
} else if v, ok := metadata["disable-cooling"]; ok {
metadataCopy["disable_cooling"] = v
}
if v, ok := metadata["request_retry"]; ok {
metadataCopy["request_retry"] = v
} else if v, ok := metadata["request-retry"]; ok {
metadataCopy["request_retry"] = v
}
proxy := strings.TrimSpace(primary.ProxyURL)
if proxy != "" {
metadataCopy["proxy_url"] = proxy

View File

@@ -69,10 +69,12 @@ func TestFileSynthesizer_Synthesize_ValidAuthFile(t *testing.T) {
// Create a valid auth file
authData := map[string]any{
"type": "claude",
"email": "test@example.com",
"proxy_url": "http://proxy.local",
"prefix": "test-prefix",
"type": "claude",
"email": "test@example.com",
"proxy_url": "http://proxy.local",
"prefix": "test-prefix",
"disable_cooling": true,
"request_retry": 2,
}
data, _ := json.Marshal(authData)
err := os.WriteFile(filepath.Join(tempDir, "claude-auth.json"), data, 0644)
@@ -108,6 +110,12 @@ func TestFileSynthesizer_Synthesize_ValidAuthFile(t *testing.T) {
if auths[0].ProxyURL != "http://proxy.local" {
t.Errorf("expected proxy_url http://proxy.local, got %s", auths[0].ProxyURL)
}
if v, ok := auths[0].Metadata["disable_cooling"].(bool); !ok || !v {
t.Errorf("expected disable_cooling true, got %v", auths[0].Metadata["disable_cooling"])
}
if v, ok := auths[0].Metadata["request_retry"].(float64); !ok || int(v) != 2 {
t.Errorf("expected request_retry 2, got %v", auths[0].Metadata["request_retry"])
}
if auths[0].Status != coreauth.StatusActive {
t.Errorf("expected status active, got %s", auths[0].Status)
}
@@ -336,9 +344,11 @@ func TestSynthesizeGeminiVirtualAuths_MultiProject(t *testing.T) {
},
}
metadata := map[string]any{
"project_id": "project-a, project-b, project-c",
"email": "test@example.com",
"type": "gemini",
"project_id": "project-a, project-b, project-c",
"email": "test@example.com",
"type": "gemini",
"request_retry": 2,
"disable_cooling": true,
}
virtuals := SynthesizeGeminiVirtualAuths(primary, metadata, now)
@@ -376,6 +386,12 @@ func TestSynthesizeGeminiVirtualAuths_MultiProject(t *testing.T) {
if v.ProxyURL != "http://proxy.local" {
t.Errorf("expected proxy_url http://proxy.local, got %s", v.ProxyURL)
}
if vv, ok := v.Metadata["disable_cooling"].(bool); !ok || !vv {
t.Errorf("expected disable_cooling true, got %v", v.Metadata["disable_cooling"])
}
if vv, ok := v.Metadata["request_retry"].(int); !ok || vv != 2 {
t.Errorf("expected request_retry 2, got %v", v.Metadata["request_retry"])
}
if v.Attributes["runtime_only"] != "true" {
t.Error("expected runtime_only=true")
}

View File

@@ -124,32 +124,47 @@ func (m *Manager) Stream(ctx context.Context, provider string, req *HTTPRequest)
out := make(chan StreamEvent)
go func() {
defer close(out)
send := func(ev StreamEvent) bool {
if ctx == nil {
out <- ev
return true
}
select {
case <-ctx.Done():
return false
case out <- ev:
return true
}
}
for {
select {
case <-ctx.Done():
out <- StreamEvent{Err: ctx.Err()}
return
case msg, ok := <-respCh:
if !ok {
out <- StreamEvent{Err: errors.New("wsrelay: stream closed")}
_ = send(StreamEvent{Err: errors.New("wsrelay: stream closed")})
return
}
switch msg.Type {
case MessageTypeStreamStart:
resp := decodeResponse(msg.Payload)
out <- StreamEvent{Type: MessageTypeStreamStart, Status: resp.Status, Headers: resp.Headers}
if okSend := send(StreamEvent{Type: MessageTypeStreamStart, Status: resp.Status, Headers: resp.Headers}); !okSend {
return
}
case MessageTypeStreamChunk:
chunk := decodeChunk(msg.Payload)
out <- StreamEvent{Type: MessageTypeStreamChunk, Payload: chunk}
if okSend := send(StreamEvent{Type: MessageTypeStreamChunk, Payload: chunk}); !okSend {
return
}
case MessageTypeStreamEnd:
out <- StreamEvent{Type: MessageTypeStreamEnd}
_ = send(StreamEvent{Type: MessageTypeStreamEnd})
return
case MessageTypeError:
out <- StreamEvent{Type: MessageTypeError, Err: decodeError(msg.Payload)}
_ = send(StreamEvent{Type: MessageTypeError, Err: decodeError(msg.Payload)})
return
case MessageTypeHTTPResp:
resp := decodeResponse(msg.Payload)
out <- StreamEvent{Type: MessageTypeHTTPResp, Status: resp.Status, Headers: resp.Headers, Payload: resp.Body}
_ = send(StreamEvent{Type: MessageTypeHTTPResp, Status: resp.Status, Headers: resp.Headers, Payload: resp.Body})
return
default:
}

View File

@@ -506,6 +506,32 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
bootstrapRetries := 0
maxBootstrapRetries := StreamingBootstrapRetries(h.Cfg)
sendErr := func(msg *interfaces.ErrorMessage) bool {
if ctx == nil {
errChan <- msg
return true
}
select {
case <-ctx.Done():
return false
case errChan <- msg:
return true
}
}
sendData := func(chunk []byte) bool {
if ctx == nil {
dataChan <- chunk
return true
}
select {
case <-ctx.Done():
return false
case dataChan <- chunk:
return true
}
}
bootstrapEligible := func(err error) bool {
status := statusFromError(err)
if status == 0 {
@@ -565,12 +591,14 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
addon = hdr.Clone()
}
}
errChan <- &interfaces.ErrorMessage{StatusCode: status, Error: streamErr, Addon: addon}
_ = sendErr(&interfaces.ErrorMessage{StatusCode: status, Error: streamErr, Addon: addon})
return
}
if len(chunk.Payload) > 0 {
sentPayload = true
dataChan <- cloneBytes(chunk.Payload)
if okSendData := sendData(cloneBytes(chunk.Payload)); !okSendData {
return
}
}
}
}

View File

@@ -68,14 +68,13 @@ func (s *FileTokenStore) Save(ctx context.Context, auth *cliproxyauth.Auth) (str
return "", err
}
case auth.Metadata != nil:
auth.Metadata["disabled"] = auth.Disabled
raw, errMarshal := json.Marshal(auth.Metadata)
if errMarshal != nil {
return "", fmt.Errorf("auth filestore: marshal metadata failed: %w", errMarshal)
}
if existing, errRead := os.ReadFile(path); errRead == nil {
// Use metadataEqualIgnoringTimestamps to skip writes when only timestamp fields change.
// This prevents the token refresh loop caused by timestamp/expired/expires_in changes.
if metadataEqualIgnoringTimestamps(existing, raw, auth.Provider) {
if jsonEqual(existing, raw) {
return path, nil
}
file, errOpen := os.OpenFile(path, os.O_WRONLY|os.O_TRUNC, 0o600)
@@ -216,12 +215,18 @@ func (s *FileTokenStore) readAuthFile(path, baseDir string) (*cliproxyauth.Auth,
return nil, fmt.Errorf("stat file: %w", err)
}
id := s.idFor(path, baseDir)
disabled, _ := metadata["disabled"].(bool)
status := cliproxyauth.StatusActive
if disabled {
status = cliproxyauth.StatusDisabled
}
auth := &cliproxyauth.Auth{
ID: id,
Provider: provider,
FileName: id,
Label: s.labelFor(metadata),
Status: cliproxyauth.StatusActive,
Status: status,
Disabled: disabled,
Attributes: map[string]string{"path": path},
Metadata: metadata,
CreatedAt: info.ModTime(),
@@ -299,8 +304,7 @@ func (s *FileTokenStore) baseDirSnapshot() string {
return s.baseDir
}
// DEPRECATED: Use metadataEqualIgnoringTimestamps for comparing auth metadata.
// This function is kept for backward compatibility but can cause refresh loops.
// jsonEqual compares two JSON blobs by parsing them into Go objects and deep comparing.
func jsonEqual(a, b []byte) bool {
var objA any
var objB any
@@ -313,41 +317,6 @@ func jsonEqual(a, b []byte) bool {
return deepEqualJSON(objA, objB)
}
// metadataEqualIgnoringTimestamps compares two metadata JSON blobs,
// ignoring fields that change on every refresh but don't affect functionality.
// This prevents unnecessary file writes that would trigger watcher events and
// create refresh loops.
// The provider parameter controls whether access_token is ignored: providers like
// Google OAuth (gemini, gemini-cli) can re-fetch tokens when needed, while others
// like iFlow require the refreshed token to be persisted.
func metadataEqualIgnoringTimestamps(a, b []byte, provider string) bool {
var objA, objB map[string]any
if err := json.Unmarshal(a, &objA); err != nil {
return false
}
if err := json.Unmarshal(b, &objB); err != nil {
return false
}
// Fields to ignore: these change on every refresh but don't affect authentication logic.
// - timestamp, expired, expires_in, last_refresh: time-related fields that change on refresh
ignoredFields := []string{"timestamp", "expired", "expires_in", "last_refresh"}
// For providers that can re-fetch tokens when needed (e.g., Google OAuth),
// we ignore access_token to avoid unnecessary file writes.
switch provider {
case "gemini", "gemini-cli", "antigravity":
ignoredFields = append(ignoredFields, "access_token")
}
for _, field := range ignoredFields {
delete(objA, field)
delete(objB, field)
}
return deepEqualJSON(objA, objB)
}
func deepEqualJSON(a, b any) bool {
switch valA := a.(type) {
case map[string]any:

View File

@@ -61,6 +61,15 @@ func SetQuotaCooldownDisabled(disable bool) {
quotaCooldownDisabled.Store(disable)
}
// quotaCooldownDisabledForAuth reports whether quota cooldown is disabled for
// the given auth.
//
// An override reported by auth.DisableCoolingOverride takes precedence; when
// auth is nil or reports no override, the process-wide quotaCooldownDisabled
// flag decides.
func quotaCooldownDisabledForAuth(auth *Auth) bool {
	if auth == nil {
		return quotaCooldownDisabled.Load()
	}
	override, hasOverride := auth.DisableCoolingOverride()
	if hasOverride {
		return override
	}
	return quotaCooldownDisabled.Load()
}
// Result captures execution outcome used to adjust auth state.
type Result struct {
// AuthID references the auth that produced this result.
@@ -468,20 +477,16 @@ func (m *Manager) Execute(ctx context.Context, providers []string, req cliproxye
return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
}
retryTimes, maxWait := m.retrySettings()
attempts := retryTimes + 1
if attempts < 1 {
attempts = 1
}
_, maxWait := m.retrySettings()
var lastErr error
for attempt := 0; attempt < attempts; attempt++ {
for attempt := 0; ; attempt++ {
resp, errExec := m.executeMixedOnce(ctx, normalized, req, opts)
if errExec == nil {
return resp, nil
}
lastErr = errExec
wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, attempts, normalized, req.Model, maxWait)
wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, normalized, req.Model, maxWait)
if !shouldRetry {
break
}
@@ -503,20 +508,16 @@ func (m *Manager) ExecuteCount(ctx context.Context, providers []string, req clip
return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
}
retryTimes, maxWait := m.retrySettings()
attempts := retryTimes + 1
if attempts < 1 {
attempts = 1
}
_, maxWait := m.retrySettings()
var lastErr error
for attempt := 0; attempt < attempts; attempt++ {
for attempt := 0; ; attempt++ {
resp, errExec := m.executeCountMixedOnce(ctx, normalized, req, opts)
if errExec == nil {
return resp, nil
}
lastErr = errExec
wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, attempts, normalized, req.Model, maxWait)
wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, normalized, req.Model, maxWait)
if !shouldRetry {
break
}
@@ -538,20 +539,16 @@ func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cli
return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"}
}
retryTimes, maxWait := m.retrySettings()
attempts := retryTimes + 1
if attempts < 1 {
attempts = 1
}
_, maxWait := m.retrySettings()
var lastErr error
for attempt := 0; attempt < attempts; attempt++ {
for attempt := 0; ; attempt++ {
chunks, errStream := m.executeStreamMixedOnce(ctx, normalized, req, opts)
if errStream == nil {
return chunks, nil
}
lastErr = errStream
wait, shouldRetry := m.shouldRetryAfterError(errStream, attempt, attempts, normalized, req.Model, maxWait)
wait, shouldRetry := m.shouldRetryAfterError(errStream, attempt, normalized, req.Model, maxWait)
if !shouldRetry {
break
}
@@ -721,6 +718,7 @@ func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string
go func(streamCtx context.Context, streamAuth *Auth, streamProvider string, streamChunks <-chan cliproxyexecutor.StreamChunk) {
defer close(out)
var failed bool
forward := true
for chunk := range streamChunks {
if chunk.Err != nil && !failed {
failed = true
@@ -731,7 +729,18 @@ func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string
}
m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: false, Error: rerr})
}
out <- chunk
if !forward {
continue
}
if streamCtx == nil {
out <- chunk
continue
}
select {
case <-streamCtx.Done():
forward = false
case out <- chunk:
}
}
if !failed {
m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: true})
@@ -1034,11 +1043,15 @@ func (m *Manager) retrySettings() (int, time.Duration) {
return int(m.requestRetry.Load()), time.Duration(m.maxRetryInterval.Load())
}
func (m *Manager) closestCooldownWait(providers []string, model string) (time.Duration, bool) {
func (m *Manager) closestCooldownWait(providers []string, model string, attempt int) (time.Duration, bool) {
if m == nil || len(providers) == 0 {
return 0, false
}
now := time.Now()
defaultRetry := int(m.requestRetry.Load())
if defaultRetry < 0 {
defaultRetry = 0
}
providerSet := make(map[string]struct{}, len(providers))
for i := range providers {
key := strings.TrimSpace(strings.ToLower(providers[i]))
@@ -1061,6 +1074,16 @@ func (m *Manager) closestCooldownWait(providers []string, model string) (time.Du
if _, ok := providerSet[providerKey]; !ok {
continue
}
effectiveRetry := defaultRetry
if override, ok := auth.RequestRetryOverride(); ok {
effectiveRetry = override
}
if effectiveRetry < 0 {
effectiveRetry = 0
}
if attempt >= effectiveRetry {
continue
}
blocked, reason, next := isAuthBlockedForModel(auth, model, now)
if !blocked || next.IsZero() || reason == blockReasonDisabled {
continue
@@ -1077,8 +1100,8 @@ func (m *Manager) closestCooldownWait(providers []string, model string) (time.Du
return minWait, found
}
func (m *Manager) shouldRetryAfterError(err error, attempt, maxAttempts int, providers []string, model string, maxWait time.Duration) (time.Duration, bool) {
if err == nil || attempt >= maxAttempts-1 {
func (m *Manager) shouldRetryAfterError(err error, attempt int, providers []string, model string, maxWait time.Duration) (time.Duration, bool) {
if err == nil {
return 0, false
}
if maxWait <= 0 {
@@ -1087,7 +1110,7 @@ func (m *Manager) shouldRetryAfterError(err error, attempt, maxAttempts int, pro
if status := statusCodeFromError(err); status == http.StatusOK {
return 0, false
}
wait, found := m.closestCooldownWait(providers, model)
wait, found := m.closestCooldownWait(providers, model, attempt)
if !found || wait > maxWait {
return 0, false
}
@@ -1176,7 +1199,7 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
if result.RetryAfter != nil {
next = now.Add(*result.RetryAfter)
} else {
cooldown, nextLevel := nextQuotaCooldown(backoffLevel)
cooldown, nextLevel := nextQuotaCooldown(backoffLevel, quotaCooldownDisabledForAuth(auth))
if cooldown > 0 {
next = now.Add(cooldown)
}
@@ -1193,7 +1216,7 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
shouldSuspendModel = true
setModelQuota = true
case 408, 500, 502, 503, 504:
if quotaCooldownDisabled.Load() {
if quotaCooldownDisabledForAuth(auth) {
state.NextRetryAfter = time.Time{}
} else {
next := now.Add(1 * time.Minute)
@@ -1439,7 +1462,7 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati
if retryAfter != nil {
next = now.Add(*retryAfter)
} else {
cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel)
cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel, quotaCooldownDisabledForAuth(auth))
if cooldown > 0 {
next = now.Add(cooldown)
}
@@ -1449,7 +1472,7 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati
auth.NextRetryAfter = next
case 408, 500, 502, 503, 504:
auth.StatusMessage = "transient upstream error"
if quotaCooldownDisabled.Load() {
if quotaCooldownDisabledForAuth(auth) {
auth.NextRetryAfter = time.Time{}
} else {
auth.NextRetryAfter = now.Add(1 * time.Minute)
@@ -1462,11 +1485,11 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati
}
// nextQuotaCooldown returns the next cooldown duration and updated backoff level for repeated quota errors.
func nextQuotaCooldown(prevLevel int) (time.Duration, int) {
func nextQuotaCooldown(prevLevel int, disableCooling bool) (time.Duration, int) {
if prevLevel < 0 {
prevLevel = 0
}
if quotaCooldownDisabled.Load() {
if disableCooling {
return 0, prevLevel
}
cooldown := quotaBackoffBase * time.Duration(1<<prevLevel)
@@ -1642,6 +1665,9 @@ func (m *Manager) persist(ctx context.Context, auth *Auth) error {
if m.store == nil || auth == nil {
return nil
}
if shouldSkipPersist(ctx) {
return nil
}
if auth.Attributes != nil {
if v := strings.ToLower(strings.TrimSpace(auth.Attributes["runtime_only"])); v == "true" {
return nil

View File

@@ -0,0 +1,97 @@
package auth
import (
"context"
"testing"
"time"
)
// TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride checks that
// the request_retry value stored in an auth's metadata, rather than the
// manager-wide retry setting, decides whether shouldRetryAfterError asks for
// another attempt.
func TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride(t *testing.T) {
	const model = "test-model"

	mgr := NewManager(nil, nil, nil)
	// Manager default allows 3 retries; the auth metadata below overrides it.
	mgr.SetRetryConfig(3, 30*time.Second)

	cooling := &Auth{
		ID:       "auth-1",
		Provider: "claude",
		Metadata: map[string]any{
			"request_retry": float64(0),
		},
		ModelStates: map[string]*ModelState{
			model: {
				Unavailable:    true,
				Status:         StatusError,
				NextRetryAfter: time.Now().Add(5 * time.Second),
			},
		},
	}
	if _, errRegister := mgr.Register(context.Background(), cooling); errRegister != nil {
		t.Fatalf("register auth: %v", errRegister)
	}

	_, maxWait := mgr.retrySettings()
	// decide asks the manager whether a 500 failure on the given attempt should be retried.
	decide := func(attempt int) (time.Duration, bool) {
		return mgr.shouldRetryAfterError(&Error{HTTPStatus: 500, Message: "boom"}, attempt, []string{"claude"}, model, maxWait)
	}

	// request_retry=0: no retry even on the first attempt.
	if wait, retry := decide(0); retry {
		t.Fatalf("expected shouldRetry=false for request_retry=0, got true (wait=%v)", wait)
	}

	// Raise the override to a single retry and push it into the manager.
	cooling.Metadata["request_retry"] = float64(1)
	if _, errUpdate := mgr.Update(context.Background(), cooling); errUpdate != nil {
		t.Fatalf("update auth: %v", errUpdate)
	}

	wait, retry := decide(0)
	if !retry {
		t.Fatalf("expected shouldRetry=true for request_retry=1, got false")
	}
	if wait <= 0 {
		t.Fatalf("expected wait > 0, got %v", wait)
	}

	// The single allowed retry is used up once attempt reaches 1.
	if _, retryAgain := decide(1); retryAgain {
		t.Fatalf("expected shouldRetry=false on attempt=1 for request_retry=1, got true")
	}
}
// TestManager_MarkResult_RespectsAuthDisableCoolingOverride checks that an auth
// carrying disable_cooling=true in its metadata gets no retry deadline after a
// 500 failure, even though the global cooldown switch is left enabled.
func TestManager_MarkResult_RespectsAuthDisableCoolingOverride(t *testing.T) {
	// Pin the global flag to "cooldown enabled" and restore it afterwards.
	original := quotaCooldownDisabled.Load()
	quotaCooldownDisabled.Store(false)
	t.Cleanup(func() { quotaCooldownDisabled.Store(original) })

	const model = "test-model"

	mgr := NewManager(nil, nil, nil)
	registered := &Auth{
		ID:       "auth-1",
		Provider: "claude",
		Metadata: map[string]any{
			"disable_cooling": true,
		},
	}
	if _, errRegister := mgr.Register(context.Background(), registered); errRegister != nil {
		t.Fatalf("register auth: %v", errRegister)
	}

	failure := Result{
		AuthID:   "auth-1",
		Provider: "claude",
		Model:    model,
		Success:  false,
		Error:    &Error{HTTPStatus: 500, Message: "boom"},
	}
	mgr.MarkResult(context.Background(), failure)

	updated, ok := mgr.GetByID("auth-1")
	if !ok || updated == nil {
		t.Fatalf("expected auth to be present")
	}
	modelState := updated.ModelStates[model]
	if modelState == nil {
		t.Fatalf("expected model state to be present")
	}
	if !modelState.NextRetryAfter.IsZero() {
		t.Fatalf("expected NextRetryAfter to be zero when disable_cooling=true, got %v", modelState.NextRetryAfter)
	}
}

View File

@@ -0,0 +1,24 @@
package auth
import "context"
// skipPersistContextKey is the private context key type under which the
// skip-persist flag is stored.
type skipPersistContextKey struct{}

// WithSkipPersist returns a derived context that disables persistence for Manager Update/Register calls.
// It is intended for code paths that are reacting to file watcher events, where the file on disk is
// already the source of truth and persisting again would create a write-back loop.
//
// A nil ctx is treated as context.Background().
func WithSkipPersist(ctx context.Context) context.Context {
	parent := ctx
	if parent == nil {
		parent = context.Background()
	}
	return context.WithValue(parent, skipPersistContextKey{}, true)
}

// shouldSkipPersist reports whether ctx carries a true skip-persist flag.
// A nil ctx, a missing value, or a non-bool value all yield false.
func shouldSkipPersist(ctx context.Context) bool {
	if ctx == nil {
		return false
	}
	if flag, isBool := ctx.Value(skipPersistContextKey{}).(bool); isBool {
		return flag
	}
	return false
}

View File

@@ -0,0 +1,62 @@
package auth
import (
"context"
"sync/atomic"
"testing"
)
// countingStore is a test double for the manager's store that only counts how
// many times Save has been called.
type countingStore struct {
	saveCount atomic.Int32
}

// List always reports no auths and no error.
func (s *countingStore) List(_ context.Context) ([]*Auth, error) {
	return nil, nil
}

// Save increments the call counter and returns an empty string with a nil error.
func (s *countingStore) Save(_ context.Context, _ *Auth) (string, error) {
	s.saveCount.Add(1)
	return "", nil
}

// Delete is a no-op that always succeeds.
func (s *countingStore) Delete(_ context.Context, _ string) error {
	return nil
}
// TestWithSkipPersist_DisablesUpdatePersistence checks that Manager.Update saves
// through the store with a plain context but performs no Save when the context
// comes from WithSkipPersist.
func TestWithSkipPersist_DisablesUpdatePersistence(t *testing.T) {
	counter := &countingStore{}
	manager := NewManager(counter, nil, nil)
	entry := &Auth{
		ID:       "auth-1",
		Provider: "antigravity",
		Metadata: map[string]any{"type": "antigravity"},
	}

	// Baseline: a regular update reaches the store exactly once.
	_, err := manager.Update(context.Background(), entry)
	if err != nil {
		t.Fatalf("Update returned error: %v", err)
	}
	if got := counter.saveCount.Load(); got != 1 {
		t.Fatalf("expected 1 Save call, got %d", got)
	}

	// With the skip flag set, the Save counter must not move.
	_, err = manager.Update(WithSkipPersist(context.Background()), entry)
	if err != nil {
		t.Fatalf("Update(skipPersist) returned error: %v", err)
	}
	if got := counter.saveCount.Load(); got != 1 {
		t.Fatalf("expected Save call count to remain 1, got %d", got)
	}
}
// TestWithSkipPersist_DisablesRegisterPersistence checks that Manager.Register
// performs no Save on the store when given a context from WithSkipPersist.
func TestWithSkipPersist_DisablesRegisterPersistence(t *testing.T) {
	counter := &countingStore{}
	manager := NewManager(counter, nil, nil)
	entry := &Auth{
		ID:       "auth-1",
		Provider: "antigravity",
		Metadata: map[string]any{"type": "antigravity"},
	}

	skipCtx := WithSkipPersist(context.Background())
	_, err := manager.Register(skipCtx, entry)
	if err != nil {
		t.Fatalf("Register(skipPersist) returned error: %v", err)
	}
	if got := counter.saveCount.Load(); got != 0 {
		t.Fatalf("expected 0 Save calls, got %d", got)
	}
}

View File

@@ -194,6 +194,108 @@ func (a *Auth) ProxyInfo() string {
return "via proxy"
}
// DisableCoolingOverride returns the auth-file scoped disable_cooling override when present.
// The value is read from metadata key "disable_cooling" (or legacy "disable-cooling").
//
// The first result is the override value; the second reports whether a usable
// override was found. A key whose value cannot be parsed as a boolean is
// skipped, so the legacy key is still consulted in that case.
func (a *Auth) DisableCoolingOverride() (bool, bool) {
	if a == nil || a.Metadata == nil {
		return false, false
	}
	// Preferred key first, legacy spelling second.
	for _, key := range [...]string{"disable_cooling", "disable-cooling"} {
		raw, present := a.Metadata[key]
		if !present {
			continue
		}
		if flag, valid := parseBoolAny(raw); valid {
			return flag, true
		}
	}
	return false, false
}
// RequestRetryOverride returns the auth-file scoped request_retry override when present.
// The value is read from metadata key "request_retry" (or legacy "request-retry").
//
// The first result is the override, with negative values clamped to zero; the
// second reports whether a usable override was found. A key whose value cannot
// be parsed as an integer is skipped, so the legacy key is still consulted.
func (a *Auth) RequestRetryOverride() (int, bool) {
	if a == nil || a.Metadata == nil {
		return 0, false
	}
	// Preferred key first, legacy spelling second.
	for _, key := range [...]string{"request_retry", "request-retry"} {
		raw, present := a.Metadata[key]
		if !present {
			continue
		}
		retries, valid := parseIntAny(raw)
		if !valid {
			continue
		}
		if retries < 0 {
			retries = 0
		}
		return retries, true
	}
	return 0, false
}
// parseBoolAny converts a loosely typed metadata value into a boolean.
//
// Accepted inputs:
//   - bool: used as is
//   - string: trimmed, then parsed with strconv.ParseBool; empty or unparsable
//     strings are rejected
//   - float64: true when non-zero
//   - json.Number: parsed as an int64, true when non-zero; non-integer numbers
//     are rejected
//
// The second result reports whether val could be interpreted; for any other
// type it is false.
func parseBoolAny(val any) (bool, bool) {
	if flag, isBool := val.(bool); isBool {
		return flag, true
	}
	if text, isString := val.(string); isString {
		text = strings.TrimSpace(text)
		if text == "" {
			return false, false
		}
		// ParseBool yields false alongside any error, so a failure maps to (false, false).
		flag, err := strconv.ParseBool(text)
		return flag, err == nil
	}
	if number, isFloat := val.(float64); isFloat {
		return number != 0, true
	}
	if number, isJSONNumber := val.(json.Number); isJSONNumber {
		whole, err := number.Int64()
		if err != nil {
			return false, false
		}
		return whole != 0, true
	}
	return false, false
}
// parseIntAny converts a loosely typed metadata value into an int.
//
// Accepted inputs:
//   - int, int32: used as is
//   - int64, json.Number: accepted only when the value fits in an int
//   - float64: truncated toward zero; NaN, infinities and values outside the
//     int range are rejected, because converting them is implementation-dependent
//     in Go and would otherwise yield a platform-specific result
//   - string: trimmed, then parsed with strconv.Atoi; empty or unparsable
//     strings are rejected
//
// The second result reports whether val could be interpreted; when it is false
// the first result is always zero.
func parseIntAny(val any) (int, bool) {
	switch typed := val.(type) {
	case int:
		return typed, true
	case int32:
		return int(typed), true
	case int64:
		converted := int(typed)
		// Guards against silent wrap-around where int is narrower than int64.
		if int64(converted) != typed {
			return 0, false
		}
		return converted, true
	case float64:
		// limit is 2^(IntSize-1): the first magnitude a positive int cannot hold,
		// and exactly representable as a float64.
		const limit = float64(1 << (strconv.IntSize - 1))
		// Written as a negated range test so that NaN (for which every
		// comparison is false) is rejected as well.
		if !(typed >= -limit && typed < limit) {
			return 0, false
		}
		return int(typed), true
	case json.Number:
		parsed, err := typed.Int64()
		if err != nil {
			return 0, false
		}
		converted := int(parsed)
		if int64(converted) != parsed {
			return 0, false
		}
		return converted, true
	case string:
		trimmed := strings.TrimSpace(typed)
		if trimmed == "" {
			return 0, false
		}
		parsed, err := strconv.Atoi(trimmed)
		if err != nil {
			return 0, false
		}
		return parsed, true
	default:
		return 0, false
	}
}
func (a *Auth) AccountInfo() (string, string) {
if a == nil {
return "", ""

View File

@@ -124,6 +124,7 @@ func (s *Service) ensureAuthUpdateQueue(ctx context.Context) {
}
func (s *Service) consumeAuthUpdates(ctx context.Context) {
ctx = coreauth.WithSkipPersist(ctx)
for {
select {
case <-ctx.Done():
@@ -680,6 +681,10 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
if a == nil || a.ID == "" {
return
}
if a.Disabled {
GlobalModelRegistry().UnregisterClient(a.ID)
return
}
authKind := strings.ToLower(strings.TrimSpace(a.Attributes["auth_kind"]))
if authKind == "" {
if kind, _ := a.AccountInfo(); strings.EqualFold(kind, "api_key") {

View File

@@ -2,6 +2,7 @@ package test
import (
"fmt"
"strings"
"testing"
"time"
@@ -2778,12 +2779,18 @@ func runThinkingTests(t *testing.T, cases []thinkingTestCase) {
// Verify clear_thinking for iFlow GLM models when enable_thinking=true
if tc.to == "iflow" && tc.expectField == "chat_template_kwargs.enable_thinking" && tc.expectValue == "true" {
baseModel := thinking.ParseSuffix(tc.model).ModelName
isGLM := strings.HasPrefix(strings.ToLower(baseModel), "glm")
ctVal := gjson.GetBytes(body, "chat_template_kwargs.clear_thinking")
if !ctVal.Exists() {
t.Fatalf("expected clear_thinking field not found for GLM model, body=%s", string(body))
}
if ctVal.Bool() != false {
t.Fatalf("clear_thinking: expected false, got %v, body=%s", ctVal.Bool(), string(body))
if isGLM {
if !ctVal.Exists() {
t.Fatalf("expected clear_thinking field not found for GLM model, body=%s", string(body))
}
if ctVal.Bool() != false {
t.Fatalf("clear_thinking: expected false, got %v, body=%s", ctVal.Bool(), string(body))
}
} else if ctVal.Exists() {
t.Fatalf("expected no clear_thinking field for non-GLM enable_thinking model, body=%s", string(body))
}
}
})