Compare commits

..

12 Commits

Author SHA1 Message Date
Luis Pater
98596c0a3f **refactor(translator): remove service_tier from Codex OpenAI request payload** 2025-11-20 20:12:06 +08:00
Luis Pater
670ce2e528 Merge pull request #285 from router-for-me/iflow
feat(iflow): add cookie-based authentication endpoint
2025-11-20 20:04:38 +08:00
hkfires
3f4f8b3b2d feat(iflow): add cookie-based authentication endpoint 2025-11-20 18:23:43 +08:00
Luis Pater
371324c090 **feat(registry): expand Gemini model definitions and support Vertex AI** 2025-11-20 18:16:26 +08:00
Luis Pater
d50b0f7524 **refactor(executor): simplify Gemini CLI execution and remove internal retry logic**
- Removed nested retry handling for 429 rate limit errors.
- Simplified request/response handling by cleaning redundant retry-related code.
- Eliminated `parseRetryDelay` function and max retry configuration logic.
2025-11-20 17:49:37 +08:00
Luis Pater
0586da9c2b **refactor(registry): move Gemini 3 Pro Preview model definition to base set** 2025-11-20 10:51:16 +08:00
Luis Pater
618511ff67 Merge pull request #280 from ben-vargas/feat-enable-gemini-3-cli
feat: enable Gemini 3 Pro Preview with OAuth support
2025-11-20 08:46:57 +08:00
Ben Vargas
0ff094b87f fix(executor): prevent streaming on failed response when no fallback
Fix critical bug where ExecuteStream would create a streaming channel
from a failed (non-2xx) response after exhausting all retries with no
fallback models available.

When retries were exhausted on the last model, the code would break from
the inner loop but fall through to streaming channel creation (line 401),
immediately returning at line 461. This made the error handling code at
lines 464-471 unreachable, causing clients to receive an empty/closed
stream instead of a proper error response.

Solution: Check if httpResp is non-2xx before creating the streaming
channel. If failed, continue the outer loop to reach error handling.

Identified by: codex-bot review
Ref: https://github.com/router-for-me/CLIProxyAPI/pull/280#pullrequestreview-3484560423
2025-11-19 13:14:40 -07:00
Ben Vargas
ed23472d94 fix(executor): prevent streaming from 429 response when fallback available
Fix critical bug where ExecuteStream would create a streaming channel
using a 429 error response instead of continuing to the next fallback
model after exhausting retries.

When 429 retries were exhausted and a fallback model was available,
the inner retry loop would break but immediately fall through to the
streaming channel creation, attempting to stream from the failed 429
response instead of trying the next model.

Solution: Add shouldContinueToNextModel flag to explicitly skip the
streaming logic and continue the outer model loop when appropriate.

Identified by: codex-bot review
Ref: https://github.com/router-for-me/CLIProxyAPI/pull/280#pullrequestreview-3484479106
2025-11-19 13:05:38 -07:00
Ben Vargas
ede4471b84 feat(translator): add default thinkingConfig for gemini-3-pro-preview
Match official Gemini CLI behavior by always sending default
thinkingConfig when client doesn't specify reasoning parameters.

- Set thinkingBudget=-1 (dynamic) for gemini-3-pro-preview
- Set include_thoughts=true to return thinking process
- Apply to both /v1/chat/completions and /v1/responses endpoints
- See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
2025-11-19 12:47:39 -07:00
Ben Vargas
6a3de3a89c feat(executor): add intelligent retry logic for 429 rate limits
Implement Google RetryInfo.retryDelay support for handling 429 rate
limit errors. Retries same model up to 3 times using exact delays
from Google's API before trying fallback models.

- Add parseRetryDelay() to extract Google's retry guidance
- Implement inner retry loop in Execute() and ExecuteStream()
- Context-aware waiting with cancellation support
- Cap delays at 60s maximum for safety
2025-11-19 12:47:39 -07:00
Ben Vargas
782bba0bc4 feat(registry): enable gemini-3-pro-preview for gemini-cli provider
Add gemini-3-pro-preview model to GetGeminiCLIModels() to make it
available for OAuth-based Gemini CLI users, matching the model
already available in AI Studio provider.

Model spec:
- ID: gemini-3-pro-preview
- Version: 3.0
- Input: 1M tokens
- Output: 64K tokens
- Thinking: 128-32K tokens (dynamic)
2025-11-19 12:47:39 -07:00
12 changed files with 527 additions and 197 deletions

View File

@@ -1443,6 +1443,87 @@ func (h *Handler) RequestIFlowToken(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{"status": "ok", "url": authURL, "state": state})
}
func (h *Handler) RequestIFlowCookieToken(c *gin.Context) {
ctx := context.Background()
var payload struct {
Cookie string `json:"cookie"`
}
if err := c.ShouldBindJSON(&payload); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": "cookie is required"})
return
}
cookieValue := strings.TrimSpace(payload.Cookie)
if cookieValue == "" {
c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": "cookie is required"})
return
}
cookieValue, errNormalize := iflowauth.NormalizeCookie(cookieValue)
if errNormalize != nil {
c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": errNormalize.Error()})
return
}
authSvc := iflowauth.NewIFlowAuth(h.cfg)
tokenData, errAuth := authSvc.AuthenticateWithCookie(ctx, cookieValue)
if errAuth != nil {
c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": errAuth.Error()})
return
}
tokenData.Cookie = cookieValue
tokenStorage := authSvc.CreateCookieTokenStorage(tokenData)
email := strings.TrimSpace(tokenStorage.Email)
if email == "" {
c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": "failed to extract email from token"})
return
}
fileName := iflowauth.SanitizeIFlowFileName(email)
if fileName == "" {
fileName = fmt.Sprintf("iflow-%d", time.Now().UnixMilli())
}
tokenStorage.Email = email
record := &coreauth.Auth{
ID: fmt.Sprintf("iflow-%s.json", fileName),
Provider: "iflow",
FileName: fmt.Sprintf("iflow-%s.json", fileName),
Storage: tokenStorage,
Metadata: map[string]any{
"email": email,
"api_key": tokenStorage.APIKey,
"expired": tokenStorage.Expire,
"cookie": tokenStorage.Cookie,
"type": tokenStorage.Type,
"last_refresh": tokenStorage.LastRefresh,
},
Attributes: map[string]string{
"api_key": tokenStorage.APIKey,
},
}
savedPath, errSave := h.saveTokenRecord(ctx, record)
if errSave != nil {
c.JSON(http.StatusInternalServerError, gin.H{"status": "error", "error": "failed to save authentication tokens"})
return
}
fmt.Printf("iFlow cookie authentication successful. Token saved to %s\n", savedPath)
c.JSON(http.StatusOK, gin.H{
"status": "ok",
"saved_path": savedPath,
"email": email,
"expired": tokenStorage.Expire,
"type": tokenStorage.Type,
})
}
type projectSelectionRequiredError struct{}
func (e *projectSelectionRequiredError) Error() string {

View File

@@ -518,6 +518,7 @@ func (s *Server) registerManagementRoutes() {
mgmt.GET("/gemini-cli-auth-url", s.mgmt.RequestGeminiCLIToken)
mgmt.GET("/qwen-auth-url", s.mgmt.RequestQwenToken)
mgmt.GET("/iflow-auth-url", s.mgmt.RequestIFlowToken)
mgmt.POST("/iflow-auth-url", s.mgmt.RequestIFlowCookieToken)
mgmt.GET("/get-auth-status", s.mgmt.GetAuthStatus)
}
}

View File

@@ -0,0 +1,38 @@
package iflow
import (
"fmt"
"strings"
)
// NormalizeCookie normalizes raw cookie strings for iFlow authentication flows.
func NormalizeCookie(raw string) (string, error) {
trimmed := strings.TrimSpace(raw)
if trimmed == "" {
return "", fmt.Errorf("cookie cannot be empty")
}
combined := strings.Join(strings.Fields(trimmed), " ")
if !strings.HasSuffix(combined, ";") {
combined += ";"
}
if !strings.Contains(combined, "BXAuth=") {
return "", fmt.Errorf("cookie missing BXAuth field")
}
return combined, nil
}
// SanitizeIFlowFileName normalizes user identifiers for safe filename usage.
func SanitizeIFlowFileName(raw string) string {
if raw == "" {
return ""
}
cleanEmail := strings.ReplaceAll(raw, "*", "x")
var result strings.Builder
for _, r := range cleanEmail {
if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') || r == '_' || r == '@' || r == '.' || r == '-' {
result.WriteRune(r)
}
}
return strings.TrimSpace(result.String())
}

View File

@@ -71,22 +71,9 @@ func promptForCookie(promptFn func(string) (string, error)) (string, error) {
return "", fmt.Errorf("failed to read cookie: %w", err)
}
line = strings.TrimSpace(line)
if line == "" {
return "", fmt.Errorf("cookie cannot be empty")
}
// Clean up any extra whitespace and join multiple spaces
cookie := strings.Join(strings.Fields(line), " ")
// Ensure it ends properly
if !strings.HasSuffix(cookie, ";") {
cookie = cookie + ";"
}
// Ensure BXAuth is present in the cookie
if !strings.Contains(cookie, "BXAuth=") {
return "", fmt.Errorf("BXAuth field not found in cookie")
cookie, err := iflow.NormalizeCookie(line)
if err != nil {
return "", err
}
return cookie, nil
@@ -94,17 +81,6 @@ func promptForCookie(promptFn func(string) (string, error)) (string, error) {
// getAuthFilePath returns the auth file path for the given provider and email
func getAuthFilePath(cfg *config.Config, provider, email string) string {
// Clean email to make it filename-safe
cleanEmail := strings.ReplaceAll(email, "*", "x")
// Remove any unsafe characters, but allow standard email chars (@, ., -)
var result strings.Builder
for _, r := range cleanEmail {
if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') ||
r == '_' || r == '@' || r == '.' || r == '-' {
result.WriteRune(r)
}
}
return fmt.Sprintf("%s/%s-%s.json", cfg.AuthDir, provider, result.String())
fileName := iflow.SanitizeIFlowFileName(email)
return fmt.Sprintf("%s/%s-%s.json", cfg.AuthDir, provider, fileName)
}

View File

@@ -68,28 +68,13 @@ func GetClaudeModels() []*ModelInfo {
}
}
// GeminiModels returns the shared base Gemini model set used by multiple providers.
func GeminiModels() []*ModelInfo {
// GetGeminiModels returns the standard Gemini model definitions
func GetGeminiModels() []*ModelInfo {
return []*ModelInfo{
{
ID: "gemini-2.5-flash",
Object: "model",
Created: time.Now().Unix(),
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash",
Version: "001",
DisplayName: "Gemini 2.5 Flash",
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-pro",
Object: "model",
Created: time.Now().Unix(),
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-pro",
@@ -101,10 +86,25 @@ func GeminiModels() []*ModelInfo {
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash",
Version: "001",
DisplayName: "Gemini 2.5 Flash",
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash-lite",
Object: "model",
Created: time.Now().Unix(),
Created: 1753142400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-lite",
@@ -116,34 +116,110 @@ func GeminiModels() []*ModelInfo {
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-3-pro-preview",
Object: "model",
Created: 1737158400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-pro-preview",
Version: "3.0",
DisplayName: "Gemini 3 Pro Preview",
Description: "Gemini 3 Pro Preview",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
}
}
// GetGeminiModels returns the standard Gemini model definitions
func GetGeminiModels() []*ModelInfo { return GeminiModels() }
func GetGeminiVertexModels() []*ModelInfo {
return []*ModelInfo{
{
ID: "gemini-2.5-pro",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-pro",
Version: "2.5",
DisplayName: "Gemini 2.5 Pro",
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash",
Version: "001",
DisplayName: "Gemini 2.5 Flash",
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash-lite",
Object: "model",
Created: 1753142400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-lite",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Lite",
Description: "Our smallest and most cost effective model, built for at scale usage.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-3-pro-preview",
Object: "model",
Created: 1737158400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-pro-preview",
Version: "3.0",
DisplayName: "Gemini 3 Pro Preview",
Description: "Gemini 3 Pro Preview",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-3-pro-image-preview",
Object: "model",
Created: 1737158400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-pro-image-preview",
Version: "3.0",
DisplayName: "Gemini 3 Pro Image Preview",
Description: "Gemini 3 Pro Image Preview",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
},
}
}
// GetGeminiCLIModels returns the standard Gemini model definitions
func GetGeminiCLIModels() []*ModelInfo {
return []*ModelInfo{
{
ID: "gemini-2.5-flash",
Object: "model",
Created: time.Now().Unix(),
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash",
Version: "001",
DisplayName: "Gemini 2.5 Flash",
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-pro",
Object: "model",
Created: time.Now().Unix(),
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-pro",
@@ -155,10 +231,25 @@ func GetGeminiCLIModels() []*ModelInfo {
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash",
Version: "001",
DisplayName: "Gemini 2.5 Flash",
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash-lite",
Object: "model",
Created: time.Now().Unix(),
Created: 1753142400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-lite",
@@ -170,122 +261,163 @@ func GetGeminiCLIModels() []*ModelInfo {
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
// {
// ID: "gemini-3-pro-preview-11-2025",
// Object: "model",
// Created: time.Now().Unix(),
// OwnedBy: "google",
// Type: "gemini",
// Name: "models/gemini-3-pro-preview-11-2025",
// Version: "3",
// DisplayName: "Gemini 3 Pro Preview 11-2025",
// Description: "Latest preview of Gemini Pro",
// InputTokenLimit: 1048576,
// OutputTokenLimit: 65536,
// SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
// Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
// },
{
ID: "gemini-3-pro-preview",
Object: "model",
Created: 1737158400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-pro-preview",
Version: "3.0",
DisplayName: "Gemini 3 Pro Preview",
Description: "Gemini 3 Pro Preview",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
}
}
// GetAIStudioModels returns the Gemini model definitions for AI Studio integrations
func GetAIStudioModels() []*ModelInfo {
base := GeminiModels()
return append(base,
[]*ModelInfo{
{
ID: "gemini-3-pro-preview",
Object: "model",
Created: time.Now().Unix(),
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-pro-preview",
Version: "3.0",
DisplayName: "Gemini 3 Pro Preview",
Description: "Gemini 3 Pro Preview",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-pro-latest",
Object: "model",
Created: time.Now().Unix(),
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-pro-latest",
Version: "2.5",
DisplayName: "Gemini Pro Latest",
Description: "Latest release of Gemini Pro",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-flash-latest",
Object: "model",
Created: time.Now().Unix(),
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-flash-latest",
Version: "2.5",
DisplayName: "Gemini Flash Latest",
Description: "Latest release of Gemini Flash",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-flash-lite-latest",
Object: "model",
Created: time.Now().Unix(),
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-flash-lite-latest",
Version: "2.5",
DisplayName: "Gemini Flash-Lite Latest",
Description: "Latest release of Gemini Flash-Lite",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash-image-preview",
Object: "model",
Created: time.Now().Unix(),
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-image-preview",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Image Preview",
Description: "State-of-the-art image generation and editing model.",
InputTokenLimit: 1048576,
OutputTokenLimit: 8192,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
// image models don't support thinkingConfig; leave Thinking nil
},
{
ID: "gemini-2.5-flash-image",
Object: "model",
Created: time.Now().Unix(),
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-image",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Image",
Description: "State-of-the-art image generation and editing model.",
InputTokenLimit: 1048576,
OutputTokenLimit: 8192,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
// image models don't support thinkingConfig; leave Thinking nil
},
}...,
)
return []*ModelInfo{
{
ID: "gemini-2.5-pro",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-pro",
Version: "2.5",
DisplayName: "Gemini 2.5 Pro",
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash",
Version: "001",
DisplayName: "Gemini 2.5 Flash",
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash-lite",
Object: "model",
Created: 1753142400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-lite",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Lite",
Description: "Our smallest and most cost effective model, built for at scale usage.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-3-pro-preview",
Object: "model",
Created: 1737158400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-pro-preview",
Version: "3.0",
DisplayName: "Gemini 3 Pro Preview",
Description: "Gemini 3 Pro Preview",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-pro-latest",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-pro-latest",
Version: "2.5",
DisplayName: "Gemini Pro Latest",
Description: "Latest release of Gemini Pro",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-flash-latest",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-flash-latest",
Version: "2.5",
DisplayName: "Gemini Flash Latest",
Description: "Latest release of Gemini Flash",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-flash-lite-latest",
Object: "model",
Created: 1753142400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-flash-lite-latest",
Version: "2.5",
DisplayName: "Gemini Flash-Lite Latest",
Description: "Latest release of Gemini Flash-Lite",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash-image-preview",
Object: "model",
Created: 1756166400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-image-preview",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Image Preview",
Description: "State-of-the-art image generation and editing model.",
InputTokenLimit: 1048576,
OutputTokenLimit: 8192,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
// image models don't support thinkingConfig; leave Thinking nil
},
{
ID: "gemini-2.5-flash-image",
Object: "model",
Created: 1759363200,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-image",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Image",
Description: "State-of-the-art image generation and editing model.",
InputTokenLimit: 1048576,
OutputTokenLimit: 8192,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
// image models don't support thinkingConfig; leave Thinking nil
},
}
}
// GetOpenAIModels returns the standard OpenAI model definitions

View File

@@ -180,7 +180,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
continue
}
err = statusErr{code: httpResp.StatusCode, msg: string(data)}
err = newGeminiStatusErr(httpResp.StatusCode, data)
return resp, err
}
@@ -190,7 +190,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
if lastStatus == 0 {
lastStatus = 429
}
err = statusErr{code: lastStatus, msg: string(lastBody)}
err = newGeminiStatusErr(lastStatus, lastBody)
return resp, err
}
@@ -304,7 +304,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
}
continue
}
err = statusErr{code: httpResp.StatusCode, msg: string(data)}
err = newGeminiStatusErr(httpResp.StatusCode, data)
return nil, err
}
@@ -377,7 +377,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
if lastStatus == 0 {
lastStatus = 429
}
err = statusErr{code: lastStatus, msg: string(lastBody)}
err = newGeminiStatusErr(lastStatus, lastBody)
return nil, err
}
@@ -485,7 +485,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
if lastStatus == 0 {
lastStatus = 429
}
return cliproxyexecutor.Response{}, statusErr{code: lastStatus, msg: string(lastBody)}
return cliproxyexecutor.Response{}, newGeminiStatusErr(lastStatus, lastBody)
}
func (e *GeminiCLIExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
@@ -769,3 +769,42 @@ func fixGeminiCLIImageAspectRatio(modelName string, rawJSON []byte) []byte {
}
return rawJSON
}
func newGeminiStatusErr(statusCode int, body []byte) statusErr {
err := statusErr{code: statusCode, msg: string(body)}
if statusCode == http.StatusTooManyRequests {
if retryAfter, parseErr := parseRetryDelay(body); parseErr == nil && retryAfter != nil {
err.retryAfter = retryAfter
}
}
return err
}
// parseRetryDelay extracts the retry delay from a Google API 429 error response.
// The error response contains a RetryInfo.retryDelay field in the format "0.847655010s".
// Returns the parsed duration or an error if it cannot be determined.
func parseRetryDelay(errorBody []byte) (*time.Duration, error) {
// Try to parse the retryDelay from the error response
// Format: error.details[].retryDelay where @type == "type.googleapis.com/google.rpc.RetryInfo"
details := gjson.GetBytes(errorBody, "error.details")
if !details.Exists() || !details.IsArray() {
return nil, fmt.Errorf("no error.details found")
}
for _, detail := range details.Array() {
typeVal := detail.Get("@type").String()
if typeVal == "type.googleapis.com/google.rpc.RetryInfo" {
retryDelay := detail.Get("retryDelay").String()
if retryDelay != "" {
// Parse duration string like "0.847655010s"
duration, err := time.ParseDuration(retryDelay)
if err != nil {
return nil, fmt.Errorf("failed to parse duration")
}
return &duration, nil
}
}
}
return nil, fmt.Errorf("no RetryInfo found")
}

View File

@@ -8,6 +8,7 @@ import (
"io"
"net/http"
"strings"
"time"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
@@ -340,8 +341,9 @@ func (e *OpenAICompatExecutor) overrideModel(payload []byte, model string) []byt
}
type statusErr struct {
code int
msg string
code int
msg string
retryAfter *time.Duration
}
func (e statusErr) Error() string {
@@ -350,4 +352,5 @@ func (e statusErr) Error() string {
}
return fmt.Sprintf("status %d", e.code)
}
func (e statusErr) StatusCode() int { return e.code }
func (e statusErr) StatusCode() int { return e.code }
func (e statusErr) RetryAfter() *time.Duration { return e.retryAfter }

View File

@@ -22,6 +22,7 @@ func ConvertOpenAIResponsesRequestToCodex(modelName string, inputRawJSON []byte,
rawJSON, _ = sjson.DeleteBytes(rawJSON, "max_completion_tokens")
rawJSON, _ = sjson.DeleteBytes(rawJSON, "temperature")
rawJSON, _ = sjson.DeleteBytes(rawJSON, "top_p")
rawJSON, _ = sjson.DeleteBytes(rawJSON, "service_tier")
originalInstructions := ""
originalInstructionsText := ""

View File

@@ -88,6 +88,15 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
}
}
// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
// This matches the official Gemini CLI behavior which always sends:
// { thinkingBudget: -1, includeThoughts: true }
// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
}
// Temperature/top_p/top_k
if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number {
out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num)

View File

@@ -6,6 +6,7 @@ import (
"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
log "github.com/sirupsen/logrus"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
@@ -294,6 +295,17 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
}
}
}
// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
// This matches the official Gemini CLI behavior which always sends:
// { thinkingBudget: -1, includeThoughts: true }
// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
if !gjson.Get(out, "generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
log.Debugf("Applied default thinkingConfig for gemini-3-pro-preview (matches Gemini CLI): thinkingBudget=-1, include_thoughts=true")
}
result := []byte(out)
result = common.AttachDefaultSafetySettings(result, "safetySettings")
return result

View File

@@ -62,6 +62,8 @@ type Result struct {
Model string
// Success marks whether the execution succeeded.
Success bool
// RetryAfter carries a provider supplied retry hint (e.g. 429 retryDelay).
RetryAfter *time.Duration
// Error describes the failure when Success is false.
Error *Error
}
@@ -325,6 +327,9 @@ func (m *Manager) executeWithProvider(ctx context.Context, provider string, req
if errors.As(errExec, &se) && se != nil {
result.Error.HTTPStatus = se.StatusCode()
}
if ra := retryAfterFromError(errExec); ra != nil {
result.RetryAfter = ra
}
m.MarkResult(execCtx, result)
lastErr = errExec
continue
@@ -370,6 +375,9 @@ func (m *Manager) executeCountWithProvider(ctx context.Context, provider string,
if errors.As(errExec, &se) && se != nil {
result.Error.HTTPStatus = se.StatusCode()
}
if ra := retryAfterFromError(errExec); ra != nil {
result.RetryAfter = ra
}
m.MarkResult(execCtx, result)
lastErr = errExec
continue
@@ -415,6 +423,7 @@ func (m *Manager) executeStreamWithProvider(ctx context.Context, provider string
rerr.HTTPStatus = se.StatusCode()
}
result := Result{AuthID: auth.ID, Provider: provider, Model: req.Model, Success: false, Error: rerr}
result.RetryAfter = retryAfterFromError(errStream)
m.MarkResult(execCtx, result)
lastErr = errStream
continue
@@ -556,17 +565,23 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
suspendReason = "payment_required"
shouldSuspendModel = true
case 429:
cooldown, nextLevel := nextQuotaCooldown(state.Quota.BackoffLevel)
var next time.Time
if cooldown > 0 {
next = now.Add(cooldown)
backoffLevel := state.Quota.BackoffLevel
if result.RetryAfter != nil {
next = now.Add(*result.RetryAfter)
} else {
cooldown, nextLevel := nextQuotaCooldown(backoffLevel)
if cooldown > 0 {
next = now.Add(cooldown)
}
backoffLevel = nextLevel
}
state.NextRetryAfter = next
state.Quota = QuotaState{
Exceeded: true,
Reason: "quota",
NextRecoverAt: next,
BackoffLevel: nextLevel,
BackoffLevel: backoffLevel,
}
suspendReason = "quota"
shouldSuspendModel = true
@@ -582,7 +597,7 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
auth.UpdatedAt = now
updateAggregatedAvailability(auth, now)
} else {
applyAuthFailureState(auth, result.Error, now)
applyAuthFailureState(auth, result.Error, result.RetryAfter, now)
}
}
@@ -742,6 +757,25 @@ func cloneError(err *Error) *Error {
}
}
func retryAfterFromError(err error) *time.Duration {
if err == nil {
return nil
}
type retryAfterProvider interface {
RetryAfter() *time.Duration
}
rap, ok := err.(retryAfterProvider)
if !ok || rap == nil {
return nil
}
retryAfter := rap.RetryAfter()
if retryAfter == nil {
return nil
}
val := *retryAfter
return &val
}
func statusCodeFromResult(err *Error) int {
if err == nil {
return 0
@@ -749,7 +783,7 @@ func statusCodeFromResult(err *Error) int {
return err.StatusCode()
}
func applyAuthFailureState(auth *Auth, resultErr *Error, now time.Time) {
func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Duration, now time.Time) {
if auth == nil {
return
}
@@ -774,13 +808,17 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, now time.Time) {
auth.StatusMessage = "quota exhausted"
auth.Quota.Exceeded = true
auth.Quota.Reason = "quota"
cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel)
var next time.Time
if cooldown > 0 {
next = now.Add(cooldown)
if retryAfter != nil {
next = now.Add(*retryAfter)
} else {
cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel)
if cooldown > 0 {
next = now.Add(cooldown)
}
auth.Quota.BackoffLevel = nextLevel
}
auth.Quota.NextRecoverAt = next
auth.Quota.BackoffLevel = nextLevel
auth.NextRetryAfter = next
case 408, 500, 502, 503, 504:
auth.StatusMessage = "transient upstream error"

View File

@@ -629,7 +629,7 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
models = registry.GetGeminiModels()
case "vertex":
// Vertex AI Gemini supports the same model identifiers as Gemini.
models = registry.GetGeminiModels()
models = registry.GetGeminiVertexModels()
case "gemini-cli":
models = registry.GetGeminiCLIModels()
case "aistudio":