Mirror of https://github.com/router-for-me/CLIProxyAPI.git (synced 2026-02-18 20:30:51 +08:00)
Merge branch 'dev' into feat-amp-cli-module
@@ -1443,6 +1443,87 @@ func (h *Handler) RequestIFlowToken(c *gin.Context) {
 	c.JSON(http.StatusOK, gin.H{"status": "ok", "url": authURL, "state": state})
 }
 
+func (h *Handler) RequestIFlowCookieToken(c *gin.Context) {
+	ctx := context.Background()
+
+	var payload struct {
+		Cookie string `json:"cookie"`
+	}
+	if err := c.ShouldBindJSON(&payload); err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": "cookie is required"})
+		return
+	}
+
+	cookieValue := strings.TrimSpace(payload.Cookie)
+
+	if cookieValue == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": "cookie is required"})
+		return
+	}
+
+	cookieValue, errNormalize := iflowauth.NormalizeCookie(cookieValue)
+	if errNormalize != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": errNormalize.Error()})
+		return
+	}
+
+	authSvc := iflowauth.NewIFlowAuth(h.cfg)
+	tokenData, errAuth := authSvc.AuthenticateWithCookie(ctx, cookieValue)
+	if errAuth != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": errAuth.Error()})
+		return
+	}
+
+	tokenData.Cookie = cookieValue
+
+	tokenStorage := authSvc.CreateCookieTokenStorage(tokenData)
+	email := strings.TrimSpace(tokenStorage.Email)
+	if email == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": "failed to extract email from token"})
+		return
+	}
+
+	fileName := iflowauth.SanitizeIFlowFileName(email)
+	if fileName == "" {
+		fileName = fmt.Sprintf("iflow-%d", time.Now().UnixMilli())
+	}
+
+	tokenStorage.Email = email
+
+	record := &coreauth.Auth{
+		ID:       fmt.Sprintf("iflow-%s.json", fileName),
+		Provider: "iflow",
+		FileName: fmt.Sprintf("iflow-%s.json", fileName),
+		Storage:  tokenStorage,
+		Metadata: map[string]any{
+			"email":        email,
+			"api_key":      tokenStorage.APIKey,
+			"expired":      tokenStorage.Expire,
+			"cookie":       tokenStorage.Cookie,
+			"type":         tokenStorage.Type,
+			"last_refresh": tokenStorage.LastRefresh,
+		},
+		Attributes: map[string]string{
+			"api_key": tokenStorage.APIKey,
+		},
+	}
+
+	savedPath, errSave := h.saveTokenRecord(ctx, record)
+	if errSave != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"status": "error", "error": "failed to save authentication tokens"})
+		return
+	}
+
+	fmt.Printf("iFlow cookie authentication successful. Token saved to %s\n", savedPath)
+	c.JSON(http.StatusOK, gin.H{
+		"status":     "ok",
+		"saved_path": savedPath,
+		"email":      email,
+		"expired":    tokenStorage.Expire,
+		"type":       tokenStorage.Type,
+	})
+}
+
 type projectSelectionRequiredError struct{}
 
 func (e *projectSelectionRequiredError) Error() string {
@@ -534,6 +534,7 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.GET("/gemini-cli-auth-url", s.mgmt.RequestGeminiCLIToken)
 		mgmt.GET("/qwen-auth-url", s.mgmt.RequestQwenToken)
 		mgmt.GET("/iflow-auth-url", s.mgmt.RequestIFlowToken)
+		mgmt.POST("/iflow-auth-url", s.mgmt.RequestIFlowCookieToken)
 		mgmt.GET("/get-auth-status", s.mgmt.GetAuthStatus)
 	}
 }
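Review note: a minimal sketch for exercising the new route. The management prefix (/v0/management), local port, and bearer auth header are deployment assumptions — only POST /iflow-auth-url itself comes from this diff:

```go
package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	// Hypothetical address, prefix, and key: adjust to your deployment.
	url := "http://127.0.0.1:8317/v0/management/iflow-auth-url"
	body := bytes.NewBufferString(`{"cookie": "BXAuth=...; other=value"}`)
	req, err := http.NewRequest(http.MethodPost, url, body)
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer <management-key>") // assumed auth scheme
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	// On success the handler responds with status, saved_path, email, expired, type.
	fmt.Println(resp.Status)
}
```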
internal/auth/iflow/cookie_helpers.go (new file, 38 lines)
@@ -0,0 +1,38 @@
+package iflow
+
+import (
+	"fmt"
+	"strings"
+)
+
+// NormalizeCookie normalizes raw cookie strings for iFlow authentication flows.
+func NormalizeCookie(raw string) (string, error) {
+	trimmed := strings.TrimSpace(raw)
+	if trimmed == "" {
+		return "", fmt.Errorf("cookie cannot be empty")
+	}
+
+	combined := strings.Join(strings.Fields(trimmed), " ")
+	if !strings.HasSuffix(combined, ";") {
+		combined += ";"
+	}
+	if !strings.Contains(combined, "BXAuth=") {
+		return "", fmt.Errorf("cookie missing BXAuth field")
+	}
+	return combined, nil
+}
+
+// SanitizeIFlowFileName normalizes user identifiers for safe filename usage.
+func SanitizeIFlowFileName(raw string) string {
+	if raw == "" {
+		return ""
+	}
+	cleanEmail := strings.ReplaceAll(raw, "*", "x")
+	var result strings.Builder
+	for _, r := range cleanEmail {
+		if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') || r == '_' || r == '@' || r == '.' || r == '-' {
+			result.WriteRune(r)
+		}
+	}
+	return strings.TrimSpace(result.String())
+}
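Review note: a small sketch of the helpers' observable behavior. Illustrative only — internal/... packages import only from within this module, so treat the import as an assumption about where the snippet runs:

```go
package main

import (
	"fmt"

	iflow "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/iflow"
)

func main() {
	// Whitespace is collapsed, a trailing ";" is enforced, and BXAuth must be present.
	cookie, err := iflow.NormalizeCookie("  BXAuth=abc123;   session=xyz  ")
	fmt.Println(cookie, err) // "BXAuth=abc123; session=xyz;" <nil>

	_, err = iflow.NormalizeCookie("session=xyz")
	fmt.Println(err) // cookie missing BXAuth field

	// "*" is mapped to "x"; anything outside [a-zA-Z0-9_@.-] is dropped.
	fmt.Println(iflow.SanitizeIFlowFileName("u*er+name@example.com")) // uxername@example.com
}
```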
@@ -71,22 +71,9 @@ func promptForCookie(promptFn func(string) (string, error)) (string, error) {
 		return "", fmt.Errorf("failed to read cookie: %w", err)
 	}
 
-	line = strings.TrimSpace(line)
-	if line == "" {
-		return "", fmt.Errorf("cookie cannot be empty")
-	}
-
-	// Clean up any extra whitespace and join multiple spaces
-	cookie := strings.Join(strings.Fields(line), " ")
-
-	// Ensure it ends properly
-	if !strings.HasSuffix(cookie, ";") {
-		cookie = cookie + ";"
-	}
-
-	// Ensure BXAuth is present in the cookie
-	if !strings.Contains(cookie, "BXAuth=") {
-		return "", fmt.Errorf("BXAuth field not found in cookie")
-	}
+	cookie, err := iflow.NormalizeCookie(line)
+	if err != nil {
+		return "", err
+	}
 
 	return cookie, nil
@@ -94,17 +94,6 @@ func promptForCookie(promptFn func(string) (string, error)) (string, error) {
 
 // getAuthFilePath returns the auth file path for the given provider and email
 func getAuthFilePath(cfg *config.Config, provider, email string) string {
-	// Clean email to make it filename-safe
-	cleanEmail := strings.ReplaceAll(email, "*", "x")
-
-	// Remove any unsafe characters, but allow standard email chars (@, ., -)
-	var result strings.Builder
-	for _, r := range cleanEmail {
-		if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') ||
-			r == '_' || r == '@' || r == '.' || r == '-' {
-			result.WriteRune(r)
-		}
-	}
-
-	return fmt.Sprintf("%s/%s-%s.json", cfg.AuthDir, provider, result.String())
+	fileName := iflow.SanitizeIFlowFileName(email)
+	return fmt.Sprintf("%s/%s-%s.json", cfg.AuthDir, provider, fileName)
 }
@@ -68,82 +68,13 @@ func GetClaudeModels() []*ModelInfo {
 	}
 }
-
-// GeminiModels returns the shared base Gemini model set used by multiple providers.
-func GeminiModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID: "gemini-2.5-flash",
-			Object: "model",
-			Created: time.Now().Unix(),
-			OwnedBy: "google",
-			Type: "gemini",
-			Name: "models/gemini-2.5-flash",
-			Version: "001",
-			DisplayName: "Gemini 2.5 Flash",
-			Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			InputTokenLimit: 1048576,
-			OutputTokenLimit: 65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID: "gemini-2.5-pro",
-			Object: "model",
-			Created: time.Now().Unix(),
-			OwnedBy: "google",
-			Type: "gemini",
-			Name: "models/gemini-2.5-pro",
-			Version: "2.5",
-			DisplayName: "Gemini 2.5 Pro",
-			Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
-			InputTokenLimit: 1048576,
-			OutputTokenLimit: 65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID: "gemini-2.5-flash-lite",
-			Object: "model",
-			Created: time.Now().Unix(),
-			OwnedBy: "google",
-			Type: "gemini",
-			Name: "models/gemini-2.5-flash-lite",
-			Version: "2.5",
-			DisplayName: "Gemini 2.5 Flash Lite",
-			Description: "Our smallest and most cost effective model, built for at scale usage.",
-			InputTokenLimit: 1048576,
-			OutputTokenLimit: 65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-	}
-}
-
 // GetGeminiModels returns the standard Gemini model definitions
-func GetGeminiModels() []*ModelInfo { return GeminiModels() }
-
-// GetGeminiCLIModels returns the standard Gemini model definitions
-func GetGeminiCLIModels() []*ModelInfo {
+func GetGeminiModels() []*ModelInfo {
 	return []*ModelInfo{
-		{
-			ID: "gemini-2.5-flash",
-			Object: "model",
-			Created: time.Now().Unix(),
-			OwnedBy: "google",
-			Type: "gemini",
-			Name: "models/gemini-2.5-flash",
-			Version: "001",
-			DisplayName: "Gemini 2.5 Flash",
-			Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			InputTokenLimit: 1048576,
-			OutputTokenLimit: 65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
 		{
 			ID: "gemini-2.5-pro",
 			Object: "model",
-			Created: time.Now().Unix(),
+			Created: 1750118400,
 			OwnedBy: "google",
 			Type: "gemini",
 			Name: "models/gemini-2.5-pro",
@@ -155,10 +86,25 @@ func GetGeminiCLIModels() []*ModelInfo {
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
+		{
+			ID: "gemini-2.5-flash",
+			Object: "model",
+			Created: 1750118400,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/gemini-2.5-flash",
+			Version: "001",
+			DisplayName: "Gemini 2.5 Flash",
+			Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit: 1048576,
+			OutputTokenLimit: 65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
 		{
 			ID: "gemini-2.5-flash-lite",
 			Object: "model",
-			Created: time.Now().Unix(),
+			Created: 1753142400,
 			OwnedBy: "google",
 			Type: "gemini",
 			Name: "models/gemini-2.5-flash-lite",
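Review note: swapping time.Now().Unix() for fixed epochs makes the model listing stable across process restarts. The constants decode to the release dates the descriptions already name; a quick check:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// The Created constants in this diff are plain Unix timestamps.
	fmt.Println(time.Unix(1750118400, 0).UTC()) // 2025-06-17 00:00:00 +0000 UTC (2.5 Pro/Flash stable)
	fmt.Println(time.Unix(1753142400, 0).UTC()) // 2025-07-22 00:00:00 +0000 UTC (2.5 Flash Lite)
}
```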
@@ -173,7 +119,152 @@ func GetGeminiCLIModels() []*ModelInfo {
 		{
 			ID: "gemini-3-pro-preview",
 			Object: "model",
-			Created: time.Now().Unix(),
+			Created: 1737158400,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/gemini-3-pro-preview",
+			Version: "3.0",
+			DisplayName: "Gemini 3 Pro Preview",
+			Description: "Gemini 3 Pro Preview",
+			InputTokenLimit: 1048576,
+			OutputTokenLimit: 65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+	}
+}
+
+func GetGeminiVertexModels() []*ModelInfo {
+	return []*ModelInfo{
+		{
+			ID: "gemini-2.5-pro",
+			Object: "model",
+			Created: 1750118400,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/gemini-2.5-pro",
+			Version: "2.5",
+			DisplayName: "Gemini 2.5 Pro",
+			Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+			InputTokenLimit: 1048576,
+			OutputTokenLimit: 65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID: "gemini-2.5-flash",
+			Object: "model",
+			Created: 1750118400,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/gemini-2.5-flash",
+			Version: "001",
+			DisplayName: "Gemini 2.5 Flash",
+			Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit: 1048576,
+			OutputTokenLimit: 65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID: "gemini-2.5-flash-lite",
+			Object: "model",
+			Created: 1753142400,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/gemini-2.5-flash-lite",
+			Version: "2.5",
+			DisplayName: "Gemini 2.5 Flash Lite",
+			Description: "Our smallest and most cost effective model, built for at scale usage.",
+			InputTokenLimit: 1048576,
+			OutputTokenLimit: 65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID: "gemini-3-pro-preview",
+			Object: "model",
+			Created: 1737158400,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/gemini-3-pro-preview",
+			Version: "3.0",
+			DisplayName: "Gemini 3 Pro Preview",
+			Description: "Gemini 3 Pro Preview",
+			InputTokenLimit: 1048576,
+			OutputTokenLimit: 65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID: "gemini-3-pro-image-preview",
+			Object: "model",
+			Created: 1737158400,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/gemini-3-pro-image-preview",
+			Version: "3.0",
+			DisplayName: "Gemini 3 Pro Image Preview",
+			Description: "Gemini 3 Pro Image Preview",
+			InputTokenLimit: 1048576,
+			OutputTokenLimit: 65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+		},
+	}
+}
+
+// GetGeminiCLIModels returns the standard Gemini model definitions
+func GetGeminiCLIModels() []*ModelInfo {
+	return []*ModelInfo{
+		{
+			ID: "gemini-2.5-pro",
+			Object: "model",
+			Created: 1750118400,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/gemini-2.5-pro",
+			Version: "2.5",
+			DisplayName: "Gemini 2.5 Pro",
+			Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+			InputTokenLimit: 1048576,
+			OutputTokenLimit: 65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID: "gemini-2.5-flash",
+			Object: "model",
+			Created: 1750118400,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/gemini-2.5-flash",
+			Version: "001",
+			DisplayName: "Gemini 2.5 Flash",
+			Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit: 1048576,
+			OutputTokenLimit: 65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID: "gemini-2.5-flash-lite",
+			Object: "model",
+			Created: 1753142400,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/gemini-2.5-flash-lite",
+			Version: "2.5",
+			DisplayName: "Gemini 2.5 Flash Lite",
+			Description: "Our smallest and most cost effective model, built for at scale usage.",
+			InputTokenLimit: 1048576,
+			OutputTokenLimit: 65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID: "gemini-3-pro-preview",
+			Object: "model",
+			Created: 1737158400,
 			OwnedBy: "google",
 			Type: "gemini",
 			Name: "models/gemini-3-pro-preview",
@@ -190,102 +281,143 @@ func GetGeminiCLIModels() []*ModelInfo {
 
 // GetAIStudioModels returns the Gemini model definitions for AI Studio integrations
 func GetAIStudioModels() []*ModelInfo {
-	base := GeminiModels()
-	return append(base,
-		[]*ModelInfo{
-			{
-				ID: "gemini-3-pro-preview",
-				Object: "model",
-				Created: time.Now().Unix(),
-				OwnedBy: "google",
-				Type: "gemini",
-				Name: "models/gemini-3-pro-preview",
-				Version: "3.0",
-				DisplayName: "Gemini 3 Pro Preview",
-				Description: "Gemini 3 Pro Preview",
-				InputTokenLimit: 1048576,
-				OutputTokenLimit: 65536,
-				SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-				Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-			},
-			{
-				ID: "gemini-pro-latest",
-				Object: "model",
-				Created: time.Now().Unix(),
-				OwnedBy: "google",
-				Type: "gemini",
-				Name: "models/gemini-pro-latest",
-				Version: "2.5",
-				DisplayName: "Gemini Pro Latest",
-				Description: "Latest release of Gemini Pro",
-				InputTokenLimit: 1048576,
-				OutputTokenLimit: 65536,
-				SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-				Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-			},
-			{
-				ID: "gemini-flash-latest",
-				Object: "model",
-				Created: time.Now().Unix(),
-				OwnedBy: "google",
-				Type: "gemini",
-				Name: "models/gemini-flash-latest",
-				Version: "2.5",
-				DisplayName: "Gemini Flash Latest",
-				Description: "Latest release of Gemini Flash",
-				InputTokenLimit: 1048576,
-				OutputTokenLimit: 65536,
-				SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-				Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-			},
-			{
-				ID: "gemini-flash-lite-latest",
-				Object: "model",
-				Created: time.Now().Unix(),
-				OwnedBy: "google",
-				Type: "gemini",
-				Name: "models/gemini-flash-lite-latest",
-				Version: "2.5",
-				DisplayName: "Gemini Flash-Lite Latest",
-				Description: "Latest release of Gemini Flash-Lite",
-				InputTokenLimit: 1048576,
-				OutputTokenLimit: 65536,
-				SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-				Thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-			},
-			{
-				ID: "gemini-2.5-flash-image-preview",
-				Object: "model",
-				Created: time.Now().Unix(),
-				OwnedBy: "google",
-				Type: "gemini",
-				Name: "models/gemini-2.5-flash-image-preview",
-				Version: "2.5",
-				DisplayName: "Gemini 2.5 Flash Image Preview",
-				Description: "State-of-the-art image generation and editing model.",
-				InputTokenLimit: 1048576,
-				OutputTokenLimit: 8192,
-				SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-				// image models don't support thinkingConfig; leave Thinking nil
-			},
-			{
-				ID: "gemini-2.5-flash-image",
-				Object: "model",
-				Created: time.Now().Unix(),
-				OwnedBy: "google",
-				Type: "gemini",
-				Name: "models/gemini-2.5-flash-image",
-				Version: "2.5",
-				DisplayName: "Gemini 2.5 Flash Image",
-				Description: "State-of-the-art image generation and editing model.",
-				InputTokenLimit: 1048576,
-				OutputTokenLimit: 8192,
-				SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-				// image models don't support thinkingConfig; leave Thinking nil
-			},
-		}...,
-	)
+	return []*ModelInfo{
+		{
+			ID: "gemini-2.5-pro",
+			Object: "model",
+			Created: 1750118400,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/gemini-2.5-pro",
+			Version: "2.5",
+			DisplayName: "Gemini 2.5 Pro",
+			Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+			InputTokenLimit: 1048576,
+			OutputTokenLimit: 65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID: "gemini-2.5-flash",
+			Object: "model",
+			Created: 1750118400,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/gemini-2.5-flash",
+			Version: "001",
+			DisplayName: "Gemini 2.5 Flash",
+			Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit: 1048576,
+			OutputTokenLimit: 65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID: "gemini-2.5-flash-lite",
+			Object: "model",
+			Created: 1753142400,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/gemini-2.5-flash-lite",
+			Version: "2.5",
+			DisplayName: "Gemini 2.5 Flash Lite",
+			Description: "Our smallest and most cost effective model, built for at scale usage.",
+			InputTokenLimit: 1048576,
+			OutputTokenLimit: 65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID: "gemini-3-pro-preview",
+			Object: "model",
+			Created: 1737158400,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/gemini-3-pro-preview",
+			Version: "3.0",
+			DisplayName: "Gemini 3 Pro Preview",
+			Description: "Gemini 3 Pro Preview",
+			InputTokenLimit: 1048576,
+			OutputTokenLimit: 65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID: "gemini-pro-latest",
+			Object: "model",
+			Created: 1750118400,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/gemini-pro-latest",
+			Version: "2.5",
+			DisplayName: "Gemini Pro Latest",
+			Description: "Latest release of Gemini Pro",
+			InputTokenLimit: 1048576,
+			OutputTokenLimit: 65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID: "gemini-flash-latest",
+			Object: "model",
+			Created: 1750118400,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/gemini-flash-latest",
+			Version: "2.5",
+			DisplayName: "Gemini Flash Latest",
+			Description: "Latest release of Gemini Flash",
+			InputTokenLimit: 1048576,
+			OutputTokenLimit: 65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID: "gemini-flash-lite-latest",
+			Object: "model",
+			Created: 1753142400,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/gemini-flash-lite-latest",
+			Version: "2.5",
+			DisplayName: "Gemini Flash-Lite Latest",
+			Description: "Latest release of Gemini Flash-Lite",
+			InputTokenLimit: 1048576,
+			OutputTokenLimit: 65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID: "gemini-2.5-flash-image-preview",
+			Object: "model",
+			Created: 1756166400,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/gemini-2.5-flash-image-preview",
+			Version: "2.5",
+			DisplayName: "Gemini 2.5 Flash Image Preview",
+			Description: "State-of-the-art image generation and editing model.",
+			InputTokenLimit: 1048576,
+			OutputTokenLimit: 8192,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			// image models don't support thinkingConfig; leave Thinking nil
+		},
+		{
+			ID: "gemini-2.5-flash-image",
+			Object: "model",
+			Created: 1759363200,
+			OwnedBy: "google",
+			Type: "gemini",
+			Name: "models/gemini-2.5-flash-image",
+			Version: "2.5",
+			DisplayName: "Gemini 2.5 Flash Image",
+			Description: "State-of-the-art image generation and editing model.",
+			InputTokenLimit: 1048576,
+			OutputTokenLimit: 8192,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			// image models don't support thinkingConfig; leave Thinking nil
+		},
+	}
 }
 
 // GetOpenAIModels returns the standard OpenAI model definitions
@@ -99,124 +99,89 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	var lastStatus int
 	var lastBody []byte
-
-	// Get max retry count from config, default to 3 if not set
-	maxRetries := e.cfg.RequestRetry
-	if maxRetries < 0 {
-		maxRetries = 3
-	}
-
 	for idx, attemptModel := range models {
-		retryCount := 0
-		for {
-			payload := append([]byte(nil), basePayload...)
-			if action == "countTokens" {
-				payload = deleteJSONField(payload, "project")
-				payload = deleteJSONField(payload, "model")
-			} else {
-				payload = setJSONField(payload, "project", projectID)
-				payload = setJSONField(payload, "model", attemptModel)
-			}
-
-			tok, errTok := tokenSource.Token()
-			if errTok != nil {
-				err = errTok
-				return resp, err
-			}
-			updateGeminiCLITokenMetadata(auth, baseTokenData, tok)
-
-			url := fmt.Sprintf("%s/%s:%s", codeAssistEndpoint, codeAssistVersion, action)
-			if opts.Alt != "" && action != "countTokens" {
-				url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
-			}
-
-			reqHTTP, errReq := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(payload))
-			if errReq != nil {
-				err = errReq
-				return resp, err
-			}
-			reqHTTP.Header.Set("Content-Type", "application/json")
-			reqHTTP.Header.Set("Authorization", "Bearer "+tok.AccessToken)
-			applyGeminiCLIHeaders(reqHTTP)
-			reqHTTP.Header.Set("Accept", "application/json")
-			recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
-				URL: url,
-				Method: http.MethodPost,
-				Headers: reqHTTP.Header.Clone(),
-				Body: payload,
-				Provider: e.Identifier(),
-				AuthID: authID,
-				AuthLabel: authLabel,
-				AuthType: authType,
-				AuthValue: authValue,
-			})
-
-			httpResp, errDo := httpClient.Do(reqHTTP)
-			if errDo != nil {
-				recordAPIResponseError(ctx, e.cfg, errDo)
-				err = errDo
-				return resp, err
-			}
-
-			data, errRead := io.ReadAll(httpResp.Body)
-			if errClose := httpResp.Body.Close(); errClose != nil {
-				log.Errorf("gemini cli executor: close response body error: %v", errClose)
-			}
-			recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
-			if errRead != nil {
-				recordAPIResponseError(ctx, e.cfg, errRead)
-				err = errRead
-				return resp, err
-			}
-			appendAPIResponseChunk(ctx, e.cfg, data)
-			if httpResp.StatusCode >= 200 && httpResp.StatusCode < 300 {
-				reporter.publish(ctx, parseGeminiCLIUsage(data))
-				var param any
-				out := sdktranslator.TranslateNonStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), payload, data, &param)
-				resp = cliproxyexecutor.Response{Payload: []byte(out)}
-				return resp, nil
-			}
-
-			lastStatus = httpResp.StatusCode
-			lastBody = append([]byte(nil), data...)
-			log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
-
-			// Handle 429 rate limit errors with retry
-			if httpResp.StatusCode == 429 {
-				if retryCount < maxRetries {
-					// Parse retry delay from Google's response
-					retryDelay := parseRetryDelay(data)
-					log.Infof("gemini cli executor: rate limited (429), retrying model %s in %v (retry %d/%d)", attemptModel, retryDelay, retryCount+1, maxRetries)
-					retryCount++
-
-					// Wait for the specified delay
-					select {
-					case <-time.After(retryDelay):
-						// Continue to next retry iteration
-						continue
-					case <-ctx.Done():
-						// Context cancelled, return immediately
-						err = ctx.Err()
-						return resp, err
-					}
-				} else {
-					// Exhausted retries for this model, try next model if available
-					if idx+1 < len(models) {
-						log.Infof("gemini cli executor: rate limited, exhausted %d retries for model %s, trying fallback model: %s", maxRetries, attemptModel, models[idx+1])
-						break // Break inner loop to try next model
-					} else {
-						log.Infof("gemini cli executor: rate limited, exhausted %d retries for model %s, no additional fallback model", maxRetries, attemptModel)
-						// No more models to try, will return error below
-					}
-				}
-			} else {
-				// Non-429 error, don't retry this model
-				err = statusErr{code: httpResp.StatusCode, msg: string(data)}
-				return resp, err
-			}
-
-			// Break inner loop if we hit this point (no retry needed or exhausted retries)
-			break
-		}
+		payload := append([]byte(nil), basePayload...)
+		if action == "countTokens" {
+			payload = deleteJSONField(payload, "project")
+			payload = deleteJSONField(payload, "model")
+		} else {
+			payload = setJSONField(payload, "project", projectID)
+			payload = setJSONField(payload, "model", attemptModel)
+		}
+
+		tok, errTok := tokenSource.Token()
+		if errTok != nil {
+			err = errTok
+			return resp, err
+		}
+		updateGeminiCLITokenMetadata(auth, baseTokenData, tok)
+
+		url := fmt.Sprintf("%s/%s:%s", codeAssistEndpoint, codeAssistVersion, action)
+		if opts.Alt != "" && action != "countTokens" {
+			url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
+		}
+
+		reqHTTP, errReq := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(payload))
+		if errReq != nil {
+			err = errReq
+			return resp, err
+		}
+		reqHTTP.Header.Set("Content-Type", "application/json")
+		reqHTTP.Header.Set("Authorization", "Bearer "+tok.AccessToken)
+		applyGeminiCLIHeaders(reqHTTP)
+		reqHTTP.Header.Set("Accept", "application/json")
+		recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+			URL: url,
+			Method: http.MethodPost,
+			Headers: reqHTTP.Header.Clone(),
+			Body: payload,
+			Provider: e.Identifier(),
+			AuthID: authID,
+			AuthLabel: authLabel,
+			AuthType: authType,
+			AuthValue: authValue,
+		})
+
+		httpResp, errDo := httpClient.Do(reqHTTP)
+		if errDo != nil {
+			recordAPIResponseError(ctx, e.cfg, errDo)
+			err = errDo
+			return resp, err
+		}
+
+		data, errRead := io.ReadAll(httpResp.Body)
+		if errClose := httpResp.Body.Close(); errClose != nil {
+			log.Errorf("gemini cli executor: close response body error: %v", errClose)
+		}
+		recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+		if errRead != nil {
+			recordAPIResponseError(ctx, e.cfg, errRead)
+			err = errRead
+			return resp, err
+		}
+		appendAPIResponseChunk(ctx, e.cfg, data)
+		if httpResp.StatusCode >= 200 && httpResp.StatusCode < 300 {
+			reporter.publish(ctx, parseGeminiCLIUsage(data))
+			var param any
+			out := sdktranslator.TranslateNonStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), payload, data, &param)
+			resp = cliproxyexecutor.Response{Payload: []byte(out)}
+			return resp, nil
+		}
+
+		lastStatus = httpResp.StatusCode
+		lastBody = append([]byte(nil), data...)
+		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
+		if httpResp.StatusCode == 429 {
+			if idx+1 < len(models) {
+				log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
+			} else {
+				log.Debug("gemini cli executor: rate limited, no additional fallback model")
+			}
+			continue
+		}
+
+		err = newGeminiStatusErr(httpResp.StatusCode, data)
+		return resp, err
 	}
 
 	if len(lastBody) > 0 {
@@ -225,7 +190,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	if lastStatus == 0 {
 		lastStatus = 429
 	}
-	err = statusErr{code: lastStatus, msg: string(lastBody)}
+	err = newGeminiStatusErr(lastStatus, lastBody)
 	return resp, err
 }
 
@@ -270,135 +235,77 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth
 	var lastStatus int
 	var lastBody []byte
-
-	// Get max retry count from config, default to 3 if not set
-	maxRetries := e.cfg.RequestRetry
-	if maxRetries < 0 {
-		maxRetries = 3
-	}
-
 	for idx, attemptModel := range models {
-		var httpResp *http.Response
-		var payload []byte
-		var errDo error
-		shouldContinueToNextModel := false
-
-		retryCount := 0
-		// Inner retry loop for 429 errors on the same model
-		for {
-			payload = append([]byte(nil), basePayload...)
-			payload = setJSONField(payload, "project", projectID)
-			payload = setJSONField(payload, "model", attemptModel)
-
-			tok, errTok := tokenSource.Token()
-			if errTok != nil {
-				err = errTok
-				return nil, err
-			}
-			updateGeminiCLITokenMetadata(auth, baseTokenData, tok)
-
-			url := fmt.Sprintf("%s/%s:%s", codeAssistEndpoint, codeAssistVersion, "streamGenerateContent")
-			if opts.Alt == "" {
-				url = url + "?alt=sse"
-			} else {
-				url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
-			}
-
-			reqHTTP, errReq := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(payload))
-			if errReq != nil {
-				err = errReq
-				return nil, err
-			}
-			reqHTTP.Header.Set("Content-Type", "application/json")
-			reqHTTP.Header.Set("Authorization", "Bearer "+tok.AccessToken)
-			applyGeminiCLIHeaders(reqHTTP)
-			reqHTTP.Header.Set("Accept", "text/event-stream")
-			recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
-				URL: url,
-				Method: http.MethodPost,
-				Headers: reqHTTP.Header.Clone(),
-				Body: payload,
-				Provider: e.Identifier(),
-				AuthID: authID,
-				AuthLabel: authLabel,
-				AuthType: authType,
-				AuthValue: authValue,
-			})
-
-			httpResp, errDo = httpClient.Do(reqHTTP)
-			if errDo != nil {
-				recordAPIResponseError(ctx, e.cfg, errDo)
-				err = errDo
-				return nil, err
-			}
-			recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
-			if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
-				data, errRead := io.ReadAll(httpResp.Body)
-				if errClose := httpResp.Body.Close(); errClose != nil {
-					log.Errorf("gemini cli executor: close response body error: %v", errClose)
-				}
-				if errRead != nil {
-					recordAPIResponseError(ctx, e.cfg, errRead)
-					err = errRead
-					return nil, err
-				}
-				appendAPIResponseChunk(ctx, e.cfg, data)
-				lastStatus = httpResp.StatusCode
-				lastBody = append([]byte(nil), data...)
-				log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
-
-				// Handle 429 rate limit errors with retry
-				if httpResp.StatusCode == 429 {
-					if retryCount < maxRetries {
-						// Parse retry delay from Google's response
-						retryDelay := parseRetryDelay(data)
-						log.Infof("gemini cli executor: rate limited (429), retrying stream model %s in %v (retry %d/%d)", attemptModel, retryDelay, retryCount+1, maxRetries)
-						retryCount++
-
-						// Wait for the specified delay
-						select {
-						case <-time.After(retryDelay):
-							// Continue to next retry iteration
-							continue
-						case <-ctx.Done():
-							// Context cancelled, return immediately
-							err = ctx.Err()
-							return nil, err
-						}
-					} else {
-						// Exhausted retries for this model, try next model if available
-						if idx+1 < len(models) {
-							log.Infof("gemini cli executor: rate limited, exhausted %d retries for stream model %s, trying fallback model: %s", maxRetries, attemptModel, models[idx+1])
-							shouldContinueToNextModel = true
-							break // Break inner loop to try next model
-						} else {
-							log.Infof("gemini cli executor: rate limited, exhausted %d retries for stream model %s, no additional fallback model", maxRetries, attemptModel)
-							// No more models to try, will return error below
-						}
-					}
-				} else {
-					// Non-429 error, don't retry this model
-					err = statusErr{code: httpResp.StatusCode, msg: string(data)}
-					return nil, err
-				}
-
-				// Break inner loop if we hit this point (no retry needed or exhausted retries)
-				break
-			}
-
-			// Success - httpResp.StatusCode is 2xx, break out of retry loop
-			// and proceed to streaming logic below
-			break
-		}
-
-		// If we need to try the next fallback model, skip streaming logic
-		if shouldContinueToNextModel {
-			continue
-		}
-
-		// If we have a failed response (non-2xx), don't attempt streaming
-		// Continue outer loop to try next model or return error
-		if httpResp == nil || httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
-			continue
-		}
+		payload := append([]byte(nil), basePayload...)
+		payload = setJSONField(payload, "project", projectID)
+		payload = setJSONField(payload, "model", attemptModel)
+
+		tok, errTok := tokenSource.Token()
+		if errTok != nil {
+			err = errTok
+			return nil, err
+		}
+		updateGeminiCLITokenMetadata(auth, baseTokenData, tok)
+
+		url := fmt.Sprintf("%s/%s:%s", codeAssistEndpoint, codeAssistVersion, "streamGenerateContent")
+		if opts.Alt == "" {
+			url = url + "?alt=sse"
+		} else {
+			url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
+		}
+
+		reqHTTP, errReq := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(payload))
+		if errReq != nil {
+			err = errReq
+			return nil, err
+		}
+		reqHTTP.Header.Set("Content-Type", "application/json")
+		reqHTTP.Header.Set("Authorization", "Bearer "+tok.AccessToken)
+		applyGeminiCLIHeaders(reqHTTP)
+		reqHTTP.Header.Set("Accept", "text/event-stream")
+		recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+			URL: url,
+			Method: http.MethodPost,
+			Headers: reqHTTP.Header.Clone(),
+			Body: payload,
+			Provider: e.Identifier(),
+			AuthID: authID,
+			AuthLabel: authLabel,
+			AuthType: authType,
+			AuthValue: authValue,
+		})
+
+		httpResp, errDo := httpClient.Do(reqHTTP)
+		if errDo != nil {
+			recordAPIResponseError(ctx, e.cfg, errDo)
+			err = errDo
+			return nil, err
+		}
+		recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+		if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
+			data, errRead := io.ReadAll(httpResp.Body)
+			if errClose := httpResp.Body.Close(); errClose != nil {
+				log.Errorf("gemini cli executor: close response body error: %v", errClose)
+			}
+			if errRead != nil {
+				recordAPIResponseError(ctx, e.cfg, errRead)
+				err = errRead
+				return nil, err
+			}
+			appendAPIResponseChunk(ctx, e.cfg, data)
+			lastStatus = httpResp.StatusCode
+			lastBody = append([]byte(nil), data...)
+			log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
+			if httpResp.StatusCode == 429 {
+				if idx+1 < len(models) {
+					log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
+				} else {
+					log.Debug("gemini cli executor: rate limited, no additional fallback model")
+				}
+				continue
+			}
+
+			err = newGeminiStatusErr(httpResp.StatusCode, data)
+			return nil, err
+		}
 
 		out := make(chan cliproxyexecutor.StreamChunk)
@@ -470,7 +377,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth
 	if lastStatus == 0 {
 		lastStatus = 429
 	}
-	err = statusErr{code: lastStatus, msg: string(lastBody)}
+	err = newGeminiStatusErr(lastStatus, lastBody)
 	return nil, err
 }
 
@@ -578,7 +485,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
 	if lastStatus == 0 {
 		lastStatus = 429
 	}
-	return cliproxyexecutor.Response{}, statusErr{code: lastStatus, msg: string(lastBody)}
+	return cliproxyexecutor.Response{}, newGeminiStatusErr(lastStatus, lastBody)
 }
 
 func (e *GeminiCLIExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
@@ -863,19 +770,25 @@ func fixGeminiCLIImageAspectRatio(modelName string, rawJSON []byte) []byte {
 	return rawJSON
 }
 
+func newGeminiStatusErr(statusCode int, body []byte) statusErr {
+	err := statusErr{code: statusCode, msg: string(body)}
+	if statusCode == http.StatusTooManyRequests {
+		if retryAfter, parseErr := parseRetryDelay(body); parseErr == nil && retryAfter != nil {
+			err.retryAfter = retryAfter
+		}
+	}
+	return err
+}
+
 // parseRetryDelay extracts the retry delay from a Google API 429 error response.
 // The error response contains a RetryInfo.retryDelay field in the format "0.847655010s".
-// Returns the duration to wait, or a default duration if parsing fails.
-func parseRetryDelay(errorBody []byte) time.Duration {
-	const defaultDelay = 1 * time.Second
-	const maxDelay = 60 * time.Second
-
+// Returns the parsed duration or an error if it cannot be determined.
+func parseRetryDelay(errorBody []byte) (*time.Duration, error) {
 	// Try to parse the retryDelay from the error response
 	// Format: error.details[].retryDelay where @type == "type.googleapis.com/google.rpc.RetryInfo"
 	details := gjson.GetBytes(errorBody, "error.details")
 	if !details.Exists() || !details.IsArray() {
-		log.Debugf("parseRetryDelay: no error.details found, using default delay %v", defaultDelay)
-		return defaultDelay
+		return nil, fmt.Errorf("no error.details found")
 	}
 
 	for _, detail := range details.Array() {
@@ -886,24 +799,12 @@ func parseRetryDelay(errorBody []byte) time.Duration {
 			// Parse duration string like "0.847655010s"
 			duration, err := time.ParseDuration(retryDelay)
 			if err != nil {
-				log.Debugf("parseRetryDelay: failed to parse duration %q: %v, using default", retryDelay, err)
-				return defaultDelay
+				return nil, fmt.Errorf("failed to parse duration")
 			}
-			// Cap at maxDelay to prevent excessive waits
-			if duration > maxDelay {
-				log.Debugf("parseRetryDelay: capping delay from %v to %v", duration, maxDelay)
-				return maxDelay
-			}
-			if duration < 0 {
-				log.Debugf("parseRetryDelay: negative delay %v, using default", duration)
-				return defaultDelay
-			}
-			log.Debugf("parseRetryDelay: using delay %v from API response", duration)
-			return duration
+			return &duration, nil
 		}
 	}
 
-	log.Debugf("parseRetryDelay: no RetryInfo found, using default delay %v", defaultDelay)
-	return defaultDelay
+	return nil, fmt.Errorf("no RetryInfo found")
 }
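Review note: for context, the 429 body this parser walks carries Google's google.rpc.RetryInfo detail. A standalone sketch using the same gjson lookups (the sample body is illustrative, not captured from a live response):

```go
package main

import (
	"fmt"
	"time"

	"github.com/tidwall/gjson"
)

func main() {
	body := []byte(`{"error":{"code":429,"details":[{"@type":"type.googleapis.com/google.rpc.RetryInfo","retryDelay":"0.847655010s"}]}}`)
	for _, detail := range gjson.GetBytes(body, "error.details").Array() {
		if detail.Get("@type").String() != "type.googleapis.com/google.rpc.RetryInfo" {
			continue
		}
		// Google encodes the delay as a Go-parseable duration string.
		d, err := time.ParseDuration(detail.Get("retryDelay").String())
		fmt.Println(d, err) // 847.65501ms <nil>
	}
}
```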
@@ -8,6 +8,7 @@ import (
 	"io"
 	"net/http"
 	"strings"
+	"time"
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
@@ -340,8 +341,9 @@ func (e *OpenAICompatExecutor) overrideModel(payload []byte, model string) []byte
 }
 
 type statusErr struct {
 	code int
 	msg  string
+	retryAfter *time.Duration
 }
 
 func (e statusErr) Error() string {
@@ -350,4 +352,5 @@ func (e statusErr) Error() string {
 	}
 	return fmt.Sprintf("status %d", e.code)
 }
 func (e statusErr) StatusCode() int { return e.code }
+func (e statusErr) RetryAfter() *time.Duration { return e.retryAfter }
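Review note: the retry hint travels as an optional method on the error value, so callers that only hold an error can probe for it. A minimal sketch of the consumer-side assertion (trimmed stand-in type; the real extraction is the retryAfterFromError helper added further down):

```go
package main

import (
	"fmt"
	"time"
)

// statusErr is a trimmed stand-in for the executor's error type.
type statusErr struct {
	code       int
	retryAfter *time.Duration
}

func (e statusErr) Error() string              { return fmt.Sprintf("status %d", e.code) }
func (e statusErr) RetryAfter() *time.Duration { return e.retryAfter }

func main() {
	d := 2 * time.Second
	var err error = statusErr{code: 429, retryAfter: &d}

	// Optional-interface assertion: only errors carrying a hint match.
	if rap, ok := err.(interface{ RetryAfter() *time.Duration }); ok {
		if ra := rap.RetryAfter(); ra != nil {
			fmt.Println("retry after", *ra) // retry after 2s
		}
	}
}
```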
@@ -22,6 +22,7 @@ func ConvertOpenAIResponsesRequestToCodex(modelName string, inputRawJSON []byte,
 	rawJSON, _ = sjson.DeleteBytes(rawJSON, "max_completion_tokens")
 	rawJSON, _ = sjson.DeleteBytes(rawJSON, "temperature")
 	rawJSON, _ = sjson.DeleteBytes(rawJSON, "top_p")
+	rawJSON, _ = sjson.DeleteBytes(rawJSON, "service_tier")
 
 	originalInstructions := ""
 	originalInstructionsText := ""
@@ -62,6 +62,8 @@ type Result struct {
 	Model string
 	// Success marks whether the execution succeeded.
 	Success bool
+	// RetryAfter carries a provider supplied retry hint (e.g. 429 retryDelay).
+	RetryAfter *time.Duration
 	// Error describes the failure when Success is false.
 	Error *Error
 }
@@ -325,6 +327,9 @@ func (m *Manager) executeWithProvider(ctx context.Context, provider string, req
 		if errors.As(errExec, &se) && se != nil {
 			result.Error.HTTPStatus = se.StatusCode()
 		}
+		if ra := retryAfterFromError(errExec); ra != nil {
+			result.RetryAfter = ra
+		}
 		m.MarkResult(execCtx, result)
 		lastErr = errExec
 		continue
@@ -370,6 +375,9 @@ func (m *Manager) executeCountWithProvider(ctx context.Context, provider string,
 		if errors.As(errExec, &se) && se != nil {
 			result.Error.HTTPStatus = se.StatusCode()
 		}
+		if ra := retryAfterFromError(errExec); ra != nil {
+			result.RetryAfter = ra
+		}
 		m.MarkResult(execCtx, result)
 		lastErr = errExec
 		continue
@@ -415,6 +423,7 @@ func (m *Manager) executeStreamWithProvider(ctx context.Context, provider string
 			rerr.HTTPStatus = se.StatusCode()
 		}
 		result := Result{AuthID: auth.ID, Provider: provider, Model: req.Model, Success: false, Error: rerr}
+		result.RetryAfter = retryAfterFromError(errStream)
 		m.MarkResult(execCtx, result)
 		lastErr = errStream
 		continue
@@ -556,17 +565,23 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
 			suspendReason = "payment_required"
 			shouldSuspendModel = true
 		case 429:
-			cooldown, nextLevel := nextQuotaCooldown(state.Quota.BackoffLevel)
 			var next time.Time
-			if cooldown > 0 {
-				next = now.Add(cooldown)
+			backoffLevel := state.Quota.BackoffLevel
+			if result.RetryAfter != nil {
+				next = now.Add(*result.RetryAfter)
+			} else {
+				cooldown, nextLevel := nextQuotaCooldown(backoffLevel)
+				if cooldown > 0 {
+					next = now.Add(cooldown)
+				}
+				backoffLevel = nextLevel
 			}
 			state.NextRetryAfter = next
 			state.Quota = QuotaState{
 				Exceeded: true,
 				Reason: "quota",
 				NextRecoverAt: next,
-				BackoffLevel: nextLevel,
+				BackoffLevel: backoffLevel,
 			}
 			suspendReason = "quota"
 			shouldSuspendModel = true
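Review note: the 429 branch now trusts the upstream hint verbatim and only advances the exponential backoff level when no hint arrived. A compact sketch of just that decision, with nextQuotaCooldown stubbed (the real cooldown table lives elsewhere in this package):

```go
package main

import (
	"fmt"
	"time"
)

// nextQuotaCooldown is stubbed for illustration; the real implementation maps
// a backoff level to a growing cooldown and returns the next level.
func nextQuotaCooldown(level int) (time.Duration, int) {
	return time.Duration(1<<level) * time.Minute, level + 1
}

func nextRecovery(now time.Time, retryAfter *time.Duration, backoffLevel int) (time.Time, int) {
	if retryAfter != nil {
		// Provider hint wins; the backoff level is left unchanged.
		return now.Add(*retryAfter), backoffLevel
	}
	cooldown, nextLevel := nextQuotaCooldown(backoffLevel)
	return now.Add(cooldown), nextLevel
}

func main() {
	now := time.Now()
	hint := 5 * time.Second
	at, lvl := nextRecovery(now, &hint, 2)
	fmt.Println(at.Sub(now), lvl) // 5s 2
	at, lvl = nextRecovery(now, nil, 2)
	fmt.Println(at.Sub(now), lvl) // 4m0s 3
}
```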
@@ -582,7 +597,7 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
 		auth.UpdatedAt = now
 		updateAggregatedAvailability(auth, now)
 	} else {
-		applyAuthFailureState(auth, result.Error, now)
+		applyAuthFailureState(auth, result.Error, result.RetryAfter, now)
 	}
 }
 
@@ -742,6 +757,25 @@ func cloneError(err *Error) *Error {
 	}
 }
 
+func retryAfterFromError(err error) *time.Duration {
+	if err == nil {
+		return nil
+	}
+	type retryAfterProvider interface {
+		RetryAfter() *time.Duration
+	}
+	rap, ok := err.(retryAfterProvider)
+	if !ok || rap == nil {
+		return nil
+	}
+	retryAfter := rap.RetryAfter()
+	if retryAfter == nil {
+		return nil
+	}
+	val := *retryAfter
+	return &val
+}
+
 func statusCodeFromResult(err *Error) int {
 	if err == nil {
 		return 0
@@ -749,7 +783,7 @@ func statusCodeFromResult(err *Error) int {
 	return err.StatusCode()
 }
 
-func applyAuthFailureState(auth *Auth, resultErr *Error, now time.Time) {
+func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Duration, now time.Time) {
 	if auth == nil {
 		return
 	}
@@ -774,13 +808,17 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, now time.Time) {
 		auth.StatusMessage = "quota exhausted"
 		auth.Quota.Exceeded = true
 		auth.Quota.Reason = "quota"
-		cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel)
 		var next time.Time
-		if cooldown > 0 {
-			next = now.Add(cooldown)
+		if retryAfter != nil {
+			next = now.Add(*retryAfter)
+		} else {
+			cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel)
+			if cooldown > 0 {
+				next = now.Add(cooldown)
+			}
+			auth.Quota.BackoffLevel = nextLevel
 		}
 		auth.Quota.NextRecoverAt = next
-		auth.Quota.BackoffLevel = nextLevel
 		auth.NextRetryAfter = next
 	case 408, 500, 502, 503, 504:
 		auth.StatusMessage = "transient upstream error"
@@ -629,7 +629,7 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
 		models = registry.GetGeminiModels()
 	case "vertex":
 		// Vertex AI Gemini supports the same model identifiers as Gemini.
-		models = registry.GetGeminiModels()
+		models = registry.GetGeminiVertexModels()
 	case "gemini-cli":
 		models = registry.GetGeminiCLIModels()
 	case "aistudio":