**refactor(translator): remove service_tier from Codex OpenAI request payload**

Merge pull request #285 from router-for-me/iflow
feat(iflow): add cookie-based authentication endpoint
2026-02-05 14:00:52 +08:00 · 2025-11-20 20:12:06 +08:00 · 2025-11-20 20:04:38 +08:00 · 2025-11-20 18:23:43 +08:00 · 2025-11-20 18:16:26 +08:00 · 2025-11-20 17:49:37 +08:00
12 changed files with 527 additions and 197 deletions
--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
@@ -1443,6 +1443,87 @@ func (h *Handler) RequestIFlowToken(c *gin.Context) {
 	c.JSON(http.StatusOK, gin.H{"status": "ok", "url": authURL, "state": state})
 }

+func (h *Handler) RequestIFlowCookieToken(c *gin.Context) {
+	ctx := context.Background()
+
+	var payload struct {
+		Cookie string `json:"cookie"`
+	}
+	if err := c.ShouldBindJSON(&payload); err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": "cookie is required"})
+		return
+	}
+
+	cookieValue := strings.TrimSpace(payload.Cookie)
+
+	if cookieValue == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": "cookie is required"})
+		return
+	}
+
+	cookieValue, errNormalize := iflowauth.NormalizeCookie(cookieValue)
+	if errNormalize != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": errNormalize.Error()})
+		return
+	}
+
+	authSvc := iflowauth.NewIFlowAuth(h.cfg)
+	tokenData, errAuth := authSvc.AuthenticateWithCookie(ctx, cookieValue)
+	if errAuth != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": errAuth.Error()})
+		return
+	}
+
+	tokenData.Cookie = cookieValue
+
+	tokenStorage := authSvc.CreateCookieTokenStorage(tokenData)
+	email := strings.TrimSpace(tokenStorage.Email)
+	if email == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": "failed to extract email from token"})
+		return
+	}
+
+	fileName := iflowauth.SanitizeIFlowFileName(email)
+	if fileName == "" {
+		fileName = fmt.Sprintf("iflow-%d", time.Now().UnixMilli())
+	}
+
+	tokenStorage.Email = email
+
+	record := &coreauth.Auth{
+		ID:       fmt.Sprintf("iflow-%s.json", fileName),
+		Provider: "iflow",
+		FileName: fmt.Sprintf("iflow-%s.json", fileName),
+		Storage:  tokenStorage,
+		Metadata: map[string]any{
+			"email":        email,
+			"api_key":      tokenStorage.APIKey,
+			"expired":      tokenStorage.Expire,
+			"cookie":       tokenStorage.Cookie,
+			"type":         tokenStorage.Type,
+			"last_refresh": tokenStorage.LastRefresh,
+		},
+		Attributes: map[string]string{
+			"api_key": tokenStorage.APIKey,
+		},
+	}
+
+	savedPath, errSave := h.saveTokenRecord(ctx, record)
+	if errSave != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"status": "error", "error": "failed to save authentication tokens"})
+		return
+	}
+
+	fmt.Printf("iFlow cookie authentication successful. Token saved to %s\n", savedPath)
+	c.JSON(http.StatusOK, gin.H{
+		"status":     "ok",
+		"saved_path": savedPath,
+		"email":      email,
+		"expired":    tokenStorage.Expire,
+		"type":       tokenStorage.Type,
+	})
+}
+
 type projectSelectionRequiredError struct{}

 func (e *projectSelectionRequiredError) Error() string {
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -518,6 +518,7 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.GET("/gemini-cli-auth-url", s.mgmt.RequestGeminiCLIToken)
 		mgmt.GET("/qwen-auth-url", s.mgmt.RequestQwenToken)
 		mgmt.GET("/iflow-auth-url", s.mgmt.RequestIFlowToken)
+		mgmt.POST("/iflow-auth-url", s.mgmt.RequestIFlowCookieToken)
 		mgmt.GET("/get-auth-status", s.mgmt.GetAuthStatus)
 	}
 }
--- a/internal/auth/iflow/cookie_helpers.go
+++ b/internal/auth/iflow/cookie_helpers.go
@@ -0,0 +1,38 @@
+package iflow
+
+import (
+	"fmt"
+	"strings"
+)
+
+// NormalizeCookie normalizes raw cookie strings for iFlow authentication flows.
+func NormalizeCookie(raw string) (string, error) {
+	trimmed := strings.TrimSpace(raw)
+	if trimmed == "" {
+		return "", fmt.Errorf("cookie cannot be empty")
+	}
+
+	combined := strings.Join(strings.Fields(trimmed), " ")
+	if !strings.HasSuffix(combined, ";") {
+		combined += ";"
+	}
+	if !strings.Contains(combined, "BXAuth=") {
+		return "", fmt.Errorf("cookie missing BXAuth field")
+	}
+	return combined, nil
+}
+
+// SanitizeIFlowFileName normalizes user identifiers for safe filename usage.
+func SanitizeIFlowFileName(raw string) string {
+	if raw == "" {
+		return ""
+	}
+	cleanEmail := strings.ReplaceAll(raw, "*", "x")
+	var result strings.Builder
+	for _, r := range cleanEmail {
+		if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') || r == '_' || r == '@' || r == '.' || r == '-' {
+			result.WriteRune(r)
+		}
+	}
+	return strings.TrimSpace(result.String())
+}
--- a/internal/cmd/iflow_cookie.go
+++ b/internal/cmd/iflow_cookie.go
@@ -71,22 +71,9 @@ func promptForCookie(promptFn func(string) (string, error)) (string, error) {
 		return "", fmt.Errorf("failed to read cookie: %w", err)
 	}

-	line = strings.TrimSpace(line)
-	if line == "" {
-		return "", fmt.Errorf("cookie cannot be empty")
-	}
-
-	// Clean up any extra whitespace and join multiple spaces
-	cookie := strings.Join(strings.Fields(line), " ")
-
-	// Ensure it ends properly
-	if !strings.HasSuffix(cookie, ";") {
-		cookie = cookie + ";"
-	}
-
-	// Ensure BXAuth is present in the cookie
-	if !strings.Contains(cookie, "BXAuth=") {
-		return "", fmt.Errorf("BXAuth field not found in cookie")
+	cookie, err := iflow.NormalizeCookie(line)
+	if err != nil {
+		return "", err
 	}

 	return cookie, nil
@@ -94,17 +81,6 @@ func promptForCookie(promptFn func(string) (string, error)) (string, error) {

 // getAuthFilePath returns the auth file path for the given provider and email
 func getAuthFilePath(cfg *config.Config, provider, email string) string {
-	// Clean email to make it filename-safe
-	cleanEmail := strings.ReplaceAll(email, "*", "x")
-
-	// Remove any unsafe characters, but allow standard email chars (@, ., -)
-	var result strings.Builder
-	for _, r := range cleanEmail {
-		if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') ||
-			r == '_' || r == '@' || r == '.' || r == '-' {
-			result.WriteRune(r)
-		}
-	}
-
-	return fmt.Sprintf("%s/%s-%s.json", cfg.AuthDir, provider, result.String())
+	fileName := iflow.SanitizeIFlowFileName(email)
+	return fmt.Sprintf("%s/%s-%s.json", cfg.AuthDir, provider, fileName)
 }
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -68,28 +68,13 @@ func GetClaudeModels() []*ModelInfo {
 	}
 }

-// GeminiModels returns the shared base Gemini model set used by multiple providers.
-func GeminiModels() []*ModelInfo {
+// GetGeminiModels returns the standard Gemini model definitions
+func GetGeminiModels() []*ModelInfo {
 	return []*ModelInfo{
-		{
-			ID:                         "gemini-2.5-flash",
-			Object:                     "model",
-			Created:                    time.Now().Unix(),
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash",
-			Version:                    "001",
-			DisplayName:                "Gemini 2.5 Flash",
-			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
 		{
 			ID:                         "gemini-2.5-pro",
 			Object:                     "model",
-			Created:                    time.Now().Unix(),
+			Created:                    1750118400,
 			OwnedBy:                    "google",
 			Type:                       "gemini",
 			Name:                       "models/gemini-2.5-pro",
@@ -101,10 +86,25 @@ func GeminiModels() []*ModelInfo {
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
+		{
+			ID:                         "gemini-2.5-flash",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash",
+			Version:                    "001",
+			DisplayName:                "Gemini 2.5 Flash",
+			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
 		{
 			ID:                         "gemini-2.5-flash-lite",
 			Object:                     "model",
-			Created:                    time.Now().Unix(),
+			Created:                    1753142400,
 			OwnedBy:                    "google",
 			Type:                       "gemini",
 			Name:                       "models/gemini-2.5-flash-lite",
@@ -116,34 +116,110 @@ func GeminiModels() []*ModelInfo {
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
+		{
+			ID:                         "gemini-3-pro-preview",
+			Object:                     "model",
+			Created:                    1737158400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Preview",
+			Description:                "Gemini 3 Pro Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
 	}
 }

-// GetGeminiModels returns the standard Gemini model definitions
-func GetGeminiModels() []*ModelInfo { return GeminiModels() }
+func GetGeminiVertexModels() []*ModelInfo {
+	return []*ModelInfo{
+		{
+			ID:                         "gemini-2.5-pro",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-pro",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Pro",
+			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash",
+			Version:                    "001",
+			DisplayName:                "Gemini 2.5 Flash",
+			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash-lite",
+			Object:                     "model",
+			Created:                    1753142400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash-lite",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Flash Lite",
+			Description:                "Our smallest and most cost effective model, built for at scale usage.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-3-pro-preview",
+			Object:                     "model",
+			Created:                    1737158400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Preview",
+			Description:                "Gemini 3 Pro Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-3-pro-image-preview",
+			Object:                     "model",
+			Created:                    1737158400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-image-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Image Preview",
+			Description:                "Gemini 3 Pro Image Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+		},
+	}
+}

 // GetGeminiCLIModels returns the standard Gemini model definitions
 func GetGeminiCLIModels() []*ModelInfo {
 	return []*ModelInfo{
-		{
-			ID:                         "gemini-2.5-flash",
-			Object:                     "model",
-			Created:                    time.Now().Unix(),
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash",
-			Version:                    "001",
-			DisplayName:                "Gemini 2.5 Flash",
-			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
 		{
 			ID:                         "gemini-2.5-pro",
 			Object:                     "model",
-			Created:                    time.Now().Unix(),
+			Created:                    1750118400,
 			OwnedBy:                    "google",
 			Type:                       "gemini",
 			Name:                       "models/gemini-2.5-pro",
@@ -155,10 +231,25 @@ func GetGeminiCLIModels() []*ModelInfo {
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
+		{
+			ID:                         "gemini-2.5-flash",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash",
+			Version:                    "001",
+			DisplayName:                "Gemini 2.5 Flash",
+			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
 		{
 			ID:                         "gemini-2.5-flash-lite",
 			Object:                     "model",
-			Created:                    time.Now().Unix(),
+			Created:                    1753142400,
 			OwnedBy:                    "google",
 			Type:                       "gemini",
 			Name:                       "models/gemini-2.5-flash-lite",
@@ -170,122 +261,163 @@ func GetGeminiCLIModels() []*ModelInfo {
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
-		// {
-		// 	ID:                         "gemini-3-pro-preview-11-2025",
-		// 	Object:                     "model",
-		// 	Created:                    time.Now().Unix(),
-		// 	OwnedBy:                    "google",
-		// 	Type:                       "gemini",
-		// 	Name:                       "models/gemini-3-pro-preview-11-2025",
-		// 	Version:                    "3",
-		// 	DisplayName:                "Gemini 3 Pro Preview 11-2025",
-		// 	Description:                "Latest preview of Gemini Pro",
-		// 	InputTokenLimit:            1048576,
-		// 	OutputTokenLimit:           65536,
-		// 	SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-		// 	Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		// },
+		{
+			ID:                         "gemini-3-pro-preview",
+			Object:                     "model",
+			Created:                    1737158400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Preview",
+			Description:                "Gemini 3 Pro Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
 	}
 }

 // GetAIStudioModels returns the Gemini model definitions for AI Studio integrations
 func GetAIStudioModels() []*ModelInfo {
-	base := GeminiModels()
-
-	return append(base,
-		[]*ModelInfo{
-			{
-				ID:                         "gemini-3-pro-preview",
-				Object:                     "model",
-				Created:                    time.Now().Unix(),
-				OwnedBy:                    "google",
-				Type:                       "gemini",
-				Name:                       "models/gemini-3-pro-preview",
-				Version:                    "3.0",
-				DisplayName:                "Gemini 3 Pro Preview",
-				Description:                "Gemini 3 Pro Preview",
-				InputTokenLimit:            1048576,
-				OutputTokenLimit:           65536,
-				SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-				Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-			},
-			{
-				ID:                         "gemini-pro-latest",
-				Object:                     "model",
-				Created:                    time.Now().Unix(),
-				OwnedBy:                    "google",
-				Type:                       "gemini",
-				Name:                       "models/gemini-pro-latest",
-				Version:                    "2.5",
-				DisplayName:                "Gemini Pro Latest",
-				Description:                "Latest release of Gemini Pro",
-				InputTokenLimit:            1048576,
-				OutputTokenLimit:           65536,
-				SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-				Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-			},
-			{
-				ID:                         "gemini-flash-latest",
-				Object:                     "model",
-				Created:                    time.Now().Unix(),
-				OwnedBy:                    "google",
-				Type:                       "gemini",
-				Name:                       "models/gemini-flash-latest",
-				Version:                    "2.5",
-				DisplayName:                "Gemini Flash Latest",
-				Description:                "Latest release of Gemini Flash",
-				InputTokenLimit:            1048576,
-				OutputTokenLimit:           65536,
-				SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-				Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-			},
-			{
-				ID:                         "gemini-flash-lite-latest",
-				Object:                     "model",
-				Created:                    time.Now().Unix(),
-				OwnedBy:                    "google",
-				Type:                       "gemini",
-				Name:                       "models/gemini-flash-lite-latest",
-				Version:                    "2.5",
-				DisplayName:                "Gemini Flash-Lite Latest",
-				Description:                "Latest release of Gemini Flash-Lite",
-				InputTokenLimit:            1048576,
-				OutputTokenLimit:           65536,
-				SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-				Thinking:                   &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-			},
-			{
-				ID:                         "gemini-2.5-flash-image-preview",
-				Object:                     "model",
-				Created:                    time.Now().Unix(),
-				OwnedBy:                    "google",
-				Type:                       "gemini",
-				Name:                       "models/gemini-2.5-flash-image-preview",
-				Version:                    "2.5",
-				DisplayName:                "Gemini 2.5 Flash Image Preview",
-				Description:                "State-of-the-art image generation and editing model.",
-				InputTokenLimit:            1048576,
-				OutputTokenLimit:           8192,
-				SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-				// image models don't support thinkingConfig; leave Thinking nil
-			},
-			{
-				ID:                         "gemini-2.5-flash-image",
-				Object:                     "model",
-				Created:                    time.Now().Unix(),
-				OwnedBy:                    "google",
-				Type:                       "gemini",
-				Name:                       "models/gemini-2.5-flash-image",
-				Version:                    "2.5",
-				DisplayName:                "Gemini 2.5 Flash Image",
-				Description:                "State-of-the-art image generation and editing model.",
-				InputTokenLimit:            1048576,
-				OutputTokenLimit:           8192,
-				SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-				// image models don't support thinkingConfig; leave Thinking nil
-			},
-		}...,
-	)
+	return []*ModelInfo{
+		{
+			ID:                         "gemini-2.5-pro",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-pro",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Pro",
+			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash",
+			Version:                    "001",
+			DisplayName:                "Gemini 2.5 Flash",
+			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash-lite",
+			Object:                     "model",
+			Created:                    1753142400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash-lite",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Flash Lite",
+			Description:                "Our smallest and most cost effective model, built for at scale usage.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-3-pro-preview",
+			Object:                     "model",
+			Created:                    1737158400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Preview",
+			Description:                "Gemini 3 Pro Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-pro-latest",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-pro-latest",
+			Version:                    "2.5",
+			DisplayName:                "Gemini Pro Latest",
+			Description:                "Latest release of Gemini Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-flash-latest",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-flash-latest",
+			Version:                    "2.5",
+			DisplayName:                "Gemini Flash Latest",
+			Description:                "Latest release of Gemini Flash",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-flash-lite-latest",
+			Object:                     "model",
+			Created:                    1753142400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-flash-lite-latest",
+			Version:                    "2.5",
+			DisplayName:                "Gemini Flash-Lite Latest",
+			Description:                "Latest release of Gemini Flash-Lite",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash-image-preview",
+			Object:                     "model",
+			Created:                    1756166400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash-image-preview",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Flash Image Preview",
+			Description:                "State-of-the-art image generation and editing model.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           8192,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			// image models don't support thinkingConfig; leave Thinking nil
+		},
+		{
+			ID:                         "gemini-2.5-flash-image",
+			Object:                     "model",
+			Created:                    1759363200,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash-image",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Flash Image",
+			Description:                "State-of-the-art image generation and editing model.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           8192,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			// image models don't support thinkingConfig; leave Thinking nil
+		},
+	}
 }

 // GetOpenAIModels returns the standard OpenAI model definitions
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -180,7 +180,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 			continue
 		}

-		err = statusErr{code: httpResp.StatusCode, msg: string(data)}
+		err = newGeminiStatusErr(httpResp.StatusCode, data)
 		return resp, err
 	}

@@ -190,7 +190,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	if lastStatus == 0 {
 		lastStatus = 429
 	}
-	err = statusErr{code: lastStatus, msg: string(lastBody)}
+	err = newGeminiStatusErr(lastStatus, lastBody)
 	return resp, err
 }

@@ -304,7 +304,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 				}
 				continue
 			}
-			err = statusErr{code: httpResp.StatusCode, msg: string(data)}
+			err = newGeminiStatusErr(httpResp.StatusCode, data)
 			return nil, err
 		}

@@ -377,7 +377,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	if lastStatus == 0 {
 		lastStatus = 429
 	}
-	err = statusErr{code: lastStatus, msg: string(lastBody)}
+	err = newGeminiStatusErr(lastStatus, lastBody)
 	return nil, err
 }

@@ -485,7 +485,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
 	if lastStatus == 0 {
 		lastStatus = 429
 	}
-	return cliproxyexecutor.Response{}, statusErr{code: lastStatus, msg: string(lastBody)}
+	return cliproxyexecutor.Response{}, newGeminiStatusErr(lastStatus, lastBody)
 }

 func (e *GeminiCLIExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
@@ -769,3 +769,42 @@ func fixGeminiCLIImageAspectRatio(modelName string, rawJSON []byte) []byte {
 	}
 	return rawJSON
 }
+
+func newGeminiStatusErr(statusCode int, body []byte) statusErr {
+	err := statusErr{code: statusCode, msg: string(body)}
+	if statusCode == http.StatusTooManyRequests {
+		if retryAfter, parseErr := parseRetryDelay(body); parseErr == nil && retryAfter != nil {
+			err.retryAfter = retryAfter
+		}
+	}
+	return err
+}
+
+// parseRetryDelay extracts the retry delay from a Google API 429 error response.
+// The error response contains a RetryInfo.retryDelay field in the format "0.847655010s".
+// Returns the parsed duration or an error if it cannot be determined.
+func parseRetryDelay(errorBody []byte) (*time.Duration, error) {
+	// Try to parse the retryDelay from the error response
+	// Format: error.details[].retryDelay where @type == "type.googleapis.com/google.rpc.RetryInfo"
+	details := gjson.GetBytes(errorBody, "error.details")
+	if !details.Exists() || !details.IsArray() {
+		return nil, fmt.Errorf("no error.details found")
+	}
+
+	for _, detail := range details.Array() {
+		typeVal := detail.Get("@type").String()
+		if typeVal == "type.googleapis.com/google.rpc.RetryInfo" {
+			retryDelay := detail.Get("retryDelay").String()
+			if retryDelay != "" {
+				// Parse duration string like "0.847655010s"
+				duration, err := time.ParseDuration(retryDelay)
+				if err != nil {
+					return nil, fmt.Errorf("failed to parse duration")
+				}
+				return &duration, nil
+			}
+		}
+	}
+
+	return nil, fmt.Errorf("no RetryInfo found")
+}
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -8,6 +8,7 @@ import (
 	"io"
 	"net/http"
 	"strings"
+	"time"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
@@ -340,8 +341,9 @@ func (e *OpenAICompatExecutor) overrideModel(payload []byte, model string) []byt
 }

 type statusErr struct {
-	code int
-	msg  string
+	code       int
+	msg        string
+	retryAfter *time.Duration
 }

 func (e statusErr) Error() string {
@@ -350,4 +352,5 @@ func (e statusErr) Error() string {
 	}
 	return fmt.Sprintf("status %d", e.code)
 }
-func (e statusErr) StatusCode() int { return e.code }
+func (e statusErr) StatusCode() int            { return e.code }
+func (e statusErr) RetryAfter() *time.Duration { return e.retryAfter }
--- a/internal/translator/codex/openai/responses/codex_openai-responses_request.go
+++ b/internal/translator/codex/openai/responses/codex_openai-responses_request.go
@@ -22,6 +22,7 @@ func ConvertOpenAIResponsesRequestToCodex(modelName string, inputRawJSON []byte,
 	rawJSON, _ = sjson.DeleteBytes(rawJSON, "max_completion_tokens")
 	rawJSON, _ = sjson.DeleteBytes(rawJSON, "temperature")
 	rawJSON, _ = sjson.DeleteBytes(rawJSON, "top_p")
+	rawJSON, _ = sjson.DeleteBytes(rawJSON, "service_tier")

 	originalInstructions := ""
 	originalInstructionsText := ""
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -88,6 +88,15 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 		}
 	}

+	// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
+	// This matches the official Gemini CLI behavior which always sends:
+	// { thinkingBudget: -1, includeThoughts: true }
+	// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
+	if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
+		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
+		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
+	}
+
 	// Temperature/top_p/top_k
 	if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number {
 		out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num)
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
@@ -6,6 +6,7 @@ import (

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -294,6 +295,17 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 			}
 		}
 	}
+
+	// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
+	// This matches the official Gemini CLI behavior which always sends:
+	// { thinkingBudget: -1, includeThoughts: true }
+	// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
+	if !gjson.Get(out, "generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
+		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
+		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
+		log.Debugf("Applied default thinkingConfig for gemini-3-pro-preview (matches Gemini CLI): thinkingBudget=-1, include_thoughts=true")
+	}
+
 	result := []byte(out)
 	result = common.AttachDefaultSafetySettings(result, "safetySettings")
 	return result
--- a/sdk/cliproxy/auth/manager.go
+++ b/sdk/cliproxy/auth/manager.go
@@ -62,6 +62,8 @@ type Result struct {
 	Model string
 	// Success marks whether the execution succeeded.
 	Success bool
+	// RetryAfter carries a provider supplied retry hint (e.g. 429 retryDelay).
+	RetryAfter *time.Duration
 	// Error describes the failure when Success is false.
 	Error *Error
 }
@@ -325,6 +327,9 @@ func (m *Manager) executeWithProvider(ctx context.Context, provider string, req
 			if errors.As(errExec, &se) && se != nil {
 				result.Error.HTTPStatus = se.StatusCode()
 			}
+			if ra := retryAfterFromError(errExec); ra != nil {
+				result.RetryAfter = ra
+			}
 			m.MarkResult(execCtx, result)
 			lastErr = errExec
 			continue
@@ -370,6 +375,9 @@ func (m *Manager) executeCountWithProvider(ctx context.Context, provider string,
 			if errors.As(errExec, &se) && se != nil {
 				result.Error.HTTPStatus = se.StatusCode()
 			}
+			if ra := retryAfterFromError(errExec); ra != nil {
+				result.RetryAfter = ra
+			}
 			m.MarkResult(execCtx, result)
 			lastErr = errExec
 			continue
@@ -415,6 +423,7 @@ func (m *Manager) executeStreamWithProvider(ctx context.Context, provider string
 				rerr.HTTPStatus = se.StatusCode()
 			}
 			result := Result{AuthID: auth.ID, Provider: provider, Model: req.Model, Success: false, Error: rerr}
+			result.RetryAfter = retryAfterFromError(errStream)
 			m.MarkResult(execCtx, result)
 			lastErr = errStream
 			continue
@@ -556,17 +565,23 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
 					suspendReason = "payment_required"
 					shouldSuspendModel = true
 				case 429:
-					cooldown, nextLevel := nextQuotaCooldown(state.Quota.BackoffLevel)
 					var next time.Time
-					if cooldown > 0 {
-						next = now.Add(cooldown)
+					backoffLevel := state.Quota.BackoffLevel
+					if result.RetryAfter != nil {
+						next = now.Add(*result.RetryAfter)
+					} else {
+						cooldown, nextLevel := nextQuotaCooldown(backoffLevel)
+						if cooldown > 0 {
+							next = now.Add(cooldown)
+						}
+						backoffLevel = nextLevel
 					}
 					state.NextRetryAfter = next
 					state.Quota = QuotaState{
 						Exceeded:      true,
 						Reason:        "quota",
 						NextRecoverAt: next,
-						BackoffLevel:  nextLevel,
+						BackoffLevel:  backoffLevel,
 					}
 					suspendReason = "quota"
 					shouldSuspendModel = true
@@ -582,7 +597,7 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
 				auth.UpdatedAt = now
 				updateAggregatedAvailability(auth, now)
 			} else {
-				applyAuthFailureState(auth, result.Error, now)
+				applyAuthFailureState(auth, result.Error, result.RetryAfter, now)
 			}
 		}

@@ -742,6 +757,25 @@ func cloneError(err *Error) *Error {
 	}
 }

+func retryAfterFromError(err error) *time.Duration {
+	if err == nil {
+		return nil
+	}
+	type retryAfterProvider interface {
+		RetryAfter() *time.Duration
+	}
+	rap, ok := err.(retryAfterProvider)
+	if !ok || rap == nil {
+		return nil
+	}
+	retryAfter := rap.RetryAfter()
+	if retryAfter == nil {
+		return nil
+	}
+	val := *retryAfter
+	return &val
+}
+
 func statusCodeFromResult(err *Error) int {
 	if err == nil {
 		return 0
@@ -749,7 +783,7 @@ func statusCodeFromResult(err *Error) int {
 	return err.StatusCode()
 }

-func applyAuthFailureState(auth *Auth, resultErr *Error, now time.Time) {
+func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Duration, now time.Time) {
 	if auth == nil {
 		return
 	}
@@ -774,13 +808,17 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, now time.Time) {
 		auth.StatusMessage = "quota exhausted"
 		auth.Quota.Exceeded = true
 		auth.Quota.Reason = "quota"
-		cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel)
 		var next time.Time
-		if cooldown > 0 {
-			next = now.Add(cooldown)
+		if retryAfter != nil {
+			next = now.Add(*retryAfter)
+		} else {
+			cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel)
+			if cooldown > 0 {
+				next = now.Add(cooldown)
+			}
+			auth.Quota.BackoffLevel = nextLevel
 		}
 		auth.Quota.NextRecoverAt = next
-		auth.Quota.BackoffLevel = nextLevel
 		auth.NextRetryAfter = next
 	case 408, 500, 502, 503, 504:
 		auth.StatusMessage = "transient upstream error"
--- a/sdk/cliproxy/service.go
+++ b/sdk/cliproxy/service.go
@@ -629,7 +629,7 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
 		models = registry.GetGeminiModels()
 	case "vertex":
 		// Vertex AI Gemini supports the same model identifiers as Gemini.
-		models = registry.GetGeminiModels()
+		models = registry.GetGeminiVertexModels()
 	case "gemini-cli":
 		models = registry.GetGeminiCLIModels()
 	case "aistudio":
Author	SHA1	Message	Date
Luis Pater	98596c0a3f	refactor(translator): remove `service_tier` from Codex OpenAI request payload	2025-11-20 20:12:06 +08:00
Luis Pater	670ce2e528	Merge pull request #285 from router-for-me/iflow feat(iflow): add cookie-based authentication endpoint	2025-11-20 20:04:38 +08:00
hkfires	3f4f8b3b2d	feat(iflow): add cookie-based authentication endpoint	2025-11-20 18:23:43 +08:00
Luis Pater	371324c090	feat(registry): expand Gemini model definitions and support Vertex AI	2025-11-20 18:16:26 +08:00
Luis Pater	d50b0f7524	refactor(executor): simplify Gemini CLI execution and remove internal retry logic - Removed nested retry handling for 429 rate limit errors. - Simplified request/response handling by cleaning redundant retry-related code. - Eliminated `parseRetryDelay` function and max retry configuration logic.	2025-11-20 17:49:37 +08:00
Luis Pater	0586da9c2b	refactor(registry): move Gemini 3 Pro Preview model definition to base set	2025-11-20 10:51:16 +08:00
Luis Pater	618511ff67	Merge pull request #280 from ben-vargas/feat-enable-gemini-3-cli feat: enable Gemini 3 Pro Preview with OAuth support	2025-11-20 08:46:57 +08:00
Ben Vargas	0ff094b87f	fix(executor): prevent streaming on failed response when no fallback Fix critical bug where ExecuteStream would create a streaming channel from a failed (non-2xx) response after exhausting all retries with no fallback models available. When retries were exhausted on the last model, the code would break from the inner loop but fall through to streaming channel creation (line 401), immediately returning at line 461. This made the error handling code at lines 464-471 unreachable, causing clients to receive an empty/closed stream instead of a proper error response. Solution: Check if httpResp is non-2xx before creating the streaming channel. If failed, continue the outer loop to reach error handling. Identified by: codex-bot review Ref: https://github.com/router-for-me/CLIProxyAPI/pull/280#pullrequestreview-3484560423	2025-11-19 13:14:40 -07:00
Ben Vargas	ed23472d94	fix(executor): prevent streaming from 429 response when fallback available Fix critical bug where ExecuteStream would create a streaming channel using a 429 error response instead of continuing to the next fallback model after exhausting retries. When 429 retries were exhausted and a fallback model was available, the inner retry loop would break but immediately fall through to the streaming channel creation, attempting to stream from the failed 429 response instead of trying the next model. Solution: Add shouldContinueToNextModel flag to explicitly skip the streaming logic and continue the outer model loop when appropriate. Identified by: codex-bot review Ref: https://github.com/router-for-me/CLIProxyAPI/pull/280#pullrequestreview-3484479106	2025-11-19 13:05:38 -07:00
Ben Vargas	ede4471b84	feat(translator): add default thinkingConfig for gemini-3-pro-preview Match official Gemini CLI behavior by always sending default thinkingConfig when client doesn't specify reasoning parameters. - Set thinkingBudget=-1 (dynamic) for gemini-3-pro-preview - Set include_thoughts=true to return thinking process - Apply to both /v1/chat/completions and /v1/responses endpoints - See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts	2025-11-19 12:47:39 -07:00
Ben Vargas	6a3de3a89c	feat(executor): add intelligent retry logic for 429 rate limits Implement Google RetryInfo.retryDelay support for handling 429 rate limit errors. Retries same model up to 3 times using exact delays from Google's API before trying fallback models. - Add parseRetryDelay() to extract Google's retry guidance - Implement inner retry loop in Execute() and ExecuteStream() - Context-aware waiting with cancellation support - Cap delays at 60s maximum for safety	2025-11-19 12:47:39 -07:00
Ben Vargas	782bba0bc4	feat(registry): enable gemini-3-pro-preview for gemini-cli provider Add gemini-3-pro-preview model to GetGeminiCLIModels() to make it available for OAuth-based Gemini CLI users, matching the model already available in AI Studio provider. Model spec: - ID: gemini-3-pro-preview - Version: 3.0 - Input: 1M tokens - Output: 64K tokens - Thinking: 128-32K tokens (dynamic)	2025-11-19 12:47:39 -07:00