**refactor(translator): remove service_tier from Codex OpenAI request payload**

Merge pull request #285 from router-for-me/iflow
feat(iflow): add cookie-based authentication endpoint
2026-02-02 12:30:50 +08:00 · 2025-11-20 20:12:06 +08:00 · 2025-11-20 20:04:38 +08:00 · 2025-11-20 18:23:43 +08:00 · 2025-11-20 18:16:26 +08:00 · 2025-11-20 17:49:37 +08:00
22 changed files with 816 additions and 203 deletions
--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
@@ -292,6 +292,7 @@ func (h *Handler) buildAuthFileEntry(auth *coreauth.Auth) gin.H {
 	if auth == nil {
 		return nil
 	}
+	auth.EnsureIndex()
 	runtimeOnly := isRuntimeOnlyAuth(auth)
 	if runtimeOnly && (auth.Disabled || auth.Status == coreauth.StatusDisabled) {
 		return nil
@@ -306,6 +307,7 @@ func (h *Handler) buildAuthFileEntry(auth *coreauth.Auth) gin.H {
 	}
 	entry := gin.H{
 		"id":             auth.ID,
+		"auth_index":     auth.Index,
 		"name":           name,
 		"type":           strings.TrimSpace(auth.Provider),
 		"provider":       strings.TrimSpace(auth.Provider),
@@ -1441,6 +1443,87 @@ func (h *Handler) RequestIFlowToken(c *gin.Context) {
 	c.JSON(http.StatusOK, gin.H{"status": "ok", "url": authURL, "state": state})
 }

+func (h *Handler) RequestIFlowCookieToken(c *gin.Context) { // Exchanges a posted iFlow cookie for token data and saves it as an auth record.
+	ctx := context.Background() // NOTE(review): not derived from the request, so a client disconnect does not cancel the calls below
+	// The request body is bound as JSON of the form {"cookie": "<value>"}.
+	var payload struct {
+		Cookie string `json:"cookie"`
+	}
+	if err := c.ShouldBindJSON(&payload); err != nil { // any bind failure is reported as a missing cookie
+		c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": "cookie is required"})
+		return
+	}
+
+	cookieValue := strings.TrimSpace(payload.Cookie)
+
+	if cookieValue == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": "cookie is required"})
+		return
+	}
+	// Normalization failures are returned to the client verbatim as a 400.
+	cookieValue, errNormalize := iflowauth.NormalizeCookie(cookieValue)
+	if errNormalize != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": errNormalize.Error()})
+		return
+	}
+	// Exchange the normalized cookie for token data; failures are also reported as a 400.
+	authSvc := iflowauth.NewIFlowAuth(h.cfg)
+	tokenData, errAuth := authSvc.AuthenticateWithCookie(ctx, cookieValue)
+	if errAuth != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": errAuth.Error()})
+		return
+	}
+
+	tokenData.Cookie = cookieValue // store the normalized cookie on the token data before building storage
+
+	tokenStorage := authSvc.CreateCookieTokenStorage(tokenData)
+	email := strings.TrimSpace(tokenStorage.Email)
+	if email == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": "failed to extract email from token"})
+		return
+	}
+	// Fall back to a timestamp-based name when sanitizing the email leaves nothing.
+	fileName := iflowauth.SanitizeIFlowFileName(email)
+	if fileName == "" {
+		fileName = fmt.Sprintf("iflow-%d", time.Now().UnixMilli())
+	}
+
+	tokenStorage.Email = email // write back the whitespace-trimmed email
+	// The record ID and FileName are the same "iflow-<name>.json" string.
+	record := &coreauth.Auth{
+		ID:       fmt.Sprintf("iflow-%s.json", fileName),
+		Provider: "iflow",
+		FileName: fmt.Sprintf("iflow-%s.json", fileName),
+		Storage:  tokenStorage,
+		Metadata: map[string]any{
+			"email":        email,
+			"api_key":      tokenStorage.APIKey,
+			"expired":      tokenStorage.Expire,
+			"cookie":       tokenStorage.Cookie,
+			"type":         tokenStorage.Type,
+			"last_refresh": tokenStorage.LastRefresh,
+		},
+		Attributes: map[string]string{
+			"api_key": tokenStorage.APIKey,
+		},
+	}
+
+	savedPath, errSave := h.saveTokenRecord(ctx, record)
+	if errSave != nil { // NOTE(review): errSave is neither logged nor returned here; the client only gets a generic message
+		c.JSON(http.StatusInternalServerError, gin.H{"status": "error", "error": "failed to save authentication tokens"})
+		return
+	}
+
+	fmt.Printf("iFlow cookie authentication successful. Token saved to %s\n", savedPath) // written to stdout via fmt, not a logger
+	c.JSON(http.StatusOK, gin.H{
+		"status":     "ok",
+		"saved_path": savedPath,
+		"email":      email,
+		"expired":    tokenStorage.Expire,
+		"type":       tokenStorage.Type,
+	})
+}
+
 type projectSelectionRequiredError struct{}

 func (e *projectSelectionRequiredError) Error() string {
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -518,6 +518,7 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.GET("/gemini-cli-auth-url", s.mgmt.RequestGeminiCLIToken)
 		mgmt.GET("/qwen-auth-url", s.mgmt.RequestQwenToken)
 		mgmt.GET("/iflow-auth-url", s.mgmt.RequestIFlowToken)
+		mgmt.POST("/iflow-auth-url", s.mgmt.RequestIFlowCookieToken)
 		mgmt.GET("/get-auth-status", s.mgmt.GetAuthStatus)
 	}
 }
--- a/internal/auth/iflow/cookie_helpers.go
+++ b/internal/auth/iflow/cookie_helpers.go
@@ -0,0 +1,38 @@
+package iflow
+
+import (
+	"fmt"
+	"strings"
+)
+
+// NormalizeCookie trims raw, collapses whitespace runs to single spaces, appends a trailing ";" if absent, and returns an error when the input is empty or lacks "BXAuth=".
+func NormalizeCookie(raw string) (string, error) {
+	trimmed := strings.TrimSpace(raw)
+	if trimmed == "" {
+		return "", fmt.Errorf("cookie cannot be empty")
+	}
+
+	combined := strings.Join(strings.Fields(trimmed), " ") // Fields splits on any whitespace, so newlines and tabs become single spaces
+	if !strings.HasSuffix(combined, ";") {
+		combined += ";"
+	}
+	if !strings.Contains(combined, "BXAuth=") { // substring match anywhere in the cookie, not a parsed cookie name
+		return "", fmt.Errorf("cookie missing BXAuth field")
+	}
+	return combined, nil
+}
+
+// SanitizeIFlowFileName replaces "*" with "x" and then drops every rune other than ASCII letters, digits, '_', '@', '.', and '-'; it returns "" for empty input.
+func SanitizeIFlowFileName(raw string) string {
+	if raw == "" {
+		return ""
+	}
+	cleanEmail := strings.ReplaceAll(raw, "*", "x") // "*" is mapped to "x" rather than dropped by the filter below
+	var result strings.Builder
+	for _, r := range cleanEmail {
+		if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') || r == '_' || r == '@' || r == '.' || r == '-' {
+			result.WriteRune(r)
+		}
+	}
+	return strings.TrimSpace(result.String()) // no whitespace survives the filter above, so this trim changes nothing
+}
--- a/internal/cmd/iflow_cookie.go
+++ b/internal/cmd/iflow_cookie.go
@@ -71,22 +71,9 @@ func promptForCookie(promptFn func(string) (string, error)) (string, error) {
 		return "", fmt.Errorf("failed to read cookie: %w", err)
 	}

-	line = strings.TrimSpace(line)
-	if line == "" {
-		return "", fmt.Errorf("cookie cannot be empty")
-	}
-
-	// Clean up any extra whitespace and join multiple spaces
-	cookie := strings.Join(strings.Fields(line), " ")
-
-	// Ensure it ends properly
-	if !strings.HasSuffix(cookie, ";") {
-		cookie = cookie + ";"
-	}
-
-	// Ensure BXAuth is present in the cookie
-	if !strings.Contains(cookie, "BXAuth=") {
-		return "", fmt.Errorf("BXAuth field not found in cookie")
+	cookie, err := iflow.NormalizeCookie(line)
+	if err != nil {
+		return "", err
 	}

 	return cookie, nil
@@ -94,18 +81,6 @@ func promptForCookie(promptFn func(string) (string, error)) (string, error) {

 // getAuthFilePath returns the auth file path for the given provider and email
 func getAuthFilePath(cfg *config.Config, provider, email string) string {
-	// Clean email to make it filename-safe
-	cleanEmail := strings.ReplaceAll(email, "@", "_at_")
-	cleanEmail = strings.ReplaceAll(cleanEmail, ".", "_")
-	cleanEmail = strings.ReplaceAll(cleanEmail, "-", "_")
-
-	// Remove any remaining special characters
-	var result strings.Builder
-	for _, r := range cleanEmail {
-		if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') || r == '_' {
-			result.WriteRune(r)
-		}
-	}
-
-	return fmt.Sprintf("%s/%s-%s.json", cfg.AuthDir, provider, result.String())
+	fileName := iflow.SanitizeIFlowFileName(email)
+	return fmt.Sprintf("%s/%s-%s.json", cfg.AuthDir, provider, fileName)
 }
--- a/internal/misc/codex_instructions.go
+++ b/internal/misc/codex_instructions.go
@@ -17,6 +17,7 @@ func CodexInstructionsForModel(modelName, systemInstructions string) (bool, stri

 	lastPrompt := ""
 	lastCodexPrompt := ""
+	lastCodexMaxPrompt := ""
 	last51Prompt := ""
 	// lastReviewPrompt := ""
 	for _, entry := range entries {
@@ -26,6 +27,8 @@ func CodexInstructionsForModel(modelName, systemInstructions string) (bool, stri
 		}
 		if strings.HasPrefix(entry.Name(), "gpt_5_codex_prompt.md") {
 			lastCodexPrompt = string(content)
+		} else if strings.HasPrefix(entry.Name(), "gpt-5.1-codex-max_prompt.md") {
+			lastCodexMaxPrompt = string(content)
 		} else if strings.HasPrefix(entry.Name(), "prompt.md") {
 			lastPrompt = string(content)
 		} else if strings.HasPrefix(entry.Name(), "gpt_5_1_prompt.md") {
@@ -34,8 +37,9 @@ func CodexInstructionsForModel(modelName, systemInstructions string) (bool, stri
 			// lastReviewPrompt = string(content)
 		}
 	}
-
-	if strings.Contains(modelName, "codex") {
+	if strings.Contains(modelName, "codex-max") {
+		return false, lastCodexMaxPrompt
+	} else if strings.Contains(modelName, "codex") {
 		return false, lastCodexPrompt
 	} else if strings.Contains(modelName, "5.1") {
 		return false, last51Prompt
--- a/internal/misc/codex_instructions/gpt-5.1-codex-max_prompt.md-001-d5dfba250975b4519fed9b8abf99bbd6c31e6f33
+++ b/internal/misc/codex_instructions/gpt-5.1-codex-max_prompt.md-001-d5dfba250975b4519fed9b8abf99bbd6c31e6f33
@@ -0,0 +1,117 @@
+You are Codex, based on GPT-5. You are running as a coding agent in the Codex CLI on a user's computer.
+
+## General
+
+- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)
+
+## Editing constraints
+
+- Default to ASCII when editing or creating files. Only introduce non-ASCII or other Unicode characters when there is a clear justification and the file already uses them.
+- Add succinct code comments that explain what is going on if code is not self-explanatory. You should not add comments like "Assigns the value to the variable", but a brief comment might be useful ahead of a complex code block that the user would otherwise have to spend time parsing out. Usage of these comments should be rare.
+- Try to use apply_patch for single file edits, but it is fine to explore other options to make the edit if it does not work well. Do not use apply_patch for changes that are auto-generated (i.e. generating package.json or running a lint or format command like gofmt) or when scripting is more efficient (such as search and replacing a string across a codebase).
+- You may be in a dirty git worktree.
+    * NEVER revert existing changes you did not make unless explicitly requested, since these changes were made by the user.
+    * If asked to make a commit or code edits and there are unrelated changes to your work or changes that you didn't make in those files, don't revert those changes.
+    * If the changes are in files you've touched recently, you should read carefully and understand how you can work with the changes rather than reverting them.
+    * If the changes are in unrelated files, just ignore them and don't revert them.
+- Do not amend a commit unless explicitly requested to do so.
+- While you are working, you might notice unexpected changes that you didn't make. If this happens, STOP IMMEDIATELY and ask the user how they would like to proceed.
+- **NEVER** use destructive commands like `git reset --hard` or `git checkout --` unless specifically requested or approved by the user.
+
+## Plan tool
+
+When using the planning tool:
+- Skip using the planning tool for straightforward tasks (roughly the easiest 25%).
+- Do not make single-step plans.
+- When you made a plan, update it after having performed one of the sub-tasks that you shared on the plan.
+
+## Codex CLI harness, sandboxing, and approvals
+
+The Codex CLI harness supports several different configurations for sandboxing and escalation approvals that the user can choose from.
+
+Filesystem sandboxing defines which files can be read or written. The options for `sandbox_mode` are:
+- **read-only**: The sandbox only permits reading files.
+- **workspace-write**: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval.
+- **danger-full-access**: No filesystem sandboxing - all commands are permitted.
+
+Network sandboxing defines whether network can be accessed without approval. Options for `network_access` are:
+- **restricted**: Requires approval
+- **enabled**: No approval needed
+
+Approvals are your mechanism to get user consent to run shell commands without the sandbox. Possible configuration options for `approval_policy` are
+- **untrusted**: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands.
+- **on-failure**: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox.
+- **on-request**: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. (Note that this mode is not always available. If it is, you'll see parameters for it in the `shell` command description.)
+- **never**: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user. You MUST do your utmost best to finish the task and validate your work before yielding. If this mode is paired with `danger-full-access`, take advantage of it to deliver the best outcome for the user. Further, in this mode, your default testing philosophy is overridden: Even if you don't see local patterns for testing, you may add tests and scripts to validate your work. Just remove them before yielding.
+
+When you are running with `approval_policy == on-request`, and sandboxing enabled, here are scenarios where you'll need to request approval:
+- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var)
+- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.
+- You are running sandboxed and need to run a command that requires network access (e.g. installing packages)
+- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `with_escalated_permissions` and `justification` parameters - do not message the user before requesting approval for the command.
+- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for
+- (for all of these, you should weigh alternative paths that do not require approval)
+
+When `sandbox_mode` is set to read-only, you'll need to request approval for any command that isn't a read.
+
+You will be told what filesystem sandboxing, network sandboxing, and approval mode are active in a developer or user message. If you are not told about this, assume that you are running with workspace-write, network sandboxing enabled, and approval on-failure.
+
+Although they introduce friction to the user because your work is paused until the user responds, you should leverage them when necessary to accomplish important work. If the completing the task requires escalated permissions, Do not let these settings or the sandbox deter you from attempting to accomplish the user's task unless it is set to "never", in which case never ask for approvals.
+
+When requesting approval to execute a command that will require escalated privileges:
+  - Provide the `with_escalated_permissions` parameter with the boolean value true
+  - Include a short, 1 sentence explanation for why you need to enable `with_escalated_permissions` in the justification parameter
+
+## Special user requests
+
+- If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as `date`), you should do so.
+- If the user asks for a "review", default to a code review mindset: prioritise identifying bugs, risks, behavioural regressions, and missing tests. Findings must be the primary focus of the response - keep summaries or overviews brief and only after enumerating the issues. Present findings first (ordered by severity with file/line references), follow with open questions or assumptions, and offer a change-summary only as a secondary detail. If no findings are discovered, state that explicitly and mention any residual risks or testing gaps.
+
+## Frontend tasks
+When doing frontend design tasks, avoid collapsing into "AI slop" or safe, average-looking layouts.
+Aim for interfaces that feel intentional, bold, and a bit surprising.
+- Typography: Use expressive, purposeful fonts and avoid default stacks (Inter, Roboto, Arial, system).
+- Color & Look: Choose a clear visual direction; define CSS variables; avoid purple-on-white defaults. No purple bias or dark mode bias.
+- Motion: Use a few meaningful animations (page-load, staggered reveals) instead of generic micro-motions.
+- Background: Don't rely on flat, single-color backgrounds; use gradients, shapes, or subtle patterns to build atmosphere.
+- Overall: Avoid boilerplate layouts and interchangeable UI patterns. Vary themes, type families, and visual languages across outputs.
+- Ensure the page loads properly on both desktop and mobile
+
+Exception: If working within an existing website or design system, preserve the established patterns, structure, and visual language.
+
+## Presenting your work and final message
+
+You are producing plain text that will later be styled by the CLI. Follow these rules exactly. Formatting should make results easy to scan, but not feel mechanical. Use judgment to decide how much structure adds value.
+
+- Default: be very concise; friendly coding teammate tone.
+- Ask only when needed; suggest ideas; mirror the user's style.
+- For substantial work, summarize clearly; follow final‑answer formatting.
+- Skip heavy formatting for simple confirmations.
+- Don't dump large files you've written; reference paths only.
+- No "save/copy this file" - User is on the same machine.
+- Offer logical next steps (tests, commits, build) briefly; add verify steps if you couldn't do something.
+- For code changes:
+  * Lead with a quick explanation of the change, and then give more details on the context covering where and why a change was made. Do not start this explanation with "summary", just jump right in.
+  * If there are natural next steps the user may want to take, suggest them at the end of your response. Do not make suggestions if there are no natural next steps.
+  * When suggesting multiple options, use numeric lists for the suggestions so the user can quickly respond with a single number.
+- The user does not command execution outputs. When asked to show the output of a command (e.g. `git show`), relay the important details in your answer or summarize the key lines so the user understands the result.
+
+### Final answer structure and style guidelines
+
+- Plain text; CLI handles styling. Use structure only when it helps scanability.
+- Headers: optional; short Title Case (1-3 words) wrapped in **…**; no blank line before the first bullet; add only if they truly help.
+- Bullets: use - ; merge related points; keep to one line when possible; 4–6 per list ordered by importance; keep phrasing consistent.
+- Monospace: backticks for commands/paths/env vars/code ids and inline examples; use for literal keyword bullets; never combine with **.
+- Code samples or multi-line snippets should be wrapped in fenced code blocks; include an info string as often as possible.
+- Structure: group related bullets; order sections general → specific → supporting; for subsections, start with a bolded keyword bullet, then items; match complexity to the task.
+- Tone: collaborative, concise, factual; present tense, active voice; self‑contained; no "above/below"; parallel wording.
+- Don'ts: no nested bullets/hierarchies; no ANSI codes; don't cram unrelated keywords; keep keyword lists short—wrap/reformat if long; avoid naming formatting styles in answers.
+- Adaptation: code explanations → precise, structured with code refs; simple tasks → lead with outcome; big changes → logical walkthrough + rationale + next actions; casual one-offs → plain sentences, no headers/bullets.
+- File References: When referencing files in your response follow the below rules:
+  * Use inline code to make file paths clickable.
+  * Each reference should have a stand alone path. Even if it's the same file.
+  * Accepted: absolute, workspace‑relative, a/ or b/ diff prefixes, or bare filename/suffix.
+  * Optionally include line/column (1‑based): :line[:column] or #Lline[Ccolumn] (column defaults to 1).
+  * Do not use URIs like file://, vscode://, or https://.
+  * Do not provide range of lines
+  * Examples: src/app.ts, src/app.ts:42, b/server/index.js#L10, C:\repo\project\main.rs:12:5
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -68,28 +68,13 @@ func GetClaudeModels() []*ModelInfo {
 	}
 }

-// GeminiModels returns the shared base Gemini model set used by multiple providers.
-func GeminiModels() []*ModelInfo {
+// GetGeminiModels returns the standard Gemini model definitions
+func GetGeminiModels() []*ModelInfo {
 	return []*ModelInfo{
-		{
-			ID:                         "gemini-2.5-flash",
-			Object:                     "model",
-			Created:                    time.Now().Unix(),
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash",
-			Version:                    "001",
-			DisplayName:                "Gemini 2.5 Flash",
-			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
 		{
 			ID:                         "gemini-2.5-pro",
 			Object:                     "model",
-			Created:                    time.Now().Unix(),
+			Created:                    1750118400,
 			OwnedBy:                    "google",
 			Type:                       "gemini",
 			Name:                       "models/gemini-2.5-pro",
@@ -101,10 +86,25 @@ func GeminiModels() []*ModelInfo {
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
+		{
+			ID:                         "gemini-2.5-flash",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash",
+			Version:                    "001",
+			DisplayName:                "Gemini 2.5 Flash",
+			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
 		{
 			ID:                         "gemini-2.5-flash-lite",
 			Object:                     "model",
-			Created:                    time.Now().Unix(),
+			Created:                    1753142400,
 			OwnedBy:                    "google",
 			Type:                       "gemini",
 			Name:                       "models/gemini-2.5-flash-lite",
@@ -116,34 +116,110 @@ func GeminiModels() []*ModelInfo {
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
+		{
+			ID:                         "gemini-3-pro-preview",
+			Object:                     "model",
+			Created:                    1737158400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Preview",
+			Description:                "Gemini 3 Pro Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
 	}
 }

-// GetGeminiModels returns the standard Gemini model definitions
-func GetGeminiModels() []*ModelInfo { return GeminiModels() }
+func GetGeminiVertexModels() []*ModelInfo { // GetGeminiVertexModels returns a fixed list of five Gemini model definitions.
+	return []*ModelInfo{
+		{
+			ID:                         "gemini-2.5-pro",
+			Object:                     "model",
+			Created:                    1750118400, // Unix seconds: 2025-06-17 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-pro",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Pro",
+			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash",
+			Object:                     "model",
+			Created:                    1750118400, // Unix seconds: 2025-06-17 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash",
+			Version:                    "001",
+			DisplayName:                "Gemini 2.5 Flash",
+			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash-lite",
+			Object:                     "model",
+			Created:                    1753142400, // Unix seconds: 2025-07-22 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash-lite",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Flash Lite",
+			Description:                "Our smallest and most cost effective model, built for at scale usage.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-3-pro-preview",
+			Object:                     "model",
+			Created:                    1737158400, // NOTE(review): 2025-01-18 00:00:00 UTC, earlier than the 2.5 entries above - confirm this date is intended
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Preview",
+			Description:                "Gemini 3 Pro Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-3-pro-image-preview", // the only entry in this list that sets no Thinking field
+			Object:                     "model",
+			Created:                    1737158400, // NOTE(review): 2025-01-18 00:00:00 UTC, earlier than the 2.5 entries above - confirm this date is intended
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-image-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Image Preview",
+			Description:                "Gemini 3 Pro Image Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+		},
+	}
+}

 // GetGeminiCLIModels returns the standard Gemini model definitions
 func GetGeminiCLIModels() []*ModelInfo {
 	return []*ModelInfo{
-		{
-			ID:                         "gemini-2.5-flash",
-			Object:                     "model",
-			Created:                    time.Now().Unix(),
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash",
-			Version:                    "001",
-			DisplayName:                "Gemini 2.5 Flash",
-			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
 		{
 			ID:                         "gemini-2.5-pro",
 			Object:                     "model",
-			Created:                    time.Now().Unix(),
+			Created:                    1750118400,
 			OwnedBy:                    "google",
 			Type:                       "gemini",
 			Name:                       "models/gemini-2.5-pro",
@@ -155,10 +231,25 @@ func GetGeminiCLIModels() []*ModelInfo {
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
+		{
+			ID:                         "gemini-2.5-flash",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash",
+			Version:                    "001",
+			DisplayName:                "Gemini 2.5 Flash",
+			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
 		{
 			ID:                         "gemini-2.5-flash-lite",
 			Object:                     "model",
-			Created:                    time.Now().Unix(),
+			Created:                    1753142400,
 			OwnedBy:                    "google",
 			Type:                       "gemini",
 			Name:                       "models/gemini-2.5-flash-lite",
@@ -170,122 +261,163 @@ func GetGeminiCLIModels() []*ModelInfo {
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
-		// {
-		// 	ID:                         "gemini-3-pro-preview-11-2025",
-		// 	Object:                     "model",
-		// 	Created:                    time.Now().Unix(),
-		// 	OwnedBy:                    "google",
-		// 	Type:                       "gemini",
-		// 	Name:                       "models/gemini-3-pro-preview-11-2025",
-		// 	Version:                    "3",
-		// 	DisplayName:                "Gemini 3 Pro Preview 11-2025",
-		// 	Description:                "Latest preview of Gemini Pro",
-		// 	InputTokenLimit:            1048576,
-		// 	OutputTokenLimit:           65536,
-		// 	SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-		// 	Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		// },
+		{
+			ID:                         "gemini-3-pro-preview",
+			Object:                     "model",
+			Created:                    1737158400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Preview",
+			Description:                "Gemini 3 Pro Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
 	}
 }

 // GetAIStudioModels returns the Gemini model definitions for AI Studio integrations
 func GetAIStudioModels() []*ModelInfo {
-	base := GeminiModels()
-
-	return append(base,
-		[]*ModelInfo{
-			{
-				ID:                         "gemini-3-pro-preview",
-				Object:                     "model",
-				Created:                    time.Now().Unix(),
-				OwnedBy:                    "google",
-				Type:                       "gemini",
-				Name:                       "models/gemini-3-pro-preview",
-				Version:                    "3.0",
-				DisplayName:                "Gemini 3 Pro Preview",
-				Description:                "Gemini 3 Pro Preview",
-				InputTokenLimit:            1048576,
-				OutputTokenLimit:           65536,
-				SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-				Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-			},
-			{
-				ID:                         "gemini-pro-latest",
-				Object:                     "model",
-				Created:                    time.Now().Unix(),
-				OwnedBy:                    "google",
-				Type:                       "gemini",
-				Name:                       "models/gemini-pro-latest",
-				Version:                    "2.5",
-				DisplayName:                "Gemini Pro Latest",
-				Description:                "Latest release of Gemini Pro",
-				InputTokenLimit:            1048576,
-				OutputTokenLimit:           65536,
-				SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-				Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-			},
-			{
-				ID:                         "gemini-flash-latest",
-				Object:                     "model",
-				Created:                    time.Now().Unix(),
-				OwnedBy:                    "google",
-				Type:                       "gemini",
-				Name:                       "models/gemini-flash-latest",
-				Version:                    "2.5",
-				DisplayName:                "Gemini Flash Latest",
-				Description:                "Latest release of Gemini Flash",
-				InputTokenLimit:            1048576,
-				OutputTokenLimit:           65536,
-				SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-				Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-			},
-			{
-				ID:                         "gemini-flash-lite-latest",
-				Object:                     "model",
-				Created:                    time.Now().Unix(),
-				OwnedBy:                    "google",
-				Type:                       "gemini",
-				Name:                       "models/gemini-flash-lite-latest",
-				Version:                    "2.5",
-				DisplayName:                "Gemini Flash-Lite Latest",
-				Description:                "Latest release of Gemini Flash-Lite",
-				InputTokenLimit:            1048576,
-				OutputTokenLimit:           65536,
-				SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-				Thinking:                   &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-			},
-			{
-				ID:                         "gemini-2.5-flash-image-preview",
-				Object:                     "model",
-				Created:                    time.Now().Unix(),
-				OwnedBy:                    "google",
-				Type:                       "gemini",
-				Name:                       "models/gemini-2.5-flash-image-preview",
-				Version:                    "2.5",
-				DisplayName:                "Gemini 2.5 Flash Image Preview",
-				Description:                "State-of-the-art image generation and editing model.",
-				InputTokenLimit:            1048576,
-				OutputTokenLimit:           8192,
-				SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-				// image models don't support thinkingConfig; leave Thinking nil
-			},
-			{
-				ID:                         "gemini-2.5-flash-image",
-				Object:                     "model",
-				Created:                    time.Now().Unix(),
-				OwnedBy:                    "google",
-				Type:                       "gemini",
-				Name:                       "models/gemini-2.5-flash-image",
-				Version:                    "2.5",
-				DisplayName:                "Gemini 2.5 Flash Image",
-				Description:                "State-of-the-art image generation and editing model.",
-				InputTokenLimit:            1048576,
-				OutputTokenLimit:           8192,
-				SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-				// image models don't support thinkingConfig; leave Thinking nil
-			},
-		}...,
-	)
+	return []*ModelInfo{
+		{
+			ID:                         "gemini-2.5-pro",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-pro",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Pro",
+			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash",
+			Version:                    "001",
+			DisplayName:                "Gemini 2.5 Flash",
+			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash-lite",
+			Object:                     "model",
+			Created:                    1753142400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash-lite",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Flash Lite",
+			Description:                "Our smallest and most cost effective model, built for at scale usage.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-3-pro-preview",
+			Object:                     "model",
+			Created:                    1737158400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Preview",
+			Description:                "Gemini 3 Pro Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-pro-latest",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-pro-latest",
+			Version:                    "2.5",
+			DisplayName:                "Gemini Pro Latest",
+			Description:                "Latest release of Gemini Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-flash-latest",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-flash-latest",
+			Version:                    "2.5",
+			DisplayName:                "Gemini Flash Latest",
+			Description:                "Latest release of Gemini Flash",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-flash-lite-latest",
+			Object:                     "model",
+			Created:                    1753142400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-flash-lite-latest",
+			Version:                    "2.5",
+			DisplayName:                "Gemini Flash-Lite Latest",
+			Description:                "Latest release of Gemini Flash-Lite",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash-image-preview",
+			Object:                     "model",
+			Created:                    1756166400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash-image-preview",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Flash Image Preview",
+			Description:                "State-of-the-art image generation and editing model.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           8192,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			// image models don't support thinkingConfig; leave Thinking nil
+		},
+		{
+			ID:                         "gemini-2.5-flash-image",
+			Object:                     "model",
+			Created:                    1759363200,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash-image",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Flash Image",
+			Description:                "State-of-the-art image generation and editing model.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           8192,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			// image models don't support thinkingConfig; leave Thinking nil
+		},
+	}
 }

 // GetOpenAIModels returns the standard OpenAI model definitions
@@ -603,6 +735,72 @@ func GetOpenAIModels() []*ModelInfo {
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
 		},
+
+		{
+			ID:                  "gpt-5.1-codex-max",
+			Object:              "model",
+			Created:             time.Now().Unix(),
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5.1-max",
+			DisplayName:         "GPT 5 Codex Max",
+			Description:         "Stable version of GPT 5 Codex Max",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+		},
+		{
+			ID:                  "gpt-5.1-codex-max-low",
+			Object:              "model",
+			Created:             time.Now().Unix(),
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5.1-max",
+			DisplayName:         "GPT 5 Codex Max Low",
+			Description:         "Stable version of GPT 5 Codex Max Low",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+		},
+		{
+			ID:                  "gpt-5.1-codex-max-medium",
+			Object:              "model",
+			Created:             time.Now().Unix(),
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5.1-max",
+			DisplayName:         "GPT 5 Codex Max Medium",
+			Description:         "Stable version of GPT 5 Codex Max Medium",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+		},
+		{
+			ID:                  "gpt-5.1-codex-max-high",
+			Object:              "model",
+			Created:             time.Now().Unix(),
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5.1-max",
+			DisplayName:         "GPT 5 Codex Max High",
+			Description:         "Stable version of GPT 5 Codex Max High",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+		},
+		{
+			ID:                  "gpt-5.1-codex-max-xhigh",
+			Object:              "model",
+			Created:             time.Now().Unix(),
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5.1-max",
+			DisplayName:         "GPT 5 Codex Max XHigh",
+			Description:         "Stable version of GPT 5 Codex Max XHigh",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+		},
 	}
 }

--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -322,6 +322,18 @@ func (e *CodexExecutor) setReasoningEffortByAlias(modelName string, payload []by
 		case "gpt-5.1-codex-mini-high":
 			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
 		}
+	} else if util.InArray([]string{"gpt-5.1-codex-max", "gpt-5.1-codex-max-low", "gpt-5.1-codex-max-medium", "gpt-5.1-codex-max-high", "gpt-5.1-codex-max-xhigh"}, modelName) {
+		payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex-max")
+		switch modelName {
+		case "gpt-5.1-codex-max-low":
+			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
+		case "gpt-5.1-codex-max-medium":
+			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
+		case "gpt-5.1-codex-max-high":
+			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
+		case "gpt-5.1-codex-max-xhigh":
+			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "xhigh")
+		}
 	}
 	return payload
 }
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -180,7 +180,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 			continue
 		}

-		err = statusErr{code: httpResp.StatusCode, msg: string(data)}
+		err = newGeminiStatusErr(httpResp.StatusCode, data)
 		return resp, err
 	}

@@ -190,7 +190,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	if lastStatus == 0 {
 		lastStatus = 429
 	}
-	err = statusErr{code: lastStatus, msg: string(lastBody)}
+	err = newGeminiStatusErr(lastStatus, lastBody)
 	return resp, err
 }

@@ -304,7 +304,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 				}
 				continue
 			}
-			err = statusErr{code: httpResp.StatusCode, msg: string(data)}
+			err = newGeminiStatusErr(httpResp.StatusCode, data)
 			return nil, err
 		}

@@ -377,7 +377,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	if lastStatus == 0 {
 		lastStatus = 429
 	}
-	err = statusErr{code: lastStatus, msg: string(lastBody)}
+	err = newGeminiStatusErr(lastStatus, lastBody)
 	return nil, err
 }

@@ -485,7 +485,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
 	if lastStatus == 0 {
 		lastStatus = 429
 	}
-	return cliproxyexecutor.Response{}, statusErr{code: lastStatus, msg: string(lastBody)}
+	return cliproxyexecutor.Response{}, newGeminiStatusErr(lastStatus, lastBody)
 }

 func (e *GeminiCLIExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
@@ -769,3 +769,42 @@ func fixGeminiCLIImageAspectRatio(modelName string, rawJSON []byte) []byte {
 	}
 	return rawJSON
 }
+
+// newGeminiStatusErr builds a statusErr from an upstream status and body; on 429 it attaches the parsed retry delay.
+func newGeminiStatusErr(statusCode int, body []byte) statusErr {
+	out := statusErr{code: statusCode, msg: string(body)}
+	if statusCode == http.StatusTooManyRequests {
+		// parseRetryDelay returns a nil delay on every failure path, so the error can be dropped.
+		out.retryAfter, _ = parseRetryDelay(body)
+	}
+	return out
+}
+
+// parseRetryDelay extracts the retry delay from a Google API 429 error response.
+// It scans error.details[] for the google.rpc.RetryInfo entry and parses its retryDelay
+// (e.g. "0.847655010s"). The returned pointer is nil whenever an error is returned.
+func parseRetryDelay(errorBody []byte) (*time.Duration, error) {
+	// Only error.details[] entries whose @type is
+	// "type.googleapis.com/google.rpc.RetryInfo" carry a retryDelay.
+	details := gjson.GetBytes(errorBody, "error.details")
+	if !details.Exists() || !details.IsArray() {
+		return nil, fmt.Errorf("no error.details found")
+	}
+
+	for _, detail := range details.Array() {
+		typeVal := detail.Get("@type").String()
+		if typeVal == "type.googleapis.com/google.rpc.RetryInfo" {
+			retryDelay := detail.Get("retryDelay").String()
+			if retryDelay != "" {
+				// Keep the underlying cause and the offending value so bad payloads are diagnosable.
+				duration, err := time.ParseDuration(retryDelay)
+				if err != nil {
+					return nil, fmt.Errorf("failed to parse retryDelay %q: %w", retryDelay, err)
+				}
+				return &duration, nil
+			}
+		}
+	}
+
+	return nil, fmt.Errorf("no RetryInfo found")
+}
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -112,6 +112,8 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	}
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	reporter.publish(ctx, parseOpenAIUsage(data))
+	// Ensure usage is recorded even if upstream omits usage metadata.
+	reporter.ensurePublished(ctx)

 	var param any
 	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
@@ -217,6 +219,8 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 			reporter.publishFailure(ctx)
 			out <- cliproxyexecutor.StreamChunk{Err: errScan}
 		}
+		// Guarantee a usage record exists even if the stream never emitted usage data.
+		reporter.ensurePublished(ctx)
 	}()

 	return stream, nil
--- a/internal/runtime/executor/logging_helpers.go
+++ b/internal/runtime/executor/logging_helpers.go
@@ -323,7 +323,14 @@ func formatAuthInfo(info upstreamRequestLog) string {
 }

 func summarizeErrorBody(contentType string, body []byte) string {
-	if strings.Contains(strings.ToLower(contentType), "text/html") {
+	isHTML := strings.Contains(strings.ToLower(contentType), "text/html")
+	if !isHTML {
+		trimmed := bytes.TrimSpace(bytes.ToLower(body))
+		if bytes.HasPrefix(trimmed, []byte("<!doctype html")) || bytes.HasPrefix(trimmed, []byte("<html")) {
+			isHTML = true
+		}
+	}
+	if isHTML {
 		if title := extractHTMLTitle(body); title != "" {
 			return title
 		}
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -8,6 +8,7 @@ import (
 	"io"
 	"net/http"
 	"strings"
+	"time"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
@@ -340,8 +341,9 @@ func (e *OpenAICompatExecutor) overrideModel(payload []byte, model string) []byt
 }

 type statusErr struct {
-	code int
-	msg  string
+	code       int
+	msg        string
+	retryAfter *time.Duration
 }

 func (e statusErr) Error() string {
@@ -350,4 +352,5 @@ func (e statusErr) Error() string {
 	}
 	return fmt.Sprintf("status %d", e.code)
 }
-func (e statusErr) StatusCode() int { return e.code }
+func (e statusErr) StatusCode() int            { return e.code }
+func (e statusErr) RetryAfter() *time.Duration { return e.retryAfter }
--- a/internal/runtime/executor/usage_helpers.go
+++ b/internal/runtime/executor/usage_helpers.go
@@ -18,6 +18,7 @@ type usageReporter struct {
 	provider    string
 	model       string
 	authID      string
+	authIndex   uint64
 	apiKey      string
 	source      string
 	requestedAt time.Time
@@ -35,6 +36,7 @@ func newUsageReporter(ctx context.Context, provider, model string, auth *cliprox
 	}
 	if auth != nil {
 		reporter.authID = auth.ID
+		reporter.authIndex = auth.Index
 	}
 	return reporter
 }
@@ -76,6 +78,7 @@ func (r *usageReporter) publishWithOutcome(ctx context.Context, detail usage.Det
 			Source:      r.source,
 			APIKey:      r.apiKey,
 			AuthID:      r.authID,
+			AuthIndex:   r.authIndex,
 			RequestedAt: r.requestedAt,
 			Failed:      failed,
 			Detail:      detail,
@@ -98,6 +101,7 @@ func (r *usageReporter) ensurePublished(ctx context.Context) {
 			Source:      r.source,
 			APIKey:      r.apiKey,
 			AuthID:      r.authID,
+			AuthIndex:   r.authIndex,
 			RequestedAt: r.requestedAt,
 			Failed:      false,
 			Detail:      usage.Detail{},
--- a/internal/translator/codex/openai/responses/codex_openai-responses_request.go
+++ b/internal/translator/codex/openai/responses/codex_openai-responses_request.go
@@ -22,6 +22,7 @@ func ConvertOpenAIResponsesRequestToCodex(modelName string, inputRawJSON []byte,
 	rawJSON, _ = sjson.DeleteBytes(rawJSON, "max_completion_tokens")
 	rawJSON, _ = sjson.DeleteBytes(rawJSON, "temperature")
 	rawJSON, _ = sjson.DeleteBytes(rawJSON, "top_p")
+	rawJSON, _ = sjson.DeleteBytes(rawJSON, "service_tier")

 	originalInstructions := ""
 	originalInstructionsText := ""
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -88,6 +88,15 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 		}
 	}

+	// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
+	// This matches the official Gemini CLI behavior which always sends:
+	// { thinkingBudget: -1, includeThoughts: true }
+	// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
+	if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
+		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
+		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
+	}
+
 	// Temperature/top_p/top_k
 	if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number {
 		out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num)
@@ -286,6 +295,17 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 						renamed, errRename := util.RenameKey(fnRaw, "parameters", "parametersJsonSchema")
 						if errRename != nil {
 							log.Warnf("Failed to rename parameters for tool '%s': %v", fn.Get("name").String(), errRename)
+							var errSet error
+							fnRaw, errSet = sjson.Set(fnRaw, "parametersJsonSchema.type", "object")
+							if errSet != nil {
+								log.Warnf("Failed to set default schema type for tool '%s': %v", fn.Get("name").String(), errSet)
+								continue
+							}
+							fnRaw, errSet = sjson.Set(fnRaw, "parametersJsonSchema.properties", map[string]interface{}{})
+							if errSet != nil {
+								log.Warnf("Failed to set default schema properties for tool '%s': %v", fn.Get("name").String(), errSet)
+								continue
+							}
 						} else {
 							fnRaw = renamed
 						}
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
@@ -306,8 +306,34 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 			if t.Get("type").String() == "function" {
 				fn := t.Get("function")
 				if fn.Exists() && fn.IsObject() {
-					parametersJsonSchema, _ := util.RenameKey(fn.Raw, "parameters", "parametersJsonSchema")
-					out, _ = sjson.SetRawBytes(out, fdPath+".-1", []byte(parametersJsonSchema))
+					fnRaw := fn.Raw
+					if fn.Get("parameters").Exists() {
+						renamed, errRename := util.RenameKey(fnRaw, "parameters", "parametersJsonSchema")
+						if errRename != nil {
+							log.Warnf("Failed to rename parameters for tool '%s': %v", fn.Get("name").String(), errRename)
+						} else {
+							fnRaw = renamed
+						}
+					} else {
+						var errSet error
+						fnRaw, errSet = sjson.Set(fnRaw, "parametersJsonSchema.type", "object")
+						if errSet != nil {
+							log.Warnf("Failed to set default schema type for tool '%s': %v", fn.Get("name").String(), errSet)
+							continue
+						}
+						fnRaw, errSet = sjson.Set(fnRaw, "parametersJsonSchema.properties", map[string]interface{}{})
+						if errSet != nil {
+							log.Warnf("Failed to set default schema properties for tool '%s': %v", fn.Get("name").String(), errSet)
+							continue
+						}
+					}
+					fnRaw, _ = sjson.Delete(fnRaw, "strict")
+					tmp, errSet := sjson.SetRawBytes(out, fdPath+".-1", []byte(fnRaw))
+					if errSet != nil {
+						log.Warnf("Failed to append tool declaration for '%s': %v", fn.Get("name").String(), errSet)
+						continue
+					}
+					out = tmp
 				}
 			}
 		}
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
@@ -6,6 +6,7 @@ import (

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -294,6 +295,17 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 			}
 		}
 	}
+
+	// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
+	// This matches the official Gemini CLI behavior which always sends:
+	// { thinkingBudget: -1, includeThoughts: true }
+	// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
+	if !gjson.Get(out, "generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
+		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
+		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
+		log.Debugf("Applied default thinkingConfig for gemini-3-pro-preview (matches Gemini CLI): thinkingBudget=-1, include_thoughts=true")
+	}
+
 	result := []byte(out)
 	result = common.AttachDefaultSafetySettings(result, "safetySettings")
 	return result
--- a/internal/usage/logger_plugin.go
+++ b/internal/usage/logger_plugin.go
@@ -90,6 +90,7 @@ type modelStats struct {
 type RequestDetail struct {
 	Timestamp time.Time  `json:"timestamp"`
 	Source    string     `json:"source"`
+	AuthIndex uint64     `json:"auth_index"`
 	Tokens    TokenStats `json:"tokens"`
 	Failed    bool       `json:"failed"`
 }
@@ -197,6 +198,7 @@ func (s *RequestStatistics) Record(ctx context.Context, record coreusage.Record)
 	s.updateAPIStats(stats, modelName, RequestDetail{
 		Timestamp: timestamp,
 		Source:    record.Source,
+		AuthIndex: record.AuthIndex,
 		Tokens:    detail,
 		Failed:    failed,
 	})
--- a/sdk/cliproxy/auth/manager.go
+++ b/sdk/cliproxy/auth/manager.go
@@ -62,6 +62,8 @@ type Result struct {
 	Model string
 	// Success marks whether the execution succeeded.
 	Success bool
+	// RetryAfter carries a provider supplied retry hint (e.g. 429 retryDelay).
+	RetryAfter *time.Duration
 	// Error describes the failure when Success is false.
 	Error *Error
 }
@@ -169,6 +171,7 @@ func (m *Manager) Register(ctx context.Context, auth *Auth) (*Auth, error) {
 	if auth == nil {
 		return nil, nil
 	}
+	auth.EnsureIndex()
 	if auth.ID == "" {
 		auth.ID = uuid.NewString()
 	}
@@ -185,6 +188,7 @@ func (m *Manager) Update(ctx context.Context, auth *Auth) (*Auth, error) {
 	if auth == nil || auth.ID == "" {
 		return nil, nil
 	}
+	auth.EnsureIndex()
 	m.mu.Lock()
 	m.auths[auth.ID] = auth.Clone()
 	m.mu.Unlock()
@@ -209,6 +213,7 @@ func (m *Manager) Load(ctx context.Context) error {
 		if auth == nil || auth.ID == "" {
 			continue
 		}
+		auth.EnsureIndex()
 		m.auths[auth.ID] = auth.Clone()
 	}
 	return nil
@@ -322,6 +327,9 @@ func (m *Manager) executeWithProvider(ctx context.Context, provider string, req
 			if errors.As(errExec, &se) && se != nil {
 				result.Error.HTTPStatus = se.StatusCode()
 			}
+			if ra := retryAfterFromError(errExec); ra != nil {
+				result.RetryAfter = ra
+			}
 			m.MarkResult(execCtx, result)
 			lastErr = errExec
 			continue
@@ -367,6 +375,9 @@ func (m *Manager) executeCountWithProvider(ctx context.Context, provider string,
 			if errors.As(errExec, &se) && se != nil {
 				result.Error.HTTPStatus = se.StatusCode()
 			}
+			if ra := retryAfterFromError(errExec); ra != nil {
+				result.RetryAfter = ra
+			}
 			m.MarkResult(execCtx, result)
 			lastErr = errExec
 			continue
@@ -412,6 +423,7 @@ func (m *Manager) executeStreamWithProvider(ctx context.Context, provider string
 				rerr.HTTPStatus = se.StatusCode()
 			}
 			result := Result{AuthID: auth.ID, Provider: provider, Model: req.Model, Success: false, Error: rerr}
+			result.RetryAfter = retryAfterFromError(errStream)
 			m.MarkResult(execCtx, result)
 			lastErr = errStream
 			continue
@@ -553,17 +565,23 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
 					suspendReason = "payment_required"
 					shouldSuspendModel = true
 				case 429:
-					cooldown, nextLevel := nextQuotaCooldown(state.Quota.BackoffLevel)
 					var next time.Time
-					if cooldown > 0 {
-						next = now.Add(cooldown)
+					backoffLevel := state.Quota.BackoffLevel
+					if result.RetryAfter != nil {
+						next = now.Add(*result.RetryAfter)
+					} else {
+						cooldown, nextLevel := nextQuotaCooldown(backoffLevel)
+						if cooldown > 0 {
+							next = now.Add(cooldown)
+						}
+						backoffLevel = nextLevel
 					}
 					state.NextRetryAfter = next
 					state.Quota = QuotaState{
 						Exceeded:      true,
 						Reason:        "quota",
 						NextRecoverAt: next,
-						BackoffLevel:  nextLevel,
+						BackoffLevel:  backoffLevel,
 					}
 					suspendReason = "quota"
 					shouldSuspendModel = true
@@ -579,7 +597,7 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
 				auth.UpdatedAt = now
 				updateAggregatedAvailability(auth, now)
 			} else {
-				applyAuthFailureState(auth, result.Error, now)
+				applyAuthFailureState(auth, result.Error, result.RetryAfter, now)
 			}
 		}

@@ -739,6 +757,25 @@ func cloneError(err *Error) *Error {
 	}
 }

+// retryAfterFromError extracts a provider supplied retry hint from err, looking
+// through wrapped errors. It returns nil when no error in the chain offers one.
+func retryAfterFromError(err error) *time.Duration {
+	type retryAfterProvider interface {
+		RetryAfter() *time.Duration
+	}
+	// errors.As is nil-safe and also matches wrapped errors in the chain.
+	var rap retryAfterProvider
+	if !errors.As(err, &rap) || rap == nil {
+		return nil
+	}
+	retryAfter := rap.RetryAfter()
+	if retryAfter == nil {
+		return nil
+	}
+	val := *retryAfter // copy so callers never alias the error's own value
+	return &val
+}
+
 func statusCodeFromResult(err *Error) int {
 	if err == nil {
 		return 0
@@ -746,7 +783,7 @@ func statusCodeFromResult(err *Error) int {
 	return err.StatusCode()
 }

-func applyAuthFailureState(auth *Auth, resultErr *Error, now time.Time) {
+func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Duration, now time.Time) {
 	if auth == nil {
 		return
 	}
@@ -771,13 +808,17 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, now time.Time) {
 		auth.StatusMessage = "quota exhausted"
 		auth.Quota.Exceeded = true
 		auth.Quota.Reason = "quota"
-		cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel)
 		var next time.Time
-		if cooldown > 0 {
-			next = now.Add(cooldown)
+		if retryAfter != nil {
+			next = now.Add(*retryAfter)
+		} else {
+			cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel)
+			if cooldown > 0 {
+				next = now.Add(cooldown)
+			}
+			auth.Quota.BackoffLevel = nextLevel
 		}
 		auth.Quota.NextRecoverAt = next
-		auth.Quota.BackoffLevel = nextLevel
 		auth.NextRetryAfter = next
 	case 408, 500, 502, 503, 504:
 		auth.StatusMessage = "transient upstream error"
--- a/sdk/cliproxy/auth/types.go
+++ b/sdk/cliproxy/auth/types.go
@@ -5,6 +5,7 @@ import (
 	"strconv"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"time"

 	baseauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth"
@@ -14,6 +15,8 @@ import (
 type Auth struct {
 	// ID uniquely identifies the auth record across restarts.
 	ID string `json:"id"`
+	// Index is a monotonically increasing runtime identifier used for diagnostics.
+	Index uint64 `json:"-"`
 	// Provider is the upstream provider key (e.g. "gemini", "claude").
 	Provider string `json:"provider"`
 	// FileName stores the relative or absolute path of the backing auth file.
@@ -55,6 +58,8 @@ type Auth struct {

 	// Runtime carries non-serialisable data used during execution (in-memory only).
 	Runtime any `json:"-"`
+
+	indexAssigned bool `json:"-"`
 }

 // QuotaState contains limiter tracking data for a credential.
@@ -87,6 +92,12 @@ type ModelState struct {
 	UpdatedAt time.Time `json:"updated_at"`
 }

+// authIndexCounter backs nextAuthIndex; its zero value makes the first index 0.
+var authIndexCounter atomic.Uint64
+
+// nextAuthIndex atomically reserves and returns the next unused auth index.
+func nextAuthIndex() uint64 { return authIndexCounter.Add(1) - 1 }
+
 // Clone shallow copies the Auth structure, duplicating maps to avoid accidental mutation.
 func (a *Auth) Clone() *Auth {
 	if a == nil {
@@ -115,6 +126,20 @@ func (a *Auth) Clone() *Auth {
 	return &copyAuth
 }

+// EnsureIndex lazily assigns the auth's runtime index on first use and returns
+// it. A nil receiver yields 0; once assigned, the stored index is returned as is.
+func (a *Auth) EnsureIndex() uint64 {
+	switch {
+	case a == nil:
+		return 0
+	case !a.indexAssigned:
+		// First call for this value: draw a fresh index and remember that we did.
+		a.Index = nextAuthIndex()
+		a.indexAssigned = true
+	}
+	return a.Index
+}
+
 // Clone duplicates a model state including nested error details.
 func (m *ModelState) Clone() *ModelState {
 	if m == nil {
--- a/sdk/cliproxy/service.go
+++ b/sdk/cliproxy/service.go
@@ -629,7 +629,7 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
 		models = registry.GetGeminiModels()
 	case "vertex":
-		// Vertex AI Gemini supports the same model identifiers as Gemini.
+		// Vertex AI Gemini models are served by their own registry function.
-		models = registry.GetGeminiModels()
+		models = registry.GetGeminiVertexModels()
 	case "gemini-cli":
 		models = registry.GetGeminiCLIModels()
 	case "aistudio":
--- a/sdk/cliproxy/usage/manager.go
+++ b/sdk/cliproxy/usage/manager.go
@@ -14,6 +14,7 @@ type Record struct {
 	Model       string
 	APIKey      string
 	AuthID      string
+	AuthIndex   uint64
 	Source      string
 	RequestedAt time.Time
 	Failed      bool
Author	SHA1	Message	Date
Luis Pater	98596c0a3f	refactor(translator): remove `service_tier` from Codex OpenAI request payload	2025-11-20 20:12:06 +08:00
Luis Pater	670ce2e528	Merge pull request #285 from router-for-me/iflow feat(iflow): add cookie-based authentication endpoint	2025-11-20 20:04:38 +08:00
hkfires	3f4f8b3b2d	feat(iflow): add cookie-based authentication endpoint	2025-11-20 18:23:43 +08:00
Luis Pater	371324c090	feat(registry): expand Gemini model definitions and support Vertex AI	2025-11-20 18:16:26 +08:00
Luis Pater	d50b0f7524	refactor(executor): simplify Gemini CLI execution and remove internal retry logic - Removed nested retry handling for 429 rate limit errors. - Simplified request/response handling by cleaning redundant retry-related code. - Eliminated `parseRetryDelay` function and max retry configuration logic.	2025-11-20 17:49:37 +08:00
Luis Pater	0586da9c2b	refactor(registry): move Gemini 3 Pro Preview model definition to base set	2025-11-20 10:51:16 +08:00
Luis Pater	618511ff67	Merge pull request #280 from ben-vargas/feat-enable-gemini-3-cli feat: enable Gemini 3 Pro Preview with OAuth support	2025-11-20 08:46:57 +08:00
Ben Vargas	0ff094b87f	fix(executor): prevent streaming on failed response when no fallback Fix critical bug where ExecuteStream would create a streaming channel from a failed (non-2xx) response after exhausting all retries with no fallback models available. When retries were exhausted on the last model, the code would break from the inner loop but fall through to streaming channel creation (line 401), immediately returning at line 461. This made the error handling code at lines 464-471 unreachable, causing clients to receive an empty/closed stream instead of a proper error response. Solution: Check if httpResp is non-2xx before creating the streaming channel. If failed, continue the outer loop to reach error handling. Identified by: codex-bot review Ref: https://github.com/router-for-me/CLIProxyAPI/pull/280#pullrequestreview-3484560423	2025-11-19 13:14:40 -07:00
Ben Vargas	ed23472d94	fix(executor): prevent streaming from 429 response when fallback available Fix critical bug where ExecuteStream would create a streaming channel using a 429 error response instead of continuing to the next fallback model after exhausting retries. When 429 retries were exhausted and a fallback model was available, the inner retry loop would break but immediately fall through to the streaming channel creation, attempting to stream from the failed 429 response instead of trying the next model. Solution: Add shouldContinueToNextModel flag to explicitly skip the streaming logic and continue the outer model loop when appropriate. Identified by: codex-bot review Ref: https://github.com/router-for-me/CLIProxyAPI/pull/280#pullrequestreview-3484479106	2025-11-19 13:05:38 -07:00
Ben Vargas	ede4471b84	feat(translator): add default thinkingConfig for gemini-3-pro-preview Match official Gemini CLI behavior by always sending default thinkingConfig when client doesn't specify reasoning parameters. - Set thinkingBudget=-1 (dynamic) for gemini-3-pro-preview - Set include_thoughts=true to return thinking process - Apply to both /v1/chat/completions and /v1/responses endpoints - See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts	2025-11-19 12:47:39 -07:00
Ben Vargas	6a3de3a89c	feat(executor): add intelligent retry logic for 429 rate limits Implement Google RetryInfo.retryDelay support for handling 429 rate limit errors. Retries same model up to 3 times using exact delays from Google's API before trying fallback models. - Add parseRetryDelay() to extract Google's retry guidance - Implement inner retry loop in Execute() and ExecuteStream() - Context-aware waiting with cancellation support - Cap delays at 60s maximum for safety	2025-11-19 12:47:39 -07:00
Ben Vargas	782bba0bc4	feat(registry): enable gemini-3-pro-preview for gemini-cli provider Add gemini-3-pro-preview model to GetGeminiCLIModels() to make it available for OAuth-based Gemini CLI users, matching the model already available in AI Studio provider. Model spec: - ID: gemini-3-pro-preview - Version: 3.0 - Input: 1M tokens - Output: 64K tokens - Thinking: 128-32K tokens (dynamic)	2025-11-19 12:47:39 -07:00
Luis Pater	bf116b68f8	feat(registry): add GPT-5.1 Codex Max model definitions and support - Introduced `gpt-5.1-codex-max` variants to model definitions (`low`, `medium`, `high`, `xhigh`). - Updated executor logic to map effort levels for Codex Max models. - Added `lastCodexMaxPrompt` processing for `gpt-5.1-codex-max` prompts. - Defined instructions for `gpt-5.1-codex-max` in a new file: `codex_instructions/gpt-5.1-codex-max_prompt.md`.	2025-11-20 03:12:22 +08:00
Luis Pater	cc3cf09c00	feat(auth): add AuthIndex for diagnostics and ensure usage recording	2025-11-19 22:02:40 +08:00
Luis Pater	9acfbcc2a0	Merge pull request #275 from router-for-me/iflow Iflow	2025-11-19 20:44:54 +08:00
hkfires	b285b07986	fix(iflow): adjust auth filename email sanitization	2025-11-19 19:50:06 +08:00
Luis Pater	c40e00526b	Merge pull request #274 from router-for-me/log fix: detect HTML error bodies without text/html content type	2025-11-19 17:40:06 +08:00
hkfires	8a33f3ef69	fix: detect HTML error bodies without text/html content type	2025-11-19 14:45:33 +08:00
Luis Pater	7a8e00fcea	fix(translator): handle missing parameters in Gemini tool schema gracefully	2025-11-19 13:19:46 +08:00