fix(management): improve error handling and normalize YAML comment indentation

Enhance error management for file operations and clean up temporary files. Add `NormalizeCommentIndentation` function to ensure YAML comments maintain consistent formatting.
feat(management): add Vertex service account import and WebSocket auth management
2026-02-02 12:30:50 +08:00 · 2025-11-11 08:37:57 +08:00 · 2025-11-10 20:48:31 +08:00 · 2025-11-10 18:37:05 +08:00 · 2025-11-10 12:57:40 +08:00 · 2025-11-10 12:24:07 +08:00
28 changed files with 1913 additions and 221 deletions
--- a/cmd/server/main.go
+++ b/cmd/server/main.go
@@ -57,6 +57,7 @@ func main() {
 	var iflowLogin bool
 	var noBrowser bool
 	var projectID string
+	var vertexImport string
 	var configPath string
 	var password string

@@ -69,6 +70,7 @@ func main() {
 	flag.BoolVar(&noBrowser, "no-browser", false, "Don't open browser automatically for OAuth")
 	flag.StringVar(&projectID, "project_id", "", "Project ID (Gemini only, not required)")
 	flag.StringVar(&configPath, "config", DefaultConfigPath, "Configure File Path")
+	flag.StringVar(&vertexImport, "vertex-import", "", "Import Vertex service account key JSON file")
 	flag.StringVar(&password, "password", "", "")

 	flag.CommandLine.Usage = func() {
@@ -417,7 +419,10 @@ func main() {

 	// Handle different command modes based on the provided flags.

-	if login {
+	if vertexImport != "" {
+		// Handle Vertex service account import
+		cmd.DoVertexImport(cfg, vertexImport)
+	} else if login {
 		// Handle Google/Gemini login
 		cmd.DoLogin(cfg, projectID, options)
 	} else if codexLogin {
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -49,10 +49,10 @@ ws-auth: false
 # Gemini API keys (preferred)
 #gemini-api-key:
 #  - api-key: "AIzaSy...01"
-#    # base-url: "https://generativelanguage.googleapis.com"
-#    # headers:
-#    #   X-Custom-Header: "custom-value"
-#    # proxy-url: "socks5://proxy.example.com:1080"
+#    base-url: "https://generativelanguage.googleapis.com"
+#    headers:
+#      X-Custom-Header: "custom-value"
+#    proxy-url: "socks5://proxy.example.com:1080"
 #  - api-key: "AIzaSy...02"

 # API keys for official Generative Language API (legacy compatibility)
@@ -64,6 +64,8 @@ ws-auth: false
 #codex-api-key:
 #  - api-key: "sk-atSM..."
 #    base-url: "https://www.example.com" # use the custom codex API endpoint
+#    headers:
+#      X-Custom-Header: "custom-value"
 #    proxy-url: "socks5://proxy.example.com:1080" # optional: per-key proxy override

 # Claude API keys
@@ -71,6 +73,8 @@ ws-auth: false
 #  - api-key: "sk-atSM..." # use the official claude API key, no need to set the base url
 #  - api-key: "sk-atSM..."
 #    base-url: "https://www.example.com" # use the custom claude API endpoint
+#    headers:
+#      X-Custom-Header: "custom-value"
 #    proxy-url: "socks5://proxy.example.com:1080" # optional: per-key proxy override
 #    models:
 #      - name: "claude-3-5-sonnet-20241022" # upstream model name
@@ -80,6 +84,8 @@ ws-auth: false
 #openai-compatibility:
 #  - name: "openrouter" # The name of the provider; it will be used in the user agent and other places.
 #    base-url: "https://openrouter.ai/api/v1" # The base URL of the provider.
+#    headers:
+#      X-Custom-Header: "custom-value"
 #    # New format with per-key proxy support (recommended):
 #    api-key-entries:
 #      - api-key: "sk-or-v1-...b780"
--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
@@ -12,6 +12,7 @@ import (
 	"net/url"
 	"os"
 	"path/filepath"
+	"sort"
 	"strconv"
 	"strings"
 	"sync"
@@ -229,8 +230,32 @@ func (h *Handler) managementCallbackURL(path string) (string, error) {
 	return fmt.Sprintf("http://127.0.0.1:%d%s", h.cfg.Port, path), nil
 }

-// List auth files
+// ListAuthFiles responds with a JSON object of the form {"files": [...]}.
+// A nil handler yields a 500 response. When no auth manager is attached the
+// listing is delegated to listAuthFilesFromDisk; otherwise one entry is built
+// per auth returned by authManager.List() and the result is sorted by "name".
 func (h *Handler) ListAuthFiles(c *gin.Context) {
+	if h == nil {
+		c.JSON(500, gin.H{"error": "handler not initialized"})
+		return
+	}
+	if h.authManager == nil {
+		h.listAuthFilesFromDisk(c)
+		return
+	}
+	auths := h.authManager.List()
+	files := make([]gin.H, 0, len(auths))
+	for _, auth := range auths {
+		// buildAuthFileEntry returns nil for auths that should not be listed.
+		if entry := h.buildAuthFileEntry(auth); entry != nil {
+			files = append(files, entry)
+		}
+	}
+	// Order entries case-insensitively by name; a non-string name sorts as "".
+	sort.Slice(files, func(i, j int) bool {
+		nameI, _ := files[i]["name"].(string)
+		nameJ, _ := files[j]["name"].(string)
+		return strings.ToLower(nameI) < strings.ToLower(nameJ)
+	})
+	c.JSON(200, gin.H{"files": files})
+}
+
+// List auth files from disk when the auth manager is unavailable.
+func (h *Handler) listAuthFilesFromDisk(c *gin.Context) {
 	entries, err := os.ReadDir(h.cfg.AuthDir)
 	if err != nil {
 		c.JSON(500, gin.H{"error": fmt.Sprintf("failed to read auth dir: %v", err)})
@@ -263,6 +288,106 @@ func (h *Handler) ListAuthFiles(c *gin.Context) {
 	c.JSON(200, gin.H{"files": files})
 }

+// buildAuthFileEntry converts an auth record into the JSON entry used by ListAuthFiles.
+// It returns nil when auth is nil, when a runtime-only auth is disabled, or when an
+// auth that is not runtime-only has no "path" attribute.
+func (h *Handler) buildAuthFileEntry(auth *coreauth.Auth) gin.H {
+	if auth == nil {
+		return nil
+	}
+	runtimeOnly := isRuntimeOnlyAuth(auth)
+	if runtimeOnly && (auth.Disabled || auth.Status == coreauth.StatusDisabled) {
+		return nil
+	}
+	path := strings.TrimSpace(authAttribute(auth, "path"))
+	if path == "" && !runtimeOnly {
+		return nil
+	}
+	// Fall back to the auth ID when no file name is recorded.
+	name := strings.TrimSpace(auth.FileName)
+	if name == "" {
+		name = auth.ID
+	}
+	// "source" and "size" start with in-memory defaults and are refined below
+	// once a backing file is found.
+	entry := gin.H{
+		"id":             auth.ID,
+		"name":           name,
+		"type":           strings.TrimSpace(auth.Provider),
+		"provider":       strings.TrimSpace(auth.Provider),
+		"label":          auth.Label,
+		"status":         auth.Status,
+		"status_message": auth.StatusMessage,
+		"disabled":       auth.Disabled,
+		"unavailable":    auth.Unavailable,
+		"runtime_only":   runtimeOnly,
+		"source":         "memory",
+		"size":           int64(0),
+	}
+	// Optional fields are only emitted when they carry a value.
+	if email := authEmail(auth); email != "" {
+		entry["email"] = email
+	}
+	if accountType, account := auth.AccountInfo(); accountType != "" || account != "" {
+		if accountType != "" {
+			entry["account_type"] = accountType
+		}
+		if account != "" {
+			entry["account"] = account
+		}
+	}
+	if !auth.CreatedAt.IsZero() {
+		entry["created_at"] = auth.CreatedAt
+	}
+	if !auth.UpdatedAt.IsZero() {
+		entry["modtime"] = auth.UpdatedAt
+		entry["updated_at"] = auth.UpdatedAt
+	}
+	if !auth.LastRefreshedAt.IsZero() {
+		entry["last_refresh"] = auth.LastRefreshedAt
+	}
+	if path != "" {
+		entry["path"] = path
+		entry["source"] = "file"
+		if info, err := os.Stat(path); err == nil {
+			// The file on disk wins over the in-memory timestamps for size and modtime.
+			entry["size"] = info.Size()
+			entry["modtime"] = info.ModTime()
+		} else if os.IsNotExist(err) {
+			// The recorded path no longer exists, so report the entry as memory-backed.
+			entry["source"] = "memory"
+		} else {
+			log.WithError(err).Warnf("failed to stat auth file %s", path)
+		}
+	}
+	return entry
+}
+
+// authEmail returns the trimmed e-mail address associated with auth, or "" when none is found.
+// Sources are consulted in order: Metadata["email"] (string values only), Attributes["email"],
+// then Attributes["account_email"]. Blank candidates are skipped so that a later source can
+// still supply the value.
+func authEmail(auth *coreauth.Auth) string {
+	if auth == nil {
+		return ""
+	}
+	if auth.Metadata != nil {
+		if v, ok := auth.Metadata["email"].(string); ok {
+			// A blank metadata value must not mask an e-mail stored in Attributes.
+			if email := strings.TrimSpace(v); email != "" {
+				return email
+			}
+		}
+	}
+	if auth.Attributes != nil {
+		if v := strings.TrimSpace(auth.Attributes["email"]); v != "" {
+			return v
+		}
+		if v := strings.TrimSpace(auth.Attributes["account_email"]); v != "" {
+			return v
+		}
+	}
+	return ""
+}
+
+// authAttribute returns auth.Attributes[key], or "" when auth is nil, has no
+// attributes, or the key is absent. The value is returned untrimmed.
+func authAttribute(auth *coreauth.Auth, key string) string {
+	if auth == nil || len(auth.Attributes) == 0 {
+		return ""
+	}
+	return auth.Attributes[key]
+}
+
+// isRuntimeOnlyAuth reports whether the "runtime_only" attribute of auth equals
+// "true", ignoring case and surrounding whitespace. A nil auth or an auth
+// without attributes is not runtime-only.
+func isRuntimeOnlyAuth(auth *coreauth.Auth) bool {
+	if auth == nil || len(auth.Attributes) == 0 {
+		return false
+	}
+	return strings.EqualFold(strings.TrimSpace(auth.Attributes["runtime_only"]), "true")
+}
+
 // Download single auth file by name
 func (h *Handler) DownloadAuthFile(c *gin.Context) {
 	name := c.Query("name")
--- a/internal/api/handlers/management/config_basic.go
+++ b/internal/api/handlers/management/config_basic.go
@@ -28,7 +28,7 @@ func (h *Handler) GetConfigYAML(c *gin.Context) {
 		return
 	}
 	var node yaml.Node
-	if err := yaml.Unmarshal(data, &node); err != nil {
+	if err = yaml.Unmarshal(data, &node); err != nil {
 		c.JSON(http.StatusInternalServerError, gin.H{"error": "parse_failed", "message": err.Error()})
 		return
 	}
@@ -41,17 +41,18 @@ func (h *Handler) GetConfigYAML(c *gin.Context) {
 }

+// WriteConfig passes data through config.NormalizeCommentIndentation and writes the
+// result to path, creating the file if needed (mode 0644) and truncating any existing
+// content. The file is synced before it is closed. If the write or the sync fails, that
+// error is returned and the error from the follow-up Close is discarded.
 func WriteConfig(path string, data []byte) error {
+	data = config.NormalizeCommentIndentation(data)
 	f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
 	if err != nil {
 		return err
 	}
-	if _, err := f.Write(data); err != nil {
-		f.Close()
-		return err
+	if _, errWrite := f.Write(data); errWrite != nil {
+		_ = f.Close()
+		return errWrite
 	}
-	if err := f.Sync(); err != nil {
-		f.Close()
-		return err
+	if errSync := f.Sync(); errSync != nil {
+		_ = f.Close()
+		return errSync
 	}
 	return f.Close()
 }
@@ -63,7 +64,7 @@ func (h *Handler) PutConfigYAML(c *gin.Context) {
 		return
 	}
 	var cfg config.Config
-	if err := yaml.Unmarshal(body, &cfg); err != nil {
+	if err = yaml.Unmarshal(body, &cfg); err != nil {
 		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid_yaml", "message": err.Error()})
 		return
 	}
@@ -75,18 +76,20 @@ func (h *Handler) PutConfigYAML(c *gin.Context) {
 		return
 	}
 	tempFile := tmpFile.Name()
-	if _, err := tmpFile.Write(body); err != nil {
-		tmpFile.Close()
-		os.Remove(tempFile)
-		c.JSON(http.StatusInternalServerError, gin.H{"error": "write_failed", "message": err.Error()})
+	if _, errWrite := tmpFile.Write(body); errWrite != nil {
+		_ = tmpFile.Close()
+		_ = os.Remove(tempFile)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "write_failed", "message": errWrite.Error()})
 		return
 	}
-	if err := tmpFile.Close(); err != nil {
-		os.Remove(tempFile)
-		c.JSON(http.StatusInternalServerError, gin.H{"error": "write_failed", "message": err.Error()})
+	if errClose := tmpFile.Close(); errClose != nil {
+		_ = os.Remove(tempFile)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "write_failed", "message": errClose.Error()})
 		return
 	}
-	defer os.Remove(tempFile)
+	defer func() {
+		_ = os.Remove(tempFile)
+	}()
 	_, err = config.LoadConfigOptional(tempFile, false)
 	if err != nil {
 		c.JSON(http.StatusUnprocessableEntity, gin.H{"error": "invalid_config", "message": err.Error()})
@@ -153,6 +156,14 @@ func (h *Handler) PutRequestLog(c *gin.Context) {
 	h.updateBoolField(c, func(v bool) { h.cfg.RequestLog = v })
 }

+// GetWebsocketAuth responds with the current WebSocket auth flag as JSON: {"ws-auth": <bool>}.
+func (h *Handler) GetWebsocketAuth(c *gin.Context) {
+	c.JSON(200, gin.H{"ws-auth": h.cfg.WebsocketAuth})
+}
+// PutWebsocketAuth stores a new value in cfg.WebsocketAuth via updateBoolField.
+// NOTE(review): request parsing and persistence presumably happen inside updateBoolField — confirm there.
+func (h *Handler) PutWebsocketAuth(c *gin.Context) {
+	h.updateBoolField(c, func(v bool) { h.cfg.WebsocketAuth = v })
+}
+
 // Request retry
 func (h *Handler) GetRequestRetry(c *gin.Context) {
 	c.JSON(200, gin.H{"request-retry": h.cfg.RequestRetry})
--- a/internal/api/handlers/management/config_lists.go
+++ b/internal/api/handlers/management/config_lists.go
@@ -148,7 +148,7 @@ func (h *Handler) applyLegacyKeys(keys []string) {
 	}
 	h.cfg.GeminiKey = newList
 	h.cfg.GlAPIKey = sanitized
-	h.cfg.SyncGeminiKeys()
+	h.cfg.SanitizeGeminiKeys()
 }

 // api-keys
@@ -206,7 +206,7 @@ func (h *Handler) PutGeminiKeys(c *gin.Context) {
 		arr = obj.Items
 	}
 	h.cfg.GeminiKey = append([]config.GeminiKey(nil), arr...)
-	h.cfg.SyncGeminiKeys()
+	h.cfg.SanitizeGeminiKeys()
 	h.persist(c)
 }
 func (h *Handler) PatchGeminiKey(c *gin.Context) {
@@ -227,7 +227,7 @@ func (h *Handler) PatchGeminiKey(c *gin.Context) {
 		// Treat empty API key as delete.
 		if body.Index != nil && *body.Index >= 0 && *body.Index < len(h.cfg.GeminiKey) {
 			h.cfg.GeminiKey = append(h.cfg.GeminiKey[:*body.Index], h.cfg.GeminiKey[*body.Index+1:]...)
-			h.cfg.SyncGeminiKeys()
+			h.cfg.SanitizeGeminiKeys()
 			h.persist(c)
 			return
 		}
@@ -245,7 +245,7 @@ func (h *Handler) PatchGeminiKey(c *gin.Context) {
 				}
 				if removed {
 					h.cfg.GeminiKey = out
-					h.cfg.SyncGeminiKeys()
+					h.cfg.SanitizeGeminiKeys()
 					h.persist(c)
 					return
 				}
@@ -257,7 +257,7 @@ func (h *Handler) PatchGeminiKey(c *gin.Context) {

 	if body.Index != nil && *body.Index >= 0 && *body.Index < len(h.cfg.GeminiKey) {
 		h.cfg.GeminiKey[*body.Index] = value
-		h.cfg.SyncGeminiKeys()
+		h.cfg.SanitizeGeminiKeys()
 		h.persist(c)
 		return
 	}
@@ -266,7 +266,7 @@ func (h *Handler) PatchGeminiKey(c *gin.Context) {
 		for i := range h.cfg.GeminiKey {
 			if h.cfg.GeminiKey[i].APIKey == match {
 				h.cfg.GeminiKey[i] = value
-				h.cfg.SyncGeminiKeys()
+				h.cfg.SanitizeGeminiKeys()
 				h.persist(c)
 				return
 			}
@@ -284,7 +284,7 @@ func (h *Handler) DeleteGeminiKey(c *gin.Context) {
 		}
 		if len(out) != len(h.cfg.GeminiKey) {
 			h.cfg.GeminiKey = out
-			h.cfg.SyncGeminiKeys()
+			h.cfg.SanitizeGeminiKeys()
 			h.persist(c)
 		} else {
 			c.JSON(404, gin.H{"error": "item not found"})
@@ -295,7 +295,7 @@ func (h *Handler) DeleteGeminiKey(c *gin.Context) {
 		var idx int
 		if _, err := fmt.Sscanf(idxStr, "%d", &idx); err == nil && idx >= 0 && idx < len(h.cfg.GeminiKey) {
 			h.cfg.GeminiKey = append(h.cfg.GeminiKey[:idx], h.cfg.GeminiKey[idx+1:]...)
-			h.cfg.SyncGeminiKeys()
+			h.cfg.SanitizeGeminiKeys()
 			h.persist(c)
 			return
 		}
@@ -328,6 +328,7 @@ func (h *Handler) PutClaudeKeys(c *gin.Context) {
 		normalizeClaudeKey(&arr[i])
 	}
 	h.cfg.ClaudeKey = arr
+	h.cfg.SanitizeClaudeKeys()
 	h.persist(c)
 }
 func (h *Handler) PatchClaudeKey(c *gin.Context) {
@@ -340,16 +341,19 @@ func (h *Handler) PatchClaudeKey(c *gin.Context) {
 		c.JSON(400, gin.H{"error": "invalid body"})
 		return
 	}
-	normalizeClaudeKey(body.Value)
+	value := *body.Value
+	normalizeClaudeKey(&value)
 	if body.Index != nil && *body.Index >= 0 && *body.Index < len(h.cfg.ClaudeKey) {
-		h.cfg.ClaudeKey[*body.Index] = *body.Value
+		h.cfg.ClaudeKey[*body.Index] = value
+		h.cfg.SanitizeClaudeKeys()
 		h.persist(c)
 		return
 	}
 	if body.Match != nil {
 		for i := range h.cfg.ClaudeKey {
 			if h.cfg.ClaudeKey[i].APIKey == *body.Match {
-				h.cfg.ClaudeKey[i] = *body.Value
+				h.cfg.ClaudeKey[i] = value
+				h.cfg.SanitizeClaudeKeys()
 				h.persist(c)
 				return
 			}
@@ -366,6 +370,7 @@ func (h *Handler) DeleteClaudeKey(c *gin.Context) {
 			}
 		}
 		h.cfg.ClaudeKey = out
+		h.cfg.SanitizeClaudeKeys()
 		h.persist(c)
 		return
 	}
@@ -374,6 +379,7 @@ func (h *Handler) DeleteClaudeKey(c *gin.Context) {
 		_, err := fmt.Sscanf(idxStr, "%d", &idx)
 		if err == nil && idx >= 0 && idx < len(h.cfg.ClaudeKey) {
 			h.cfg.ClaudeKey = append(h.cfg.ClaudeKey[:idx], h.cfg.ClaudeKey[idx+1:]...)
+			h.cfg.SanitizeClaudeKeys()
 			h.persist(c)
 			return
 		}
@@ -413,6 +419,7 @@ func (h *Handler) PutOpenAICompat(c *gin.Context) {
 		}
 	}
 	h.cfg.OpenAICompatibility = filtered
+	h.cfg.SanitizeOpenAICompatibility()
 	h.persist(c)
 }
 func (h *Handler) PatchOpenAICompat(c *gin.Context) {
@@ -430,6 +437,7 @@ func (h *Handler) PatchOpenAICompat(c *gin.Context) {
 	if strings.TrimSpace(body.Value.BaseURL) == "" {
 		if body.Index != nil && *body.Index >= 0 && *body.Index < len(h.cfg.OpenAICompatibility) {
 			h.cfg.OpenAICompatibility = append(h.cfg.OpenAICompatibility[:*body.Index], h.cfg.OpenAICompatibility[*body.Index+1:]...)
+			h.cfg.SanitizeOpenAICompatibility()
 			h.persist(c)
 			return
 		}
@@ -445,6 +453,7 @@ func (h *Handler) PatchOpenAICompat(c *gin.Context) {
 			}
 			if removed {
 				h.cfg.OpenAICompatibility = out
+				h.cfg.SanitizeOpenAICompatibility()
 				h.persist(c)
 				return
 			}
@@ -454,6 +463,7 @@ func (h *Handler) PatchOpenAICompat(c *gin.Context) {
 	}
 	if body.Index != nil && *body.Index >= 0 && *body.Index < len(h.cfg.OpenAICompatibility) {
 		h.cfg.OpenAICompatibility[*body.Index] = *body.Value
+		h.cfg.SanitizeOpenAICompatibility()
 		h.persist(c)
 		return
 	}
@@ -461,6 +471,7 @@ func (h *Handler) PatchOpenAICompat(c *gin.Context) {
 		for i := range h.cfg.OpenAICompatibility {
 			if h.cfg.OpenAICompatibility[i].Name == *body.Name {
 				h.cfg.OpenAICompatibility[i] = *body.Value
+				h.cfg.SanitizeOpenAICompatibility()
 				h.persist(c)
 				return
 			}
@@ -477,6 +488,7 @@ func (h *Handler) DeleteOpenAICompat(c *gin.Context) {
 			}
 		}
 		h.cfg.OpenAICompatibility = out
+		h.cfg.SanitizeOpenAICompatibility()
 		h.persist(c)
 		return
 	}
@@ -485,6 +497,7 @@ func (h *Handler) DeleteOpenAICompat(c *gin.Context) {
 		_, err := fmt.Sscanf(idxStr, "%d", &idx)
 		if err == nil && idx >= 0 && idx < len(h.cfg.OpenAICompatibility) {
 			h.cfg.OpenAICompatibility = append(h.cfg.OpenAICompatibility[:idx], h.cfg.OpenAICompatibility[idx+1:]...)
+			h.cfg.SanitizeOpenAICompatibility()
 			h.persist(c)
 			return
 		}
@@ -517,13 +530,17 @@ func (h *Handler) PutCodexKeys(c *gin.Context) {
 	filtered := make([]config.CodexKey, 0, len(arr))
 	for i := range arr {
 		entry := arr[i]
+		entry.APIKey = strings.TrimSpace(entry.APIKey)
 		entry.BaseURL = strings.TrimSpace(entry.BaseURL)
+		entry.ProxyURL = strings.TrimSpace(entry.ProxyURL)
+		entry.Headers = config.NormalizeHeaders(entry.Headers)
 		if entry.BaseURL == "" {
 			continue
 		}
 		filtered = append(filtered, entry)
 	}
 	h.cfg.CodexKey = filtered
+	h.cfg.SanitizeCodexKeys()
 	h.persist(c)
 }
 func (h *Handler) PatchCodexKey(c *gin.Context) {
@@ -536,10 +553,16 @@ func (h *Handler) PatchCodexKey(c *gin.Context) {
 		c.JSON(400, gin.H{"error": "invalid body"})
 		return
 	}
+	value := *body.Value
+	value.APIKey = strings.TrimSpace(value.APIKey)
+	value.BaseURL = strings.TrimSpace(value.BaseURL)
+	value.ProxyURL = strings.TrimSpace(value.ProxyURL)
+	value.Headers = config.NormalizeHeaders(value.Headers)
 	// If base-url becomes empty, delete instead of update
-	if strings.TrimSpace(body.Value.BaseURL) == "" {
+	if value.BaseURL == "" {
 		if body.Index != nil && *body.Index >= 0 && *body.Index < len(h.cfg.CodexKey) {
 			h.cfg.CodexKey = append(h.cfg.CodexKey[:*body.Index], h.cfg.CodexKey[*body.Index+1:]...)
+			h.cfg.SanitizeCodexKeys()
 			h.persist(c)
 			return
 		}
@@ -555,20 +578,23 @@ func (h *Handler) PatchCodexKey(c *gin.Context) {
 			}
 			if removed {
 				h.cfg.CodexKey = out
+				h.cfg.SanitizeCodexKeys()
 				h.persist(c)
 				return
 			}
 		}
 	} else {
 		if body.Index != nil && *body.Index >= 0 && *body.Index < len(h.cfg.CodexKey) {
-			h.cfg.CodexKey[*body.Index] = *body.Value
+			h.cfg.CodexKey[*body.Index] = value
+			h.cfg.SanitizeCodexKeys()
 			h.persist(c)
 			return
 		}
 		if body.Match != nil {
 			for i := range h.cfg.CodexKey {
 				if h.cfg.CodexKey[i].APIKey == *body.Match {
-					h.cfg.CodexKey[i] = *body.Value
+					h.cfg.CodexKey[i] = value
+					h.cfg.SanitizeCodexKeys()
 					h.persist(c)
 					return
 				}
@@ -586,6 +612,7 @@ func (h *Handler) DeleteCodexKey(c *gin.Context) {
 			}
 		}
 		h.cfg.CodexKey = out
+		h.cfg.SanitizeCodexKeys()
 		h.persist(c)
 		return
 	}
@@ -594,6 +621,7 @@ func (h *Handler) DeleteCodexKey(c *gin.Context) {
 		_, err := fmt.Sscanf(idxStr, "%d", &idx)
 		if err == nil && idx >= 0 && idx < len(h.cfg.CodexKey) {
 			h.cfg.CodexKey = append(h.cfg.CodexKey[:idx], h.cfg.CodexKey[idx+1:]...)
+			h.cfg.SanitizeCodexKeys()
 			h.persist(c)
 			return
 		}
@@ -607,6 +635,7 @@ func normalizeOpenAICompatibilityEntry(entry *config.OpenAICompatibility) {
 	}
 	// Trim base-url; empty base-url indicates provider should be removed by sanitization
 	entry.BaseURL = strings.TrimSpace(entry.BaseURL)
+	entry.Headers = config.NormalizeHeaders(entry.Headers)
 	existing := make(map[string]struct{}, len(entry.APIKeyEntries))
 	for i := range entry.APIKeyEntries {
 		trimmed := strings.TrimSpace(entry.APIKeyEntries[i].APIKey)
@@ -658,6 +687,7 @@ func normalizeClaudeKey(entry *config.ClaudeKey) {
 	entry.APIKey = strings.TrimSpace(entry.APIKey)
 	entry.BaseURL = strings.TrimSpace(entry.BaseURL)
 	entry.ProxyURL = strings.TrimSpace(entry.ProxyURL)
+	entry.Headers = config.NormalizeHeaders(entry.Headers)
 	if len(entry.Models) == 0 {
 		return
 	}
--- a/internal/api/handlers/management/vertex_import.go
+++ b/internal/api/handlers/management/vertex_import.go
@@ -0,0 +1,156 @@
+package management
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+
+	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/vertex"
+	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+)
+
+// ImportVertexCredential handles uploading a Vertex service account JSON and saving it as an auth record.
+//
+// The JSON is read from the multipart form field "file". An optional "location" is taken from the
+// form, then from the query string, and defaults to "us-central1". The record is stored under the
+// name "vertex-<sanitized project_id>.json" through saveTokenRecord.
+//
+// Responses: 503 when the config or auth directory is unavailable, 400 for a missing or unreadable
+// file, invalid JSON, an invalid service account or a missing project_id, 500 when saving fails,
+// and 200 with the saved path, project_id, email and location on success.
+func (h *Handler) ImportVertexCredential(c *gin.Context) {
+	if h == nil || h.cfg == nil {
+		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "config unavailable"})
+		return
+	}
+	if h.cfg.AuthDir == "" {
+		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "auth directory not configured"})
+		return
+	}

+	fileHeader, err := c.FormFile("file")
+	if err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "file required"})
+		return
+	}

+	file, err := fileHeader.Open()
+	if err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("failed to read file: %v", err)})
+		return
+	}
+	defer file.Close()

+	// NOTE(review): the whole upload is read into memory with no size cap in this handler —
+	// confirm that an upstream limit exists.
+	data, err := io.ReadAll(file)
+	if err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("failed to read file: %v", err)})
+		return
+	}

+	var serviceAccount map[string]any
+	if err := json.Unmarshal(data, &serviceAccount); err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid json", "message": err.Error()})
+		return
+	}

+	// From here on work with the copy whose private_key has been normalized.
+	normalizedSA, err := vertex.NormalizeServiceAccountMap(serviceAccount)
+	if err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid service account", "message": err.Error()})
+		return
+	}
+	serviceAccount = normalizedSA

+	projectID := strings.TrimSpace(valueAsString(serviceAccount["project_id"]))
+	if projectID == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "project_id missing"})
+		return
+	}
+	// client_email is optional; an empty value is accepted.
+	email := strings.TrimSpace(valueAsString(serviceAccount["client_email"]))

+	// Location precedence: form field, then query parameter, then the default region.
+	location := strings.TrimSpace(c.PostForm("location"))
+	if location == "" {
+		location = strings.TrimSpace(c.Query("location"))
+	}
+	if location == "" {
+		location = "us-central1"
+	}

+	// The file name doubles as the record ID.
+	fileName := fmt.Sprintf("vertex-%s.json", sanitizeVertexFilePart(projectID))
+	label := labelForVertex(projectID, email)
+	storage := &vertex.VertexCredentialStorage{
+		ServiceAccount: serviceAccount,
+		ProjectID:      projectID,
+		Email:          email,
+		Location:       location,
+		Type:           "vertex",
+	}
+	metadata := map[string]any{
+		"service_account": serviceAccount,
+		"project_id":      projectID,
+		"email":           email,
+		"location":        location,
+		"type":            "vertex",
+		"label":           label,
+	}
+	record := &coreauth.Auth{
+		ID:       fileName,
+		Provider: "vertex",
+		FileName: fileName,
+		Storage:  storage,
+		Label:    label,
+		Metadata: metadata,
+	}

+	// NOTE(review): net/http documents Request.Context as never returning nil, so the
+	// background fallback looks purely defensive — confirm.
+	ctx := context.Background()
+	if reqCtx := c.Request.Context(); reqCtx != nil {
+		ctx = reqCtx
+	}
+	savedPath, err := h.saveTokenRecord(ctx, record)
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "save_failed", "message": err.Error()})
+		return
+	}

+	c.JSON(http.StatusOK, gin.H{
+		"status":     "ok",
+		"auth-file":  savedPath,
+		"project_id": projectID,
+		"email":      email,
+		"location":   location,
+	})
+}
+
+// valueAsString renders v as a string: nil becomes "", a string is returned
+// unchanged, and any other value is formatted with fmt.Sprint.
+func valueAsString(v any) string {
+	if v == nil {
+		return ""
+	}
+	switch t := v.(type) {
+	case string:
+		return t
+	default:
+		return fmt.Sprint(t)
+	}
+}
+
+// sanitizeVertexFilePart turns s into a file-name component: surrounding whitespace is
+// trimmed, "/", "\" and ":" are replaced with "_", spaces are replaced with "-", and
+// "vertex" is returned when nothing is left.
+func sanitizeVertexFilePart(s string) string {
+	out := strings.TrimSpace(s)
+	// Flat list of (old, new) pairs, applied in order.
+	replacers := []string{"/", "_", "\\", "_", ":", "_", " ", "-"}
+	for i := 0; i < len(replacers); i += 2 {
+		out = strings.ReplaceAll(out, replacers[i], replacers[i+1])
+	}
+	if out == "" {
+		return "vertex"
+	}
+	return out
+}
+
+// labelForVertex builds a display label from the trimmed project ID and e-mail:
+// "project (email)" when both are present, whichever one is non-empty otherwise,
+// and "vertex" when both are blank.
+func labelForVertex(projectID, email string) string {
+	p := strings.TrimSpace(projectID)
+	e := strings.TrimSpace(email)
+	if p != "" && e != "" {
+		return fmt.Sprintf("%s (%s)", p, e)
+	}
+	if p != "" {
+		return p
+	}
+	if e != "" {
+		return e
+	}
+	return "vertex"
+}
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -484,6 +484,9 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.GET("/request-log", s.mgmt.GetRequestLog)
 		mgmt.PUT("/request-log", s.mgmt.PutRequestLog)
 		mgmt.PATCH("/request-log", s.mgmt.PutRequestLog)
+		mgmt.GET("/ws-auth", s.mgmt.GetWebsocketAuth)
+		mgmt.PUT("/ws-auth", s.mgmt.PutWebsocketAuth)
+		mgmt.PATCH("/ws-auth", s.mgmt.PutWebsocketAuth)

 		mgmt.GET("/request-retry", s.mgmt.GetRequestRetry)
 		mgmt.PUT("/request-retry", s.mgmt.PutRequestRetry)
@@ -508,6 +511,7 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.GET("/auth-files/download", s.mgmt.DownloadAuthFile)
 		mgmt.POST("/auth-files", s.mgmt.UploadAuthFile)
 		mgmt.DELETE("/auth-files", s.mgmt.DeleteAuthFile)
+		mgmt.POST("/vertex/import", s.mgmt.ImportVertexCredential)

 		mgmt.GET("/anthropic-auth-url", s.mgmt.RequestAnthropicToken)
 		mgmt.GET("/codex-auth-url", s.mgmt.RequestCodexToken)
@@ -703,7 +707,7 @@ func (s *Server) Stop(ctx context.Context) error {
 func corsMiddleware() gin.HandlerFunc {
 	return func(c *gin.Context) {
 		c.Header("Access-Control-Allow-Origin", "*")
-		c.Header("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS")
+		c.Header("Access-Control-Allow-Methods", "GET, POST, PUT, PATCH, DELETE, OPTIONS")
 		c.Header("Access-Control-Allow-Headers", "*")

 		if c.Request.Method == "OPTIONS" {
--- a/internal/auth/vertex/keyutil.go
+++ b/internal/auth/vertex/keyutil.go
@@ -0,0 +1,208 @@
+package vertex
+
+import (
+	"crypto/rsa"
+	"crypto/x509"
+	"encoding/base64"
+	"encoding/json"
+	"encoding/pem"
+	"fmt"
+	"strings"
+)
+
+// NormalizeServiceAccountJSON normalizes the given JSON-encoded service account payload.
+// It returns the normalized JSON (with sanitized private_key) or, if normalization fails,
+// the original bytes and the encountered error. Empty input is returned unchanged with a
+// nil error. The output is produced by re-marshalling a generic map, so its formatting
+// can differ from the input.
+func NormalizeServiceAccountJSON(raw []byte) ([]byte, error) {
+	if len(raw) == 0 {
+		return raw, nil
+	}
+	var payload map[string]any
+	if err := json.Unmarshal(raw, &payload); err != nil {
+		return raw, err
+	}
+	normalized, err := NormalizeServiceAccountMap(payload)
+	if err != nil {
+		return raw, err
+	}
+	out, err := json.Marshal(normalized)
+	if err != nil {
+		return raw, err
+	}
+	return out, nil
+}
+
+// NormalizeServiceAccountMap returns a copy of the given service account map with
+// a sanitized private_key field that is guaranteed to contain a valid RSA PRIVATE KEY PEM block.
+// The input map is not modified; the copy is shallow, so nested values are shared.
+// An error is returned when sa is nil, when private_key is missing, blank or not a
+// string, or when the key cannot be sanitized.
+func NormalizeServiceAccountMap(sa map[string]any) (map[string]any, error) {
+	if sa == nil {
+		return nil, fmt.Errorf("service account payload is empty")
+	}
+	// A non-string private_key leaves pk empty and is reported as missing.
+	pk, _ := sa["private_key"].(string)
+	if strings.TrimSpace(pk) == "" {
+		return nil, fmt.Errorf("service account missing private_key")
+	}
+	normalized, err := sanitizePrivateKey(pk)
+	if err != nil {
+		return nil, err
+	}
+	clone := make(map[string]any, len(sa))
+	for k, v := range sa {
+		clone[k] = v
+	}
+	clone["private_key"] = normalized
+	return clone, nil
+}
+
+// sanitizePrivateKey cleans up a textual private key and returns it re-encoded as PEM.
+// It converts CRLF and CR line endings to LF, strips terminal escape sequences via
+// stripANSIEscape, drops invalid UTF-8 and trims surrounding whitespace. If the result
+// does not decode as PEM, rebuildPEM is tried. The decoded block is then passed through
+// ensureRSAPrivateKey, and the block it returns is encoded as the result.
+func sanitizePrivateKey(raw string) (string, error) {
+	pk := strings.ReplaceAll(raw, "\r\n", "\n")
+	pk = strings.ReplaceAll(pk, "\r", "\n")
+	pk = stripANSIEscape(pk)
+	pk = strings.ToValidUTF8(pk, "")
+	pk = strings.TrimSpace(pk)

+	normalized := pk
+	if block, _ := pem.Decode([]byte(pk)); block == nil {
+		// Attempt to reconstruct from the textual payload.
+		if reconstructed, err := rebuildPEM(pk); err == nil {
+			normalized = reconstructed
+		} else {
+			return "", fmt.Errorf("private_key is not valid pem: %w", err)
+		}
+	}

+	// Decode again so the reconstructed text goes through the same path as the original.
+	block, _ := pem.Decode([]byte(normalized))
+	if block == nil {
+		return "", fmt.Errorf("private_key pem decode failed")
+	}

+	rsaBlock, err := ensureRSAPrivateKey(block)
+	if err != nil {
+		return "", err
+	}
+	return string(pem.EncodeToMemory(rsaBlock)), nil
+}
+
+// ensureRSAPrivateKey validates block and returns a PEM block of type "RSA PRIVATE KEY".
+//   - "RSA PRIVATE KEY": the bytes must parse as PKCS#1; the block is returned unchanged.
+//   - "PRIVATE KEY": the bytes must parse as PKCS#8 and hold an RSA key, which is
+//     re-marshalled as PKCS#1.
+//   - any other type: PKCS#1 is tried first, then PKCS#8 with an RSA key.
+//
+// An error is returned for a nil block or when none of these interpretations succeeds.
+func ensureRSAPrivateKey(block *pem.Block) (*pem.Block, error) {
+	if block == nil {
+		return nil, fmt.Errorf("pem block is nil")
+	}

+	if block.Type == "RSA PRIVATE KEY" {
+		if _, err := x509.ParsePKCS1PrivateKey(block.Bytes); err != nil {
+			return nil, fmt.Errorf("private_key invalid rsa: %w", err)
+		}
+		return block, nil
+	}

+	if block.Type == "PRIVATE KEY" {
+		key, err := x509.ParsePKCS8PrivateKey(block.Bytes)
+		if err != nil {
+			return nil, fmt.Errorf("private_key invalid pkcs8: %w", err)
+		}
+		rsaKey, ok := key.(*rsa.PrivateKey)
+		if !ok {
+			return nil, fmt.Errorf("private_key is not an RSA key")
+		}
+		der := x509.MarshalPKCS1PrivateKey(rsaKey)
+		return &pem.Block{Type: "RSA PRIVATE KEY", Bytes: der}, nil
+	}

+	// Attempt auto-detection: try PKCS#1 first, then PKCS#8.
+	if rsaKey, err := x509.ParsePKCS1PrivateKey(block.Bytes); err == nil {
+		der := x509.MarshalPKCS1PrivateKey(rsaKey)
+		return &pem.Block{Type: "RSA PRIVATE KEY", Bytes: der}, nil
+	}
+	if key, err := x509.ParsePKCS8PrivateKey(block.Bytes); err == nil {
+		if rsaKey, ok := key.(*rsa.PrivateKey); ok {
+			der := x509.MarshalPKCS1PrivateKey(rsaKey)
+			return &pem.Block{Type: "RSA PRIVATE KEY", Bytes: der}, nil
+		}
+	}
+	return nil, fmt.Errorf("private_key uses unsupported format")
+}
+
+// rebuildPEM reconstructs a well-formed PEM document from text that still carries its
+// BEGIN/END markers but whose body does not decode as-is. The block type is
+// "RSA PRIVATE KEY" when raw mentions it and "PRIVATE KEY" otherwise. Everything between
+// the header and the first footer that follows it is reduced to base64 characters,
+// decoded, and re-encoded as PEM.
+//
+// An error is returned when either marker is missing, when the filtered payload is
+// empty, or when it is not valid base64.
+func rebuildPEM(raw string) (string, error) {
+	kind := "PRIVATE KEY"
+	if strings.Contains(raw, "RSA PRIVATE KEY") {
+		kind = "RSA PRIVATE KEY"
+	}
+	header := "-----BEGIN " + kind + "-----"
+	footer := "-----END " + kind + "-----"
+	start := strings.Index(raw, header)
+	if start < 0 {
+		return "", fmt.Errorf("missing pem markers")
+	}
+	// Search for the footer only after the header. A footer that overlaps the header's
+	// trailing dashes (e.g. "-----BEGIN PRIVATE KEY-----END PRIVATE KEY-----") would
+	// otherwise yield an end offset inside the header and an out-of-range slice.
+	rest := raw[start+len(header):]
+	end := strings.Index(rest, footer)
+	if end < 0 {
+		return "", fmt.Errorf("missing pem markers")
+	}
+	payload := filterBase64(rest[:end])
+	if payload == "" {
+		return "", fmt.Errorf("private_key base64 payload empty")
+	}
+	der, err := base64.StdEncoding.DecodeString(payload)
+	if err != nil {
+		return "", fmt.Errorf("private_key base64 decode failed: %w", err)
+	}
+	block := &pem.Block{Type: kind, Bytes: der}
+	return string(pem.EncodeToMemory(block)), nil
+}
+
+// filterBase64 returns s with every character removed except those of the standard
+// base64 alphabet (A-Z, a-z, 0-9, '+', '/') and the padding character '='.
+func filterBase64(s string) string {
+	var b strings.Builder
+	for _, r := range s {
+		switch {
+		case r >= 'A' && r <= 'Z':
+			b.WriteRune(r)
+		case r >= 'a' && r <= 'z':
+			b.WriteRune(r)
+		case r >= '0' && r <= '9':
+			b.WriteRune(r)
+		case r == '+' || r == '/' || r == '=':
+			b.WriteRune(r)
+		default:
+			// skip
+		}
+	}
+	return b.String()
+}
+
+// stripANSIEscape removes terminal escape sequences introduced by ESC (0x1b) from s.
+//   - ESC ']' ... is dropped up to and including a BEL (0x07) or an ESC '\' terminator
+//     (presumably the OSC form).
+//   - ESC '[' ... is dropped up to and including the first ASCII letter (presumably the
+//     CSI form).
+//   - Any other ESC, including one at the very end of the input, is dropped on its own
+//     and the following character is kept.
+//
+// An unterminated sequence consumes the rest of the input.
+func stripANSIEscape(s string) string {
+	in := []rune(s)
+	var out []rune
+	for i := 0; i < len(in); i++ {
+		r := in[i]
+		if r != 0x1b {
+			out = append(out, r)
+			continue
+		}
+		if i+1 >= len(in) {
+			continue
+		}
+		next := in[i+1]
+		switch next {
+		case ']':
+			// Each inner loop stops with i on the terminator; the outer i++ then steps past it.
+			i += 2
+			for i < len(in) {
+				if in[i] == 0x07 {
+					break
+				}
+				if in[i] == 0x1b && i+1 < len(in) && in[i+1] == '\\' {
+					i++
+					break
+				}
+				i++
+			}
+		case '[':
+			i += 2
+			for i < len(in) {
+				if (in[i] >= 'A' && in[i] <= 'Z') || (in[i] >= 'a' && in[i] <= 'z') {
+					break
+				}
+				i++
+			}
+		default:
+			// skip single ESC
+		}
+	}
+	return string(out)
+}
--- a/internal/auth/vertex/vertex_credentials.go
+++ b/internal/auth/vertex/vertex_credentials.go
@@ -0,0 +1,66 @@
+// Package vertex provides token storage for Google Vertex AI Gemini via service account credentials.
+// It serialises service account JSON into an auth file that is consumed by the runtime executor.
+package vertex
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
+	log "github.com/sirupsen/logrus"
+)
+
+// VertexCredentialStorage is the JSON payload written to a Vertex AI auth file.
+// It carries the service account content under the "service_account" key, together
+// with helper fields for project, location and email to improve logging and discovery.
+type VertexCredentialStorage struct {
+	// ServiceAccount holds the parsed service account JSON; SaveTokenToFile rejects nil.
+	ServiceAccount map[string]any `json:"service_account"`
+
+	// ProjectID is derived from the service account JSON (project_id).
+	ProjectID string `json:"project_id"`
+
+	// Email is the client_email from the service account JSON.
+	Email string `json:"email"`
+
+	// Location optionally sets a default region (e.g., us-central1); omitted when empty.
+	Location string `json:"location,omitempty"`
+
+	// Type is the provider identifier; SaveTokenToFile sets it to "vertex" before encoding.
+	Type string `json:"type"`
+}
+
+// SaveTokenToFile writes the credential as indented JSON to authFilePath, creating the
+// parent directory (0700) when needed and creating the file with mode 0600.
+func (s *VertexCredentialStorage) SaveTokenToFile(authFilePath string) error {
+	misc.LogSavingCredentials(authFilePath)
+	if s == nil {
+		return fmt.Errorf("vertex credential: storage is nil")
+	}
+	if s.ServiceAccount == nil {
+		return fmt.Errorf("vertex credential: service account content is empty")
+	}
+	// Ensure we tag the file with the provider type.
+	s.Type = "vertex"
+	if err := os.MkdirAll(filepath.Dir(authFilePath), 0o700); err != nil {
+		return fmt.Errorf("vertex credential: create directory failed: %w", err)
+	}
+	// The payload embeds a private key, so avoid os.Create's 0666 default mode.
+	f, err := os.OpenFile(authFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600)
+	if err != nil {
+		return fmt.Errorf("vertex credential: create file failed: %w", err)
+	}
+	defer func() {
+		if errClose := f.Close(); errClose != nil {
+			log.Errorf("vertex credential: failed to close file: %v", errClose)
+		}
+	}()
+	enc := json.NewEncoder(f)
+	enc.SetIndent("", "  ")
+	if err = enc.Encode(s); err != nil {
+		return fmt.Errorf("vertex credential: encode failed: %w", err)
+	}
+	return nil
+}
--- a/internal/cmd/vertex_import.go
+++ b/internal/cmd/vertex_import.go
@@ -0,0 +1,123 @@
+// Package cmd contains CLI helpers. This file implements importing a Vertex AI
+// service account JSON into the auth store as a dedicated "vertex" credential.
+package cmd
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/vertex"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	sdkAuth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth"
+	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+	log "github.com/sirupsen/logrus"
+)
+
+// DoVertexImport reads the service account key JSON at keyPath, normalizes it and
+// saves it through the token store as a "vertex" credential whose file name is
+// derived from the project ID. Every failure is reported with log.Fatalf.
+func DoVertexImport(cfg *config.Config, keyPath string) {
+	if cfg == nil {
+		cfg = &config.Config{}
+	}
+	// Best effort: the configured AuthDir is kept when it cannot be resolved.
+	if dir, errDir := util.ResolveAuthDir(cfg.AuthDir); errDir == nil {
+		cfg.AuthDir = dir
+	}
+
+	keyFile := strings.TrimSpace(keyPath)
+	if keyFile == "" {
+		log.Fatalf("vertex-import: missing service account key path")
+		return
+	}
+	content, errRead := os.ReadFile(keyFile)
+	if errRead != nil {
+		log.Fatalf("vertex-import: read file failed: %v", errRead)
+		return
+	}
+	var account map[string]any
+	if errParse := json.Unmarshal(content, &account); errParse != nil {
+		log.Fatalf("vertex-import: invalid service account json: %v", errParse)
+		return
+	}
+	// Validate and normalize private_key before saving
+	normalized, errNormalize := vertex.NormalizeServiceAccountMap(account)
+	if errNormalize != nil {
+		log.Fatalf("vertex-import: %v", errNormalize)
+		return
+	}
+	account = normalized
+	projectID, _ := account["project_id"].(string)
+	if strings.TrimSpace(projectID) == "" {
+		log.Fatalf("vertex-import: project_id missing in service account json")
+		return
+	}
+	email, _ := account["client_email"].(string)
+	if strings.TrimSpace(email) == "" {
+		// Keep empty email but warn
+		log.Warn("vertex-import: client_email missing in service account json")
+	}
+	// Default location if not provided by user. Can be edited in the saved file later.
+	const location = "us-central1"
+	fileName := fmt.Sprintf("vertex-%s.json", sanitizeFilePart(projectID))
+
+	// Build auth record
+	record := &coreauth.Auth{
+		ID:       fileName,
+		Provider: "vertex",
+		FileName: fileName,
+		Storage: &vertex.VertexCredentialStorage{
+			ServiceAccount: account,
+			ProjectID:      projectID,
+			Email:          email,
+			Location:       location,
+		},
+		Metadata: map[string]any{
+			"service_account": account,
+			"project_id":      projectID,
+			"email":           email,
+			"location":        location,
+			"type":            "vertex",
+			"label":           labelForVertex(projectID, email),
+		},
+	}
+
+	store := sdkAuth.GetTokenStore()
+	if dirSetter, ok := store.(interface{ SetBaseDir(string) }); ok {
+		dirSetter.SetBaseDir(cfg.AuthDir)
+	}
+	savedPath, errSave := store.Save(context.Background(), record)
+	if errSave != nil {
+		log.Fatalf("vertex-import: save credential failed: %v", errSave)
+		return
+	}
+	fmt.Printf("Vertex credentials imported: %s\n", savedPath)
+}
+
+// sanitizeFilePart trims s and makes it usable inside a file name: "/", "\" and
+// ":" become "_", and spaces become "-".
+func sanitizeFilePart(s string) string {
+	// Each pattern is a single character and no replacement matches a pattern,
+	// so one pass gives the same result as replacing the patterns one by one.
+	cleaner := strings.NewReplacer("/", "_", "\\", "_", ":", "_", " ", "-")
+	return cleaner.Replace(strings.TrimSpace(s))
+}
+
+// labelForVertex returns "project (email)" when both trimmed values are set, the
+// one that is set when only one is, and "vertex" when both are blank.
+func labelForVertex(projectID, email string) string {
+	project, account := strings.TrimSpace(projectID), strings.TrimSpace(email)
+	switch {
+	case project != "" && account != "":
+		return fmt.Sprintf("%s (%s)", project, account)
+	case project != "":
+		return project
+	case account != "":
+		return account
+	}
+	return "vertex"
+}
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -5,6 +5,7 @@
 package config

 import (
+	"bytes"
 	"errors"
 	"fmt"
 	"os"
@@ -100,6 +101,9 @@ type ClaudeKey struct {

 	// Models defines upstream model names and aliases for request routing.
 	Models []ClaudeModel `yaml:"models" json:"models"`
+
+	// Headers optionally adds extra HTTP headers for requests sent with this key.
+	Headers map[string]string `yaml:"headers,omitempty" json:"headers,omitempty"`
 }

 // ClaudeModel describes a mapping between an alias and the actual upstream model name.
@@ -123,6 +127,9 @@ type CodexKey struct {

 	// ProxyURL overrides the global proxy setting for this API key if provided.
 	ProxyURL string `yaml:"proxy-url" json:"proxy-url"`
+
+	// Headers optionally adds extra HTTP headers for requests sent with this key.
+	Headers map[string]string `yaml:"headers,omitempty" json:"headers,omitempty"`
 }

 // GeminiKey represents the configuration for a Gemini API key,
@@ -159,6 +166,9 @@ type OpenAICompatibility struct {

 	// Models defines the model configurations including aliases for routing.
 	Models []OpenAICompatibilityModel `yaml:"models" json:"models"`
+
+	// Headers optionally adds extra HTTP headers for requests sent to this provider.
+	Headers map[string]string `yaml:"headers,omitempty" json:"headers,omitempty"`
 }

 // OpenAICompatibilityAPIKey represents an API key configuration with optional proxy setting.
@@ -246,23 +256,26 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 	// Sync request authentication providers with inline API keys for backwards compatibility.
 	syncInlineAccessProvider(&cfg)

-	// Normalize Gemini API key configuration and migrate legacy entries.
-	cfg.SyncGeminiKeys()
-
-	// Sanitize OpenAI compatibility providers: drop entries without base-url
-	sanitizeOpenAICompatibility(&cfg)
+	// Sanitize Gemini API key configuration and migrate legacy entries.
+	cfg.SanitizeGeminiKeys()

 	// Sanitize Codex keys: drop entries without base-url
-	sanitizeCodexKeys(&cfg)
+	cfg.SanitizeCodexKeys()
+
+	// Sanitize Claude key headers
+	cfg.SanitizeClaudeKeys()
+
+	// Sanitize OpenAI compatibility providers: drop entries without base-url
+	cfg.SanitizeOpenAICompatibility()

 	// Return the populated configuration struct.
 	return &cfg, nil
 }

-// sanitizeOpenAICompatibility removes OpenAI-compatibility provider entries that are
+// SanitizeOpenAICompatibility removes OpenAI-compatibility provider entries that are
 // not actionable, specifically those missing a BaseURL. It trims whitespace before
 // evaluation and preserves the relative order of remaining entries.
-func sanitizeOpenAICompatibility(cfg *Config) {
+func (cfg *Config) SanitizeOpenAICompatibility() {
 	if cfg == nil || len(cfg.OpenAICompatibility) == 0 {
 		return
 	}
@@ -271,6 +284,7 @@ func sanitizeOpenAICompatibility(cfg *Config) {
 		e := cfg.OpenAICompatibility[i]
 		e.Name = strings.TrimSpace(e.Name)
 		e.BaseURL = strings.TrimSpace(e.BaseURL)
+		e.Headers = NormalizeHeaders(e.Headers)
 		if e.BaseURL == "" {
 			// Skip providers with no base-url; treated as removed
 			continue
@@ -280,9 +294,9 @@ func sanitizeOpenAICompatibility(cfg *Config) {
 	cfg.OpenAICompatibility = out
 }

-// sanitizeCodexKeys removes Codex API key entries missing a BaseURL.
+// SanitizeCodexKeys removes Codex API key entries missing a BaseURL.
 // It trims whitespace and preserves order for remaining entries.
-func sanitizeCodexKeys(cfg *Config) {
+func (cfg *Config) SanitizeCodexKeys() {
 	if cfg == nil || len(cfg.CodexKey) == 0 {
 		return
 	}
@@ -290,6 +304,7 @@ func sanitizeCodexKeys(cfg *Config) {
 	for i := range cfg.CodexKey {
 		e := cfg.CodexKey[i]
 		e.BaseURL = strings.TrimSpace(e.BaseURL)
+		e.Headers = NormalizeHeaders(e.Headers)
 		if e.BaseURL == "" {
 			continue
 		}
@@ -298,7 +313,19 @@ func sanitizeCodexKeys(cfg *Config) {
 	cfg.CodexKey = out
 }

-func (cfg *Config) SyncGeminiKeys() {
+// SanitizeClaudeKeys replaces the Headers of every Claude key entry, in place, with
+// the result of NormalizeHeaders. A nil receiver is a no-op.
+func (cfg *Config) SanitizeClaudeKeys() {
+	if cfg == nil {
+		return
+	}
+	for i := range cfg.ClaudeKey {
+		cfg.ClaudeKey[i].Headers = NormalizeHeaders(cfg.ClaudeKey[i].Headers)
+	}
+}
+
+// SanitizeGeminiKeys deduplicates and normalizes Gemini credentials.
+func (cfg *Config) SanitizeGeminiKeys() {
 	if cfg == nil {
 		return
 	}
@@ -313,7 +340,7 @@ func (cfg *Config) SyncGeminiKeys() {
 		}
 		entry.BaseURL = strings.TrimSpace(entry.BaseURL)
 		entry.ProxyURL = strings.TrimSpace(entry.ProxyURL)
-		entry.Headers = normalizeGeminiHeaders(entry.Headers)
+		entry.Headers = NormalizeHeaders(entry.Headers)
 		if _, exists := seen[entry.APIKey]; exists {
 			continue
 		}
@@ -356,7 +383,8 @@ func looksLikeBcrypt(s string) bool {
 	return len(s) > 4 && (s[:4] == "$2a$" || s[:4] == "$2b$" || s[:4] == "$2y$")
 }

-func normalizeGeminiHeaders(headers map[string]string) map[string]string {
+// NormalizeHeaders trims header keys and values and removes empty pairs.
+func NormalizeHeaders(headers map[string]string) map[string]string {
 	if len(headers) == 0 {
 		return nil
 	}
@@ -435,13 +463,19 @@ func SaveConfigPreserveComments(configFile string, cfg *Config) error {
 		return err
 	}
 	defer func() { _ = f.Close() }()
-	enc := yaml.NewEncoder(f)
+	var buf bytes.Buffer
+	enc := yaml.NewEncoder(&buf)
 	enc.SetIndent(2)
 	if err = enc.Encode(&original); err != nil {
 		_ = enc.Close()
 		return err
 	}
-	return enc.Close()
+	if err = enc.Close(); err != nil {
+		return err
+	}
+	data = NormalizeCommentIndentation(buf.Bytes())
+	_, err = f.Write(data)
+	return err
 }

 func sanitizeConfigForPersist(cfg *Config) *Config {
@@ -491,13 +525,40 @@ func SaveConfigPreserveCommentsUpdateNestedScalar(configFile string, path []stri
 		return err
 	}
 	defer func() { _ = f.Close() }()
-	enc := yaml.NewEncoder(f)
+	var buf bytes.Buffer
+	enc := yaml.NewEncoder(&buf)
 	enc.SetIndent(2)
 	if err = enc.Encode(&root); err != nil {
 		_ = enc.Close()
 		return err
 	}
-	return enc.Close()
+	if err = enc.Close(); err != nil {
+		return err
+	}
+	data = NormalizeCommentIndentation(buf.Bytes())
+	_, err = f.Write(data)
+	return err
+}
+
+// NormalizeCommentIndentation strips leading spaces and tabs from each line whose first
+// non-blank byte is '#', left-aligning such lines; unchanged input is returned as is.
+func NormalizeCommentIndentation(data []byte) []byte {
+	newline := []byte("\n")
+	lines := bytes.Split(data, newline)
+	modified := false
+	for idx := range lines {
+		body := bytes.TrimLeft(lines[idx], " \t")
+		isComment := len(body) > 0 && body[0] == '#'
+		if !isComment || len(body) == len(lines[idx]) {
+			continue // not a comment line, or already flush left
+		}
+		lines[idx] = append([]byte(nil), body...)
+		modified = true
+	}
+	if !modified {
+		return data
+	}
+	return bytes.Join(lines, newline)
 }

 // getOrCreateMapValue finds the value node for a given key in a mapping node.
@@ -739,6 +800,7 @@ func matchSequenceElement(original []*yaml.Node, used []bool, target *yaml.Node)
 				}
 			}
 		}
+	default:
 	}
 	// Fallback to structural equality to preserve nodes lacking explicit identifiers.
 	for i := range original {
--- a/internal/misc/codex_instructions/gpt_5_codex_prompt.md-007-8c75ed39d5bb94159d21072d7384765d94a9012b
+++ b/internal/misc/codex_instructions/gpt_5_codex_prompt.md-007-8c75ed39d5bb94159d21072d7384765d94a9012b
@@ -0,0 +1,107 @@
+You are Codex, based on GPT-5. You are running as a coding agent in the Codex CLI on a user's computer.
+
+## General
+
+- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"].
+- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary.
+- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)
+
+## Editing constraints
+
+- Default to ASCII when editing or creating files. Only introduce non-ASCII or other Unicode characters when there is a clear justification and the file already uses them.
+- Add succinct code comments that explain what is going on if code is not self-explanatory. You should not add comments like "Assigns the value to the variable", but a brief comment might be useful ahead of a complex code block that the user would otherwise have to spend time parsing out. Usage of these comments should be rare.
+- Try to use apply_patch for single file edits, but it is fine to explore other options to make the edit if it does not work well. Do not use apply_patch for changes that are auto-generated (i.e. generating package.json or running a lint or format command like gofmt) or when scripting is more efficient (such as search and replacing a string across a codebase).
+- You may be in a dirty git worktree.
+    * NEVER revert existing changes you did not make unless explicitly requested, since these changes were made by the user.
+    * If asked to make a commit or code edits and there are unrelated changes to your work or changes that you didn't make in those files, don't revert those changes.
+    * If the changes are in files you've touched recently, you should read carefully and understand how you can work with the changes rather than reverting them.
+    * If the changes are in unrelated files, just ignore them and don't revert them.
+- Do not amend a commit unless explicitly requested to do so.
+- While you are working, you might notice unexpected changes that you didn't make. If this happens, STOP IMMEDIATELY and ask the user how they would like to proceed.
+- **NEVER** use destructive commands like `git reset --hard` or `git checkout --` unless specifically requested or approved by the user.
+
+## Plan tool
+
+When using the planning tool:
+- Skip using the planning tool for straightforward tasks (roughly the easiest 25%).
+- Do not make single-step plans.
+- When you made a plan, update it after having performed one of the sub-tasks that you shared on the plan.
+
+## Codex CLI harness, sandboxing, and approvals
+
+The Codex CLI harness supports several different configurations for sandboxing and escalation approvals that the user can choose from.
+
+Filesystem sandboxing defines which files can be read or written. The options for `sandbox_mode` are:
+- **read-only**: The sandbox only permits reading files.
+- **workspace-write**: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval.
+- **danger-full-access**: No filesystem sandboxing - all commands are permitted.
+
+Network sandboxing defines whether network can be accessed without approval. Options for `network_access` are:
+- **restricted**: Requires approval
+- **enabled**: No approval needed
+
+Approvals are your mechanism to get user consent to run shell commands without the sandbox. Possible configuration options for `approval_policy` are
+- **untrusted**: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands.
+- **on-failure**: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox.
+- **on-request**: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. (Note that this mode is not always available. If it is, you'll see parameters for it in the `shell` command description.)
+- **never**: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user. You MUST do your utmost best to finish the task and validate your work before yielding. If this mode is paired with `danger-full-access`, take advantage of it to deliver the best outcome for the user. Further, in this mode, your default testing philosophy is overridden: Even if you don't see local patterns for testing, you may add tests and scripts to validate your work. Just remove them before yielding.
+
+When you are running with `approval_policy == on-request`, and sandboxing enabled, here are scenarios where you'll need to request approval:
+- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var)
+- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.
+- You are running sandboxed and need to run a command that requires network access (e.g. installing packages)
+- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `with_escalated_permissions` and `justification` parameters - do not message the user before requesting approval for the command.
+- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for
+- (for all of these, you should weigh alternative paths that do not require approval)
+
+When `sandbox_mode` is set to read-only, you'll need to request approval for any command that isn't a read.
+
+You will be told what filesystem sandboxing, network sandboxing, and approval mode are active in a developer or user message. If you are not told about this, assume that you are running with workspace-write, network sandboxing enabled, and approval on-failure.
+
+Although they introduce friction to the user because your work is paused until the user responds, you should leverage them when necessary to accomplish important work. If the completing the task requires escalated permissions, Do not let these settings or the sandbox deter you from attempting to accomplish the user's task unless it is set to "never", in which case never ask for approvals.
+
+When requesting approval to execute a command that will require escalated privileges:
+  - Provide the `with_escalated_permissions` parameter with the boolean value true
+  - Include a short, 1 sentence explanation for why you need to enable `with_escalated_permissions` in the justification parameter
+
+## Special user requests
+
+- If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as `date`), you should do so.
+- If the user asks for a "review", default to a code review mindset: prioritise identifying bugs, risks, behavioural regressions, and missing tests. Findings must be the primary focus of the response - keep summaries or overviews brief and only after enumerating the issues. Present findings first (ordered by severity with file/line references), follow with open questions or assumptions, and offer a change-summary only as a secondary detail. If no findings are discovered, state that explicitly and mention any residual risks or testing gaps.
+
+## Presenting your work and final message
+
+You are producing plain text that will later be styled by the CLI. Follow these rules exactly. Formatting should make results easy to scan, but not feel mechanical. Use judgment to decide how much structure adds value.
+
+- Default: be very concise; friendly coding teammate tone.
+- Ask only when needed; suggest ideas; mirror the user's style.
+- For substantial work, summarize clearly; follow final‑answer formatting.
+- Skip heavy formatting for simple confirmations.
+- Don't dump large files you've written; reference paths only.
+- No "save/copy this file" - User is on the same machine.
+- Offer logical next steps (tests, commits, build) briefly; add verify steps if you couldn't do something.
+- For code changes:
+  * Lead with a quick explanation of the change, and then give more details on the context covering where and why a change was made. Do not start this explanation with "summary", just jump right in.
+  * If there are natural next steps the user may want to take, suggest them at the end of your response. Do not make suggestions if there are no natural next steps.
+  * When suggesting multiple options, use numeric lists for the suggestions so the user can quickly respond with a single number.
+- The user does not command execution outputs. When asked to show the output of a command (e.g. `git show`), relay the important details in your answer or summarize the key lines so the user understands the result.
+
+### Final answer structure and style guidelines
+
+- Plain text; CLI handles styling. Use structure only when it helps scanability.
+- Headers: optional; short Title Case (1-3 words) wrapped in **…**; no blank line before the first bullet; add only if they truly help.
+- Bullets: use - ; merge related points; keep to one line when possible; 4–6 per list ordered by importance; keep phrasing consistent.
+- Monospace: backticks for commands/paths/env vars/code ids and inline examples; use for literal keyword bullets; never combine with **.
+- Code samples or multi-line snippets should be wrapped in fenced code blocks; include an info string as often as possible.
+- Structure: group related bullets; order sections general → specific → supporting; for subsections, start with a bolded keyword bullet, then items; match complexity to the task.
+- Tone: collaborative, concise, factual; present tense, active voice; self‑contained; no "above/below"; parallel wording.
+- Don'ts: no nested bullets/hierarchies; no ANSI codes; don't cram unrelated keywords; keep keyword lists short—wrap/reformat if long; avoid naming formatting styles in answers.
+- Adaptation: code explanations → precise, structured with code refs; simple tasks → lead with outcome; big changes → logical walkthrough + rationale + next actions; casual one-offs → plain sentences, no headers/bullets.
+- File References: When referencing files in your response, make sure to include the relevant start line and always follow the below rules:
+  * Use inline code to make file paths clickable.
+  * Each reference should have a stand alone path. Even if it's the same file.
+  * Accepted: absolute, workspace‑relative, a/ or b/ diff prefixes, or bare filename/suffix.
+  * Line/column (1‑based, optional): :line[:column] or #Lline[Ccolumn] (column defaults to 1).
+  * Do not use URIs like file://, vscode://, or https://.
+  * Do not provide range of lines
+  * Examples: src/app.ts, src/app.ts:42, b/server/index.js#L10, C:\repo\project\main.rs:12:5
--- a/internal/misc/codex_instructions/review_prompt.md-002-f842849bec97326ad6fb40e9955b6ba9f0f3fc0d
+++ b/internal/misc/codex_instructions/review_prompt.md-002-f842849bec97326ad6fb40e9955b6ba9f0f3fc0d
@@ -0,0 +1,87 @@
+# Review guidelines:
+
+You are acting as a reviewer for a proposed code change made by another engineer.
+
+Below are some default guidelines for determining whether the original author would appreciate the issue being flagged.
+
+These are not the final word in determining whether an issue is a bug. In many cases, you will encounter other, more specific guidelines. These may be present elsewhere in a developer message, a user message, a file, or even elsewhere in this system message.
+Those guidelines should be considered to override these general instructions.
+
+Here are the general guidelines for determining whether something is a bug and should be flagged.
+
+1. It meaningfully impacts the accuracy, performance, security, or maintainability of the code.
+2. The bug is discrete and actionable (i.e. not a general issue with the codebase or a combination of multiple issues).
+3. Fixing the bug does not demand a level of rigor that is not present in the rest of the codebase (e.g. one doesn't need very detailed comments and input validation in a repository of one-off scripts in personal projects)
+4. The bug was introduced in the commit (pre-existing bugs should not be flagged).
+5. The author of the original PR would likely fix the issue if they were made aware of it.
+6. The bug does not rely on unstated assumptions about the codebase or author's intent.
+7. It is not enough to speculate that a change may disrupt another part of the codebase, to be considered a bug, one must identify the other parts of the code that are provably affected.
+8. The bug is clearly not just an intentional change by the original author.
+
+When flagging a bug, you will also provide an accompanying comment. Once again, these guidelines are not the final word on how to construct a comment -- defer to any subsequent guidelines that you encounter.
+
+1. The comment should be clear about why the issue is a bug.
+2. The comment should appropriately communicate the severity of the issue. It should not claim that an issue is more severe than it actually is.
+3. The comment should be brief. The body should be at most 1 paragraph. It should not introduce line breaks within the natural language flow unless it is necessary for the code fragment.
+4. The comment should not include any chunks of code longer than 3 lines. Any code chunks should be wrapped in markdown inline code tags or a code block.
+5. The comment should clearly and explicitly communicate the scenarios, environments, or inputs that are necessary for the bug to arise. The comment should immediately indicate that the issue's severity depends on these factors.
+6. The comment's tone should be matter-of-fact and not accusatory or overly positive. It should read as a helpful AI assistant suggestion without sounding too much like a human reviewer.
+7. The comment should be written such that the original author can immediately grasp the idea without close reading.
+8. The comment should avoid excessive flattery and comments that are not helpful to the original author. The comment should avoid phrasing like "Great job ...", "Thanks for ...".
+
+Below are some more detailed guidelines that you should apply to this specific review.
+
+HOW MANY FINDINGS TO RETURN:
+
+Output all findings that the original author would fix if they knew about it. If there is no finding that a person would definitely love to see and fix, prefer outputting no findings. Do not stop at the first qualifying finding. Continue until you've listed every qualifying finding.
+
+GUIDELINES:
+
+- Ignore trivial style unless it obscures meaning or violates documented standards.
+- Use one comment per distinct issue (or a multi-line range if necessary).
+- Use ```suggestion blocks ONLY for concrete replacement code (minimal lines; no commentary inside the block).
+- In every ```suggestion block, preserve the exact leading whitespace of the replaced lines (spaces vs tabs, number of spaces).
+- Do NOT introduce or remove outer indentation levels unless that is the actual fix.
+
+The comments will be presented in the code review as inline comments. You should avoid providing unnecessary location details in the comment body. Always keep the line range as short as possible for interpreting the issue. Avoid ranges longer than 5–10 lines; instead, choose the most suitable subrange that pinpoints the problem.
+
+At the beginning of the finding title, tag the bug with priority level. For example "[P1] Un-padding slices along wrong tensor dimensions". [P0] – Drop everything to fix.  Blocking release, operations, or major usage. Only use for universal issues that do not depend on any assumptions about the inputs. · [P1] – Urgent. Should be addressed in the next cycle · [P2] – Normal. To be fixed eventually · [P3] – Low. Nice to have.
+
+Additionally, include a numeric priority field in the JSON output for each finding: set "priority" to 0 for P0, 1 for P1, 2 for P2, or 3 for P3. If a priority cannot be determined, omit the field or use null.
+
+At the end of your findings, output an "overall correctness" verdict of whether or not the patch should be considered "correct".
+Correct implies that existing code and tests will not break, and the patch is free of bugs and other blocking issues.
+Ignore non-blocking issues such as style, formatting, typos, documentation, and other nits.
+
+FORMATTING GUIDELINES:
+The finding description should be one paragraph.
+
+OUTPUT FORMAT:
+
+## Output schema  — MUST MATCH *exactly*
+
+```json
+{
+  "findings": [
+    {
+      "title": "<≤ 80 chars, imperative>",
+      "body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
+      "confidence_score": <float 0.0-1.0>,
+      "priority": <int 0-3, optional>,
+      "code_location": {
+        "absolute_file_path": "<file path>",
+        "line_range": {"start": <int>, "end": <int>}
+      }
+    }
+  ],
+  "overall_correctness": "patch is correct" | "patch is incorrect",
+  "overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
+  "overall_confidence_score": <float 0.0-1.0>
+}
+```
+
+* **Do not** wrap the JSON in markdown fences or extra prose.
+* The code_location field is required and must include absolute_file_path and line_range.
+* Line ranges must be as short as possible for interpreting the issue (avoid ranges over 5–10 lines; pick the most suitable subrange).
+* The code_location should overlap with the diff.
+* Do not generate a PR fix.
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -124,26 +124,53 @@ func GetGeminiModels() []*ModelInfo { return GeminiModels() }

 // GetGeminiCLIModels returns the standard Gemini model definitions
 func GetGeminiCLIModels() []*ModelInfo {
-	base := GeminiModels()
-	return append(base,
-		[]*ModelInfo{
-			{
-				ID:                         "gemini-3-pro-preview-11-2025",
-				Object:                     "model",
-				Created:                    time.Now().Unix(),
-				OwnedBy:                    "google",
-				Type:                       "gemini",
-				Name:                       "models/gemini-3-pro-preview-11-2025",
-				Version:                    "3",
-				DisplayName:                "Gemini 3 Pro Preview 11-2025",
-				Description:                "Latest preview of Gemini Pro",
-				InputTokenLimit:            1048576,
-				OutputTokenLimit:           65536,
-				SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-				Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-			},
-		}...,
-	)
+	return []*ModelInfo{
+		{
+			ID:                         "gemini-2.5-flash",
+			Object:                     "model",
+			Created:                    time.Now().Unix(),
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash",
+			Version:                    "001",
+			DisplayName:                "Gemini 2.5 Flash",
+			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-pro",
+			Object:                     "model",
+			Created:                    time.Now().Unix(),
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-pro",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Pro",
+			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-3-pro-preview-11-2025",
+			Object:                     "model",
+			Created:                    time.Now().Unix(),
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-preview-11-2025",
+			Version:                    "3",
+			DisplayName:                "Gemini 3 Pro Preview 11-2025",
+			Description:                "Latest preview of Gemini Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+	}
 }

 // GetAIStudioModels returns the Gemini model definitions for AI Studio integrations
@@ -352,17 +379,43 @@ func GetOpenAIModels() []*ModelInfo {
 			SupportedParameters: []string{"tools"},
 		},
 		{
-			ID:                  "codex-mini-latest",
+			ID:                  "gpt-5-codex-mini",
 			Object:              "model",
 			Created:             time.Now().Unix(),
 			OwnedBy:             "openai",
 			Type:                "openai",
-			Version:             "1.0",
-			DisplayName:         "Codex Mini",
-			Description:         "Lightweight code generation model",
-			ContextLength:       4096,
-			MaxCompletionTokens: 2048,
-			SupportedParameters: []string{"temperature", "max_tokens", "stream", "stop"},
+			Version:             "gpt-5-2025-11-07",
+			DisplayName:         "GPT 5 Codex Mini",
+			Description:         "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+		},
+		{
+			ID:                  "gpt-5-codex-mini-medium",
+			Object:              "model",
+			Created:             time.Now().Unix(),
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5-2025-11-07",
+			DisplayName:         "GPT 5 Codex Mini Medium",
+			Description:         "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+		},
+		{
+			ID:                  "gpt-5-codex-mini-high",
+			Object:              "model",
+			Created:             time.Now().Unix(),
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5-2025-11-07",
+			DisplayName:         "GPT 5 Codex Mini High",
+			Description:         "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
 		},
 	}
 }
@@ -438,6 +491,7 @@ func GetIFlowModels() []*ModelInfo {
 		{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)"},
 		{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct"},
 		{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B"},
+		{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2"},
 	}
 	models := make([]*ModelInfo, 0, len(entries))
 	for _, entry := range entries {
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -17,6 +17,7 @@ import (
 	claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
@@ -67,7 +68,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	if err != nil {
 		return resp, err
 	}
-	applyClaudeHeaders(httpReq, apiKey, false)
+	applyClaudeHeaders(httpReq, auth, apiKey, false)
 	var authID, authLabel, authType, authValue string
 	if auth != nil {
 		authID = auth.ID
@@ -159,7 +160,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	if err != nil {
 		return nil, err
 	}
-	applyClaudeHeaders(httpReq, apiKey, true)
+	applyClaudeHeaders(httpReq, auth, apiKey, true)
 	var authID, authLabel, authType, authValue string
 	if auth != nil {
 		authID = auth.ID
@@ -290,7 +291,7 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
 	}
-	applyClaudeHeaders(httpReq, apiKey, false)
+	applyClaudeHeaders(httpReq, auth, apiKey, false)
 	var authID, authLabel, authType, authValue string
 	if auth != nil {
 		authID = auth.ID
@@ -529,7 +530,7 @@ func decodeResponseBody(body io.ReadCloser, contentEncoding string) (io.ReadClos
 	return body, nil
 }

-func applyClaudeHeaders(r *http.Request, apiKey string, stream bool) {
+func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string, stream bool) {
 	r.Header.Set("Authorization", "Bearer "+apiKey)
 	r.Header.Set("Content-Type", "application/json")

@@ -564,9 +565,14 @@ func applyClaudeHeaders(r *http.Request, apiKey string, stream bool) {
 	r.Header.Set("Accept-Encoding", "gzip, deflate, br, zstd")
 	if stream {
 		r.Header.Set("Accept", "text/event-stream")
-		return
+	} else {
+		r.Header.Set("Accept", "application/json")
 	}
-	r.Header.Set("Accept", "application/json")
+	var attrs map[string]string
+	if auth != nil {
+		attrs = auth.Attributes
+	}
+	util.ApplyCustomHeadersFromAttrs(r, attrs)
 }

 func claudeCreds(a *cliproxyauth.Auth) (apiKey, baseURL string) {
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -75,6 +75,16 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 		case "gpt-5-codex-high":
 			body, _ = sjson.SetBytes(body, "reasoning.effort", "high")
 		}
+	} else if util.InArray([]string{"gpt-5-codex-mini", "gpt-5-codex-mini-medium", "gpt-5-codex-mini-high"}, req.Model) {
+		body, _ = sjson.SetBytes(body, "model", "gpt-5-codex-mini")
+		switch req.Model {
+		case "gpt-5-codex-mini-medium":
+			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
+		case "gpt-5-codex-mini-high":
+			body, _ = sjson.SetBytes(body, "reasoning.effort", "high")
+		default:
+			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
+		}
 	}

 	body, _ = sjson.SetBytes(body, "stream", true)
@@ -188,6 +198,14 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 		case "gpt-5-codex-high":
 			body, _ = sjson.SetBytes(body, "reasoning.effort", "high")
 		}
+	} else if util.InArray([]string{"gpt-5-codex-mini", "gpt-5-codex-mini-medium", "gpt-5-codex-mini-high"}, req.Model) {
+		body, _ = sjson.SetBytes(body, "model", "gpt-5-codex-mini")
+		switch req.Model {
+		case "gpt-5-codex-mini-medium":
+			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
+		case "gpt-5-codex-mini-high":
+			body, _ = sjson.SetBytes(body, "reasoning.effort", "high")
+		}
 	}

 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
@@ -312,6 +330,17 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
 		default:
 			body, _ = sjson.SetBytes(body, "reasoning.effort", "low")
 		}
+	} else if util.InArray([]string{"gpt-5-codex-mini", "gpt-5-codex-mini-medium", "gpt-5-codex-mini-high"}, req.Model) {
+		modelForCounting = "gpt-5"
+		body, _ = sjson.SetBytes(body, "model", "codex-mini-latest")
+		switch req.Model {
+		case "gpt-5-codex-mini-medium":
+			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
+		case "gpt-5-codex-mini-high":
+			body, _ = sjson.SetBytes(body, "reasoning.effort", "high")
+		default:
+			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
+		}
 	}

 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
@@ -508,6 +537,11 @@ func (e *CodexExecutor) cacheHelper(ctx context.Context, from sdktranslator.Form
 				codexCacheMap[key] = cache
 			}
 		}
+	} else if from == "openai-response" {
+		promptCacheKey := gjson.GetBytes(req.Payload, "prompt_cache_key")
+		if promptCacheKey.Exists() {
+			cache.ID = promptCacheKey.String()
+		}
 	}

 	rawJSON, _ = sjson.SetBytes(rawJSON, "prompt_cache_key", cache.ID)
@@ -551,6 +585,11 @@ func applyCodexHeaders(r *http.Request, auth *cliproxyauth.Auth, token string) {
 			}
 		}
 	}
+	var attrs map[string]string
+	if auth != nil {
+		attrs = auth.Attributes
+	}
+	util.ApplyCustomHeadersFromAttrs(r, attrs)
 }

 func codexCreds(a *cliproxyauth.Auth) (apiKey, baseURL string) {
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -495,44 +495,11 @@ func resolveGeminiBaseURL(auth *cliproxyauth.Auth) string {
 }

 func applyGeminiHeaders(req *http.Request, auth *cliproxyauth.Auth) {
-	if req == nil {
-		return
+	var attrs map[string]string
+	if auth != nil {
+		attrs = auth.Attributes
 	}
-	headers := geminiCustomHeaders(auth)
-	if len(headers) == 0 {
-		return
-	}
-	for k, v := range headers {
-		if k == "" || v == "" {
-			continue
-		}
-		req.Header.Set(k, v)
-	}
-}
-
-func geminiCustomHeaders(auth *cliproxyauth.Auth) map[string]string {
-	if auth == nil || auth.Attributes == nil {
-		return nil
-	}
-	headers := make(map[string]string, len(auth.Attributes))
-	for k, v := range auth.Attributes {
-		if !strings.HasPrefix(k, "header:") {
-			continue
-		}
-		name := strings.TrimSpace(strings.TrimPrefix(k, "header:"))
-		if name == "" {
-			continue
-		}
-		val := strings.TrimSpace(v)
-		if val == "" {
-			continue
-		}
-		headers[name] = val
-	}
-	if len(headers) == 0 {
-		return nil
-	}
-	return headers
+	util.ApplyCustomHeadersFromAttrs(req, attrs)
 }

 func fixGeminiImageAspectRatio(modelName string, rawJSON []byte) []byte {
--- a/internal/runtime/executor/gemini_vertex_executor.go
+++ b/internal/runtime/executor/gemini_vertex_executor.go
@@ -0,0 +1,421 @@
+// Package executor contains provider executors. This file implements the Vertex AI
+// Gemini executor that talks to Google Vertex AI endpoints using service account
+// credentials imported by the CLI.
+package executor
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+
+	vertexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/vertex"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
+	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
+	log "github.com/sirupsen/logrus"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+	"golang.org/x/oauth2/google"
+)
+
+const (
+	// vertexAPIVersion aligns with current public Vertex Generative AI API.
+	// It is interpolated into every request URL as the path segment that directly
+	// follows the base URL returned by vertexBaseURL.
+	vertexAPIVersion = "v1"
+)
+
+// GeminiVertexExecutor sends requests to Vertex AI Gemini endpoints using service account credentials.
+// The credentials themselves are not stored on the executor; each call reads them
+// from the *cliproxyauth.Auth it is given.
+type GeminiVertexExecutor struct {
+	// cfg is handed to the request/response logging helpers and to the
+	// proxy-aware HTTP client constructor on every call.
+	cfg *config.Config
+}
+
+// NewGeminiVertexExecutor constructs the Vertex executor.
+// The cfg pointer is stored as-is (not copied) and may be nil as far as this
+// constructor is concerned.
+func NewGeminiVertexExecutor(cfg *config.Config) *GeminiVertexExecutor {
+	return &GeminiVertexExecutor{cfg: cfg}
+}
+
+// Identifier returns provider key for manager routing.
+// The key is the fixed string "vertex"; this executor also uses it as the
+// Provider value in its usage reporter and upstream request logs.
+func (e *GeminiVertexExecutor) Identifier() string { return "vertex" }
+
+// PrepareRequest is a no-op for Vertex.
+// Both arguments are ignored and the result is always nil; authentication
+// headers are attached inside Execute, ExecuteStream and CountTokens instead.
+func (e *GeminiVertexExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error {
+	return nil
+}
+
+// Execute handles non-streaming requests.
+//
+// It translates req.Payload from opts.SourceFormat to the "gemini" format, POSTs it
+// to the Vertex generateContent endpoint (or countTokens when
+// req.Metadata["action"] is "countTokens"), and returns the upstream body
+// translated back to the source format. Credential problems are returned as the
+// error from vertexCreds; a non-2xx upstream status is returned as a statusErr
+// carrying the status code and the raw response body.
+func (e *GeminiVertexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
+	projectID, location, saJSON, errCreds := vertexCreds(auth)
+	if errCreds != nil {
+		return resp, errCreds
+	}
+
+	// trackFailure is deferred with a pointer to the named result err, so it sees
+	// whatever error this function finally returns.
+	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	defer reporter.trackFailure(ctx, &err)
+
+	from := opts.SourceFormat
+	to := sdktranslator.FromString("gemini")
+	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+	// A thinking override taken from request metadata is applied only when the
+	// model reports thinking support; a budget override is normalized first.
+	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+		if budgetOverride != nil {
+			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+			budgetOverride = &norm
+		}
+		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
+	}
+	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
+	body = fixGeminiImageAspectRatio(req.Model, body)
+
+	// Default to generateContent; metadata may switch the call to countTokens.
+	action := "generateContent"
+	if req.Metadata != nil {
+		if a, _ := req.Metadata["action"].(string); a == "countTokens" {
+			action = "countTokens"
+		}
+	}
+	baseURL := vertexBaseURL(location)
+	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, action)
+	// opts.Alt is forwarded as the $alt query parameter, except for countTokens.
+	if opts.Alt != "" && action != "countTokens" {
+		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
+	}
+	// session_id is removed from the translated body before it is sent upstream.
+	body, _ = sjson.DeleteBytes(body, "session_id")
+
+	httpReq, errNewReq := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
+	if errNewReq != nil {
+		return resp, errNewReq
+	}
+	httpReq.Header.Set("Content-Type", "application/json")
+	// A token error is logged here and surfaced to the caller only as a generic 500.
+	// NOTE(review): an empty token with a nil error falls through both branches, so
+	// the request would be sent without an Authorization header.
+	if token, errTok := vertexAccessToken(ctx, saJSON); errTok == nil && token != "" {
+		httpReq.Header.Set("Authorization", "Bearer "+token)
+	} else if errTok != nil {
+		log.Errorf("vertex executor: access token error: %v", errTok)
+		return resp, statusErr{code: 500, msg: "internal server error"}
+	}
+	applyGeminiHeaders(httpReq, auth)
+
+	// Collect auth identity fields for the upstream request log entry.
+	var authID, authLabel, authType, authValue string
+	if auth != nil {
+		authID = auth.ID
+		authLabel = auth.Label
+		authType, authValue = auth.AccountInfo()
+	}
+	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+		URL:       url,
+		Method:    http.MethodPost,
+		Headers:   httpReq.Header.Clone(),
+		Body:      body,
+		Provider:  e.Identifier(),
+		AuthID:    authID,
+		AuthLabel: authLabel,
+		AuthType:  authType,
+		AuthValue: authValue,
+	})
+
+	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpResp, errDo := httpClient.Do(httpReq)
+	if errDo != nil {
+		recordAPIResponseError(ctx, e.cfg, errDo)
+		return resp, errDo
+	}
+	// The body is closed on every return path below; a close failure is only logged.
+	defer func() {
+		if errClose := httpResp.Body.Close(); errClose != nil {
+			log.Errorf("vertex executor: close response body error: %v", errClose)
+		}
+	}()
+	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
+		// Read errors are ignored here: whatever part of the error body was read
+		// is logged and returned alongside the upstream status code.
+		b, _ := io.ReadAll(httpResp.Body)
+		appendAPIResponseChunk(ctx, e.cfg, b)
+		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
+		return resp, err
+	}
+	data, errRead := io.ReadAll(httpResp.Body)
+	if errRead != nil {
+		recordAPIResponseError(ctx, e.cfg, errRead)
+		return resp, errRead
+	}
+	appendAPIResponseChunk(ctx, e.cfg, data)
+	reporter.publish(ctx, parseGeminiUsage(data))
+	// Translate the Gemini response back into the caller's source format.
+	var param any
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
+	resp = cliproxyexecutor.Response{Payload: []byte(out)}
+	return resp, nil
+}
+
+// ExecuteStream handles SSE streaming for Vertex.
+//
+// It POSTs the translated request to the streamGenerateContent endpoint and, on a
+// 2xx response, returns a channel fed by a goroutine that scans the response body
+// line by line and forwards every translated chunk. The goroutine closes both the
+// channel and the response body when scanning stops. A non-2xx status is returned
+// synchronously as a statusErr and no channel is produced.
+func (e *GeminiVertexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
+	projectID, location, saJSON, errCreds := vertexCreds(auth)
+	if errCreds != nil {
+		return nil, errCreds
+	}
+
+	// trackFailure is deferred with a pointer to the named result err, so it only
+	// observes errors returned synchronously from this function, not errors that
+	// occur later inside the streaming goroutine.
+	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	defer reporter.trackFailure(ctx, &err)
+
+	from := opts.SourceFormat
+	to := sdktranslator.FromString("gemini")
+	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
+	// A thinking override taken from request metadata is applied only when the
+	// model reports thinking support; a budget override is normalized first.
+	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+		if budgetOverride != nil {
+			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+			budgetOverride = &norm
+		}
+		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
+	}
+	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
+	body = fixGeminiImageAspectRatio(req.Model, body)
+
+	baseURL := vertexBaseURL(location)
+	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "streamGenerateContent")
+	// Without an explicit Alt the endpoint is asked for SSE framing (alt=sse);
+	// otherwise opts.Alt is forwarded as the $alt query parameter.
+	if opts.Alt == "" {
+		url = url + "?alt=sse"
+	} else {
+		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
+	}
+	// session_id is removed from the translated body before it is sent upstream.
+	body, _ = sjson.DeleteBytes(body, "session_id")
+
+	httpReq, errNewReq := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
+	if errNewReq != nil {
+		return nil, errNewReq
+	}
+	httpReq.Header.Set("Content-Type", "application/json")
+	// A token error is logged here and surfaced to the caller only as a generic 500.
+	// NOTE(review): an empty token with a nil error falls through both branches, so
+	// the request would be sent without an Authorization header.
+	if token, errTok := vertexAccessToken(ctx, saJSON); errTok == nil && token != "" {
+		httpReq.Header.Set("Authorization", "Bearer "+token)
+	} else if errTok != nil {
+		log.Errorf("vertex executor: access token error: %v", errTok)
+		return nil, statusErr{code: 500, msg: "internal server error"}
+	}
+	applyGeminiHeaders(httpReq, auth)
+
+	// Collect auth identity fields for the upstream request log entry.
+	var authID, authLabel, authType, authValue string
+	if auth != nil {
+		authID = auth.ID
+		authLabel = auth.Label
+		authType, authValue = auth.AccountInfo()
+	}
+	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+		URL:       url,
+		Method:    http.MethodPost,
+		Headers:   httpReq.Header.Clone(),
+		Body:      body,
+		Provider:  e.Identifier(),
+		AuthID:    authID,
+		AuthLabel: authLabel,
+		AuthType:  authType,
+		AuthValue: authValue,
+	})
+
+	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpResp, errDo := httpClient.Do(httpReq)
+	if errDo != nil {
+		recordAPIResponseError(ctx, e.cfg, errDo)
+		return nil, errDo
+	}
+	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
+		// No deferred close exists on this path, so the body is closed explicitly
+		// after the error payload has been read and logged.
+		b, _ := io.ReadAll(httpResp.Body)
+		appendAPIResponseChunk(ctx, e.cfg, b)
+		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		if errClose := httpResp.Body.Close(); errClose != nil {
+			log.Errorf("vertex executor: close response body error: %v", errClose)
+		}
+		return nil, statusErr{code: httpResp.StatusCode, msg: string(b)}
+	}
+
+	// From here on the goroutine owns httpResp.Body and the send side of out.
+	// NOTE(review): out is unbuffered and the sends below do not select on
+	// ctx.Done(), so each send blocks until the consumer receives.
+	out := make(chan cliproxyexecutor.StreamChunk)
+	stream = out
+	go func() {
+		defer close(out)
+		defer func() {
+			if errClose := httpResp.Body.Close(); errClose != nil {
+				log.Errorf("vertex executor: close response body error: %v", errClose)
+			}
+		}()
+		// Raise the scanner's line limit to 20_971_520 bytes (20 MiB); the default
+		// bufio.Scanner token limit is far smaller.
+		scanner := bufio.NewScanner(httpResp.Body)
+		buf := make([]byte, 20_971_520)
+		scanner.Buffer(buf, 20_971_520)
+		var param any
+		for scanner.Scan() {
+			line := scanner.Bytes()
+			appendAPIResponseChunk(ctx, e.cfg, line)
+			if detail, ok := parseGeminiStreamUsage(line); ok {
+				reporter.publish(ctx, detail)
+			}
+			// scanner.Bytes() is only valid until the next Scan, hence the clone
+			// handed to the translator.
+			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param)
+			for i := range lines {
+				out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
+			}
+		}
+		// After the upstream stream ends, a synthetic "[DONE]" line is run through
+		// the translator and anything it yields is forwarded. This happens before
+		// scanner.Err() is inspected, so a scan error is delivered last.
+		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, []byte("[DONE]"), &param)
+		for i := range lines {
+			out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
+		}
+		if errScan := scanner.Err(); errScan != nil {
+			recordAPIResponseError(ctx, e.cfg, errScan)
+			reporter.publishFailure(ctx)
+			out <- cliproxyexecutor.StreamChunk{Err: errScan}
+		}
+	}()
+	return stream, nil
+}
+
+// CountTokens calls the Vertex countTokens endpoint for req.Model.
+//
+// The payload is translated from opts.SourceFormat to the "gemini" format and
+// stripped of tools, generationConfig and safetySettings before it is sent. The
+// upstream totalTokens value is then translated back to the source format.
+// Credential problems are returned as the error from vertexCreds; a non-2xx
+// upstream status is returned as a statusErr carrying the status code and the
+// raw response body.
+func (e *GeminiVertexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	projectID, location, saJSON, errCreds := vertexCreds(auth)
+	if errCreds != nil {
+		return cliproxyexecutor.Response{}, errCreds
+	}
+	from := opts.SourceFormat
+	to := sdktranslator.FromString("gemini")
+	translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+	// A thinking override taken from request metadata is applied only when the
+	// model reports thinking support; a budget override is normalized first.
+	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+		if budgetOverride != nil {
+			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+			budgetOverride = &norm
+		}
+		translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
+	}
+	translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
+	translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
+	// respCtx carries opts.Alt under the "alt" key for the request and the
+	// response translator.
+	respCtx := context.WithValue(ctx, "alt", opts.Alt)
+	// These three fields are dropped from the countTokens request body.
+	translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
+	translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
+	translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
+
+	baseURL := vertexBaseURL(location)
+	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "countTokens")
+
+	httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
+	if errNewReq != nil {
+		return cliproxyexecutor.Response{}, errNewReq
+	}
+	httpReq.Header.Set("Content-Type", "application/json")
+	// A token error is logged here and surfaced to the caller only as a generic 500.
+	if token, errTok := vertexAccessToken(ctx, saJSON); errTok == nil && token != "" {
+		httpReq.Header.Set("Authorization", "Bearer "+token)
+	} else if errTok != nil {
+		log.Errorf("vertex executor: access token error: %v", errTok)
+		return cliproxyexecutor.Response{}, statusErr{code: 500, msg: "internal server error"}
+	}
+	applyGeminiHeaders(httpReq, auth)
+
+	// Collect auth identity fields for the upstream request log entry.
+	var authID, authLabel, authType, authValue string
+	if auth != nil {
+		authID = auth.ID
+		authLabel = auth.Label
+		authType, authValue = auth.AccountInfo()
+	}
+	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+		URL:       url,
+		Method:    http.MethodPost,
+		Headers:   httpReq.Header.Clone(),
+		Body:      translatedReq,
+		Provider:  e.Identifier(),
+		AuthID:    authID,
+		AuthLabel: authLabel,
+		AuthType:  authType,
+		AuthValue: authValue,
+	})
+
+	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpResp, errDo := httpClient.Do(httpReq)
+	if errDo != nil {
+		recordAPIResponseError(ctx, e.cfg, errDo)
+		return cliproxyexecutor.Response{}, errDo
+	}
+	// The body is closed on every return path below; a close failure is only logged.
+	defer func() {
+		if errClose := httpResp.Body.Close(); errClose != nil {
+			log.Errorf("vertex executor: close response body error: %v", errClose)
+		}
+	}()
+	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+	// This is the only status check needed: every non-2xx response returns here,
+	// so the status does not have to be re-tested after the success body is read.
+	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
+		b, _ := io.ReadAll(httpResp.Body)
+		appendAPIResponseChunk(ctx, e.cfg, b)
+		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
+	}
+	data, errRead := io.ReadAll(httpResp.Body)
+	if errRead != nil {
+		recordAPIResponseError(ctx, e.cfg, errRead)
+		return cliproxyexecutor.Response{}, errRead
+	}
+	appendAPIResponseChunk(ctx, e.cfg, data)
+	count := gjson.GetBytes(data, "totalTokens").Int()
+	// Pass respCtx rather than ctx so the "alt" value stored above is visible to
+	// the translator as well.
+	out := sdktranslator.TranslateTokenCount(respCtx, to, from, count, data)
+	return cliproxyexecutor.Response{Payload: []byte(out)}, nil
+}
+
+// Refresh is a no-op for service account based credentials.
+// It returns the auth it was given, unchanged, together with a nil error; a new
+// access token is requested via vertexAccessToken on each executor call instead.
+func (e *GeminiVertexExecutor) Refresh(_ context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
+	return auth, nil
+}
+
+// vertexCreds reads the Vertex connection settings out of a.Metadata.
+//
+// The project ID comes from the "project_id" string, falling back to "project"
+// when the former is missing or blank. The location comes from the "location"
+// string and defaults to "us-central1". The "service_account" map is normalized
+// through vertexauth.NormalizeServiceAccountMap and returned as marshalled JSON.
+// An error is returned when the auth or its metadata is nil, when no project ID
+// or service account is present, or when normalizing or marshalling fails; in
+// every error case the other results are zero values.
+func vertexCreds(a *cliproxyauth.Auth) (projectID, location string, serviceAccountJSON []byte, err error) {
+	if a == nil || a.Metadata == nil {
+		return "", "", nil, fmt.Errorf("vertex executor: missing auth metadata")
+	}
+	meta := a.Metadata
+
+	// Try the standard key first; stop as soon as a non-blank value is found.
+	for _, key := range []string{"project_id", "project"} {
+		if value, isString := meta[key].(string); isString {
+			projectID = strings.TrimSpace(value)
+		}
+		if projectID != "" {
+			break
+		}
+	}
+	if projectID == "" {
+		return "", "", nil, fmt.Errorf("vertex executor: missing project_id in credentials")
+	}
+
+	// Start from the default region and override it only with a non-blank value.
+	location = "us-central1"
+	if value, isString := meta["location"].(string); isString {
+		if trimmed := strings.TrimSpace(value); trimmed != "" {
+			location = trimmed
+		}
+	}
+
+	account, isMap := meta["service_account"].(map[string]any)
+	if !isMap || account == nil {
+		return "", "", nil, fmt.Errorf("vertex executor: missing service_account in credentials")
+	}
+	normalized, errNormalize := vertexauth.NormalizeServiceAccountMap(account)
+	if errNormalize != nil {
+		return "", "", nil, fmt.Errorf("vertex executor: %w", errNormalize)
+	}
+	encoded, errEncode := json.Marshal(normalized)
+	if errEncode != nil {
+		return "", "", nil, fmt.Errorf("vertex executor: marshal service_account failed: %w", errEncode)
+	}
+	return projectID, location, encoded, nil
+}
+
+// vertexBaseURL returns the scheme and host of the Vertex AI endpoint for location.
+//
+// Surrounding whitespace is ignored. A blank location falls back to "us-central1".
+// The "global" location maps to the unprefixed host aiplatform.googleapis.com;
+// every other location uses the regional form "<location>-aiplatform.googleapis.com".
+func vertexBaseURL(location string) string {
+	switch loc := strings.TrimSpace(location); loc {
+	case "":
+		return "https://us-central1-aiplatform.googleapis.com"
+	case "global":
+		// Per the Vertex AI documentation the global endpoint carries no location
+		// prefix, so it must not be built with the regional pattern below.
+		return "https://aiplatform.googleapis.com"
+	default:
+		return fmt.Sprintf("https://%s-aiplatform.googleapis.com", loc)
+	}
+}
+
+// vertexAccessToken builds Google credentials from the service account JSON with
+// the cloud-platform scope and returns the access token produced by their token
+// source. Parse and token failures are wrapped with a "vertex executor:" prefix.
+// A fresh credentials object is built on every call; nothing is cached here.
+func vertexAccessToken(ctx context.Context, saJSON []byte) (string, error) {
+	// Use cloud-platform scope for Vertex AI.
+	const cloudPlatformScope = "https://www.googleapis.com/auth/cloud-platform"
+
+	credentials, errParse := google.CredentialsFromJSON(ctx, saJSON, cloudPlatformScope)
+	if errParse != nil {
+		return "", fmt.Errorf("vertex executor: parse service account json failed: %w", errParse)
+	}
+	token, errToken := credentials.TokenSource.Token()
+	if errToken != nil {
+		return "", fmt.Errorf("vertex executor: get access token failed: %w", errToken)
+	}
+	return token.AccessToken, nil
+}
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -10,6 +10,7 @@ import (
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
@@ -66,6 +67,11 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 		httpReq.Header.Set("Authorization", "Bearer "+apiKey)
 	}
 	httpReq.Header.Set("User-Agent", "cli-proxy-openai-compat")
+	var attrs map[string]string
+	if auth != nil {
+		attrs = auth.Attributes
+	}
+	util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
 	var authID, authLabel, authType, authValue string
 	if auth != nil {
 		authID = auth.ID
@@ -110,6 +116,8 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 	}
 	appendAPIResponseChunk(ctx, e.cfg, body)
 	reporter.publish(ctx, parseOpenAIUsage(body))
+	// Ensure we at least record the request even if upstream doesn't return usage
+	reporter.ensurePublished(ctx)
 	// Translate response back to source format when needed
 	var param any
 	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, body, &param)
@@ -143,6 +151,11 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 		httpReq.Header.Set("Authorization", "Bearer "+apiKey)
 	}
 	httpReq.Header.Set("User-Agent", "cli-proxy-openai-compat")
+	var attrs map[string]string
+	if auth != nil {
+		attrs = auth.Attributes
+	}
+	util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
 	httpReq.Header.Set("Accept", "text/event-stream")
 	httpReq.Header.Set("Cache-Control", "no-cache")
 	var authID, authLabel, authType, authValue string
@@ -214,6 +227,8 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 			reporter.publishFailure(ctx)
 			out <- cliproxyexecutor.StreamChunk{Err: errScan}
 		}
+		// Ensure we record the request if no usage chunk was ever seen
+		reporter.ensurePublished(ctx)
 	}()
 	return stream, nil
 }
--- a/internal/runtime/executor/usage_helpers.go
+++ b/internal/runtime/executor/usage_helpers.go
@@ -84,6 +84,28 @@ func (r *usageReporter) publishWithOutcome(ctx context.Context, detail usage.Det
 	})
 }

+// ensurePublished emits a fallback usage record (Failed=false, empty Detail)
+// unless r.once has already fired; later calls are no-ops, and a nil receiver
+// is tolerated. It keeps requests counted even when upstream responses carry
+// no usage fields (tokens), especially for streaming paths.
+func (r *usageReporter) ensurePublished(ctx context.Context) {
+	if r == nil {
+		return
+	}
+	r.once.Do(func() {
+		usage.PublishRecord(ctx, usage.Record{
+			Provider:    r.provider,
+			Model:       r.model,
+			Source:      r.source,
+			APIKey:      r.apiKey,
+			AuthID:      r.authID,
+			RequestedAt: r.requestedAt,
+			Failed:      false,
+			Detail:      usage.Detail{},
+		})
+	})
+}
+
 func apiKeyFromContext(ctx context.Context) string {
 	if ctx == nil {
 		return ""
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -65,17 +65,23 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
 			var normalized int
-			if v := tc.Get("thinking_budget"); v.Exists() {
+
+			if v := tc.Get("thinkingBudget"); v.Exists() {
+				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
+				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
+				setBudget = true
+			} else if v := tc.Get("thinking_budget"); v.Exists() {
 				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
 				setBudget = true
 			}
-			if v := tc.Get("include_thoughts"); v.Exists() {
+
+			if v := tc.Get("includeThoughts"); v.Exists() {
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
-			} else if setBudget {
-				if normalized != 0 {
-					out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-				}
+			} else if v := tc.Get("include_thoughts"); v.Exists() {
+				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
+			} else if setBudget && normalized != 0 {
+				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 			}
 		}
 	}
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
@@ -65,18 +65,23 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
 			var normalized int
-			if v := tc.Get("thinking_budget"); v.Exists() {
-				// Normalize budget to model range
+
+			if v := tc.Get("thinkingBudget"); v.Exists() {
+				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
+				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
+				setBudget = true
+			} else if v := tc.Get("thinking_budget"); v.Exists() {
 				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
 				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
 				setBudget = true
 			}
-			if v := tc.Get("include_thoughts"); v.Exists() {
+
+			if v := tc.Get("includeThoughts"); v.Exists() {
 				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
-			} else if setBudget {
-				if normalized != 0 {
-					out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
-				}
+			} else if v := tc.Get("include_thoughts"); v.Exists() {
+				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
+			} else if setBudget && normalized != 0 {
+				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 			}
 		}
 	}
@@ -154,7 +159,6 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 				}
 			}
 		}
-		fmt.Printf("11111")

 		for i := 0; i < len(arr); i++ {
 			m := arr[i]
--- a/internal/translator/openai/claude/openai_claude_request.go
+++ b/internal/translator/openai/claude/openai_claude_request.go
@@ -133,27 +133,16 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 					return true
 				})

-				// Create main message if there's text content or tool calls
-				if len(contentItems) > 0 || len(toolCalls) > 0 {
+				// Emit text/image content as one message
+				if len(contentItems) > 0 {
 					msgJSON := `{"role":"","content":""}`
 					msgJSON, _ = sjson.Set(msgJSON, "role", role)

-					// Set content
-					if len(contentItems) > 0 {
-						contentArrayJSON := "[]"
-						for _, contentItem := range contentItems {
-							contentArrayJSON, _ = sjson.SetRaw(contentArrayJSON, "-1", contentItem)
-						}
-						msgJSON, _ = sjson.SetRaw(msgJSON, "content", contentArrayJSON)
-					} else {
-						msgJSON, _ = sjson.Set(msgJSON, "content", "")
-					}
-
-					// Set tool calls for assistant messages
-					if role == "assistant" && len(toolCalls) > 0 {
-						toolCallsJSON, _ := json.Marshal(toolCalls)
-						msgJSON, _ = sjson.SetRaw(msgJSON, "tool_calls", string(toolCallsJSON))
+					contentArrayJSON := "[]"
+					for _, contentItem := range contentItems {
+						contentArrayJSON, _ = sjson.SetRaw(contentArrayJSON, "-1", contentItem)
 					}
+					msgJSON, _ = sjson.SetRaw(msgJSON, "content", contentArrayJSON)

 					contentValue := gjson.Get(msgJSON, "content")
 					hasContent := false
@@ -168,11 +157,19 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 						hasContent = contentValue.Raw != "" && contentValue.Raw != "null"
 					}

-					if hasContent || len(toolCalls) != 0 {
+					if hasContent {
 						messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(msgJSON).Value())
 					}
 				}

+				// Emit tool calls in a separate assistant message
+				if role == "assistant" && len(toolCalls) > 0 {
+					toolCallMsgJSON := `{"role":"assistant","tool_calls":[]}`
+					toolCallsJSON, _ := json.Marshal(toolCalls)
+					toolCallMsgJSON, _ = sjson.SetRaw(toolCallMsgJSON, "tool_calls", string(toolCallsJSON))
+					messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(toolCallMsgJSON).Value())
+				}
+
 			} else if contentResult.Exists() && contentResult.Type == gjson.String {
 				// Simple string content
 				msgJSON := `{"role":"","content":""}`
--- a/internal/translator/openai/gemini/openai_gemini_request.go
+++ b/internal/translator/openai/gemini/openai_gemini_request.go
@@ -85,6 +85,58 @@ func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 	var openAIMessages []interface{}
 	var toolCallIDs []string // Track tool call IDs for matching with tool results

+	// System instruction -> OpenAI system message
+	// Gemini may provide `systemInstruction` or `system_instruction`; support both keys.
+	systemInstruction := root.Get("systemInstruction")
+	if !systemInstruction.Exists() {
+		systemInstruction = root.Get("system_instruction")
+	}
+	if systemInstruction.Exists() {
+		parts := systemInstruction.Get("parts")
+		msg := map[string]interface{}{
+			"role":    "system",
+			"content": []interface{}{},
+		}
+
+		var aggregatedParts []interface{}
+
+		if parts.Exists() && parts.IsArray() {
+			parts.ForEach(func(_, part gjson.Result) bool {
+				// Handle text parts
+				if text := part.Get("text"); text.Exists() {
+					formattedText := text.String()
+					aggregatedParts = append(aggregatedParts, map[string]interface{}{
+						"type": "text",
+						"text": formattedText,
+					})
+				}
+
+				// Handle inline data (e.g., images)
+				if inlineData := part.Get("inlineData"); inlineData.Exists() {
+					mimeType := inlineData.Get("mimeType").String()
+					if mimeType == "" {
+						mimeType = "application/octet-stream"
+					}
+					data := inlineData.Get("data").String()
+					imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data)
+
+					aggregatedParts = append(aggregatedParts, map[string]interface{}{
+						"type": "image_url",
+						"image_url": map[string]interface{}{
+							"url": imageURL,
+						},
+					})
+				}
+				return true
+			})
+		}
+
+		if len(aggregatedParts) > 0 {
+			msg["content"] = aggregatedParts
+			openAIMessages = append(openAIMessages, msg)
+		}
+	}
+
 	if contents := root.Get("contents"); contents.Exists() && contents.IsArray() {
 		contents.ForEach(func(_, content gjson.Result) bool {
 			role := content.Get("role").String()
--- a/internal/translator/openai/openai/responses/openai_openai-responses_request.go
+++ b/internal/translator/openai/openai/responses/openai_openai-responses_request.go
@@ -2,6 +2,7 @@ package responses

 import (
 	"bytes"
+
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -147,6 +148,11 @@ func ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName string, inpu

 			return true
 		})
+	} else if input.Type == gjson.String {
+		msg := "{}"
+		msg, _ = sjson.Set(msg, "role", "user")
+		msg, _ = sjson.Set(msg, "content", input.String())
+		out, _ = sjson.SetRaw(out, "messages.-1", msg)
 	}

 	// Convert tools from responses format to chat completions format
--- a/internal/util/header_helpers.go
+++ b/internal/util/header_helpers.go
@@ -0,0 +1,52 @@
+package util
+
+import (
+	"net/http"
+	"strings"
+)
+
+// ApplyCustomHeadersFromAttrs copies every non-empty "header:<name>" attribute onto r as header <name>.
+// Values are written with Header.Set, so they replace headers already on r; headers set afterwards still win.
+func ApplyCustomHeadersFromAttrs(r *http.Request, attrs map[string]string) {
+	if r == nil {
+		return
+	}
+	applyCustomHeaders(r, extractCustomHeaders(attrs))
+}
+
+func extractCustomHeaders(attrs map[string]string) map[string]string {
+	if len(attrs) == 0 {
+		return nil
+	}
+	headers := make(map[string]string)
+	for k, v := range attrs {
+		if !strings.HasPrefix(k, "header:") {
+			continue
+		}
+		name := strings.TrimSpace(strings.TrimPrefix(k, "header:"))
+		if name == "" {
+			continue
+		}
+		val := strings.TrimSpace(v)
+		if val == "" {
+			continue
+		}
+		headers[name] = val
+	}
+	if len(headers) == 0 {
+		return nil
+	}
+	return headers
+}
+
+func applyCustomHeaders(r *http.Request, headers map[string]string) {
+	if r == nil || len(headers) == 0 {
+		return
+	}
+	for k, v := range headers {
+		if k == "" || v == "" {
+			continue
+		}
+		r.Header.Set(k, v)
+	}
+}
--- a/internal/watcher/watcher.go
+++ b/internal/watcher/watcher.go
@@ -41,24 +41,26 @@ type authDirProvider interface {

 // Watcher manages file watching for configuration and authentication files
 type Watcher struct {
-	configPath      string
-	authDir         string
-	config          *config.Config
-	clientsMutex    sync.RWMutex
-	reloadCallback  func(*config.Config)
-	watcher         *fsnotify.Watcher
-	lastAuthHashes  map[string]string
-	lastConfigHash  string
-	authQueue       chan<- AuthUpdate
-	currentAuths    map[string]*coreauth.Auth
-	dispatchMu      sync.Mutex
-	dispatchCond    *sync.Cond
-	pendingUpdates  map[string]AuthUpdate
-	pendingOrder    []string
-	dispatchCancel  context.CancelFunc
-	storePersister  storePersister
-	mirroredAuthDir string
-	oldConfigYaml   []byte
+	configPath        string
+	authDir           string
+	config            *config.Config
+	clientsMutex      sync.RWMutex
+	configReloadMu    sync.Mutex
+	configReloadTimer *time.Timer
+	reloadCallback    func(*config.Config)
+	watcher           *fsnotify.Watcher
+	lastAuthHashes    map[string]string
+	lastConfigHash    string
+	authQueue         chan<- AuthUpdate
+	currentAuths      map[string]*coreauth.Auth
+	dispatchMu        sync.Mutex
+	dispatchCond      *sync.Cond
+	pendingUpdates    map[string]AuthUpdate
+	pendingOrder      []string
+	dispatchCancel    context.CancelFunc
+	storePersister    storePersister
+	mirroredAuthDir   string
+	oldConfigYaml     []byte
 }

 type stableIDGenerator struct {
@@ -113,7 +115,8 @@ type AuthUpdate struct {
 const (
 	// replaceCheckDelay is a short delay to allow atomic replace (rename) to settle
 	// before deciding whether a Remove event indicates a real deletion.
-	replaceCheckDelay = 50 * time.Millisecond
+	replaceCheckDelay    = 50 * time.Millisecond
+	configReloadDebounce = 150 * time.Millisecond
 )

 // NewWatcher creates a new file watcher instance
@@ -172,9 +175,19 @@ func (w *Watcher) Start(ctx context.Context) error {
 // Stop stops the file watcher
 func (w *Watcher) Stop() error {
 	w.stopDispatch()
+	w.stopConfigReloadTimer()
 	return w.watcher.Close()
 }

+func (w *Watcher) stopConfigReloadTimer() {
+	w.configReloadMu.Lock()
+	if w.configReloadTimer != nil {
+		w.configReloadTimer.Stop()
+		w.configReloadTimer = nil
+	}
+	w.configReloadMu.Unlock()
+}
+
 // SetConfig updates the current configuration
 func (w *Watcher) SetConfig(cfg *config.Config) {
 	w.clientsMutex.Lock()
@@ -476,40 +489,7 @@ func (w *Watcher) handleEvent(event fsnotify.Event) {
 	// Handle config file changes
 	if isConfigEvent {
 		log.Debugf("config file change details - operation: %s, timestamp: %s", event.Op.String(), now.Format("2006-01-02 15:04:05.000"))
-		data, err := os.ReadFile(w.configPath)
-		if err != nil {
-			log.Errorf("failed to read config file for hash check: %v", err)
-			return
-		}
-		if len(data) == 0 {
-			log.Debugf("ignoring empty config file write event")
-			return
-		}
-		sum := sha256.Sum256(data)
-		newHash := hex.EncodeToString(sum[:])
-
-		w.clientsMutex.RLock()
-		currentHash := w.lastConfigHash
-		w.clientsMutex.RUnlock()
-
-		if currentHash != "" && currentHash == newHash {
-			log.Debugf("config file content unchanged (hash match), skipping reload")
-			return
-		}
-		fmt.Printf("config file changed, reloading: %s\n", w.configPath)
-		if w.reloadConfig() {
-			finalHash := newHash
-			if updatedData, errRead := os.ReadFile(w.configPath); errRead == nil && len(updatedData) > 0 {
-				sumUpdated := sha256.Sum256(updatedData)
-				finalHash = hex.EncodeToString(sumUpdated[:])
-			} else if errRead != nil {
-				log.WithError(errRead).Debug("failed to compute updated config hash after reload")
-			}
-			w.clientsMutex.Lock()
-			w.lastConfigHash = finalHash
-			w.clientsMutex.Unlock()
-			w.persistConfigAsync()
-		}
+		w.scheduleConfigReload()
 		return
 	}

@@ -530,6 +510,57 @@ func (w *Watcher) handleEvent(event fsnotify.Event) {
 	}
 }

+func (w *Watcher) scheduleConfigReload() {
+	w.configReloadMu.Lock()
+	defer w.configReloadMu.Unlock()
+	if w.configReloadTimer != nil {
+		w.configReloadTimer.Stop()
+	}
+	w.configReloadTimer = time.AfterFunc(configReloadDebounce, func() {
+		w.configReloadMu.Lock()
+		w.configReloadTimer = nil
+		w.configReloadMu.Unlock()
+		w.reloadConfigIfChanged()
+	})
+}
+
+func (w *Watcher) reloadConfigIfChanged() {
+	data, err := os.ReadFile(w.configPath)
+	if err != nil {
+		log.Errorf("failed to read config file for hash check: %v", err)
+		return
+	}
+	if len(data) == 0 {
+		log.Debugf("ignoring empty config file write event")
+		return
+	}
+	sum := sha256.Sum256(data)
+	newHash := hex.EncodeToString(sum[:])
+
+	w.clientsMutex.RLock()
+	currentHash := w.lastConfigHash
+	w.clientsMutex.RUnlock()
+
+	if currentHash != "" && currentHash == newHash {
+		log.Debugf("config file content unchanged (hash match), skipping reload")
+		return
+	}
+	fmt.Printf("config file changed, reloading: %s\n", w.configPath)
+	if w.reloadConfig() {
+		finalHash := newHash
+		if updatedData, errRead := os.ReadFile(w.configPath); errRead == nil && len(updatedData) > 0 {
+			sumUpdated := sha256.Sum256(updatedData)
+			finalHash = hex.EncodeToString(sumUpdated[:])
+		} else if errRead != nil {
+			log.WithError(errRead).Debug("failed to compute updated config hash after reload")
+		}
+		w.clientsMutex.Lock()
+		w.lastConfigHash = finalHash
+		w.clientsMutex.Unlock()
+		w.persistConfigAsync()
+	}
+}
+
 // reloadConfig reloads the configuration and triggers a full reload
 func (w *Watcher) reloadConfig() bool {
 	log.Debug("=========================== CONFIG RELOAD ============================")
@@ -762,16 +793,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth {
 			if base != "" {
 				attrs["base_url"] = base
 			}
-			if len(entry.Headers) > 0 {
-				for hk, hv := range entry.Headers {
-					key := strings.TrimSpace(hk)
-					val := strings.TrimSpace(hv)
-					if key == "" || val == "" {
-						continue
-					}
-					attrs["header:"+key] = val
-				}
-			}
+			addConfigHeadersToAttrs(entry.Headers, attrs)
 			a := &coreauth.Auth{
 				ID:         id,
 				Provider:   "gemini",
@@ -803,6 +825,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth {
 			if hash := computeClaudeModelsHash(ck.Models); hash != "" {
 				attrs["models_hash"] = hash
 			}
+			addConfigHeadersToAttrs(ck.Headers, attrs)
 			proxyURL := strings.TrimSpace(ck.ProxyURL)
 			a := &coreauth.Auth{
 				ID:         id,
@@ -831,6 +854,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth {
 			if ck.BaseURL != "" {
 				attrs["base_url"] = ck.BaseURL
 			}
+			addConfigHeadersToAttrs(ck.Headers, attrs)
 			proxyURL := strings.TrimSpace(ck.ProxyURL)
 			a := &coreauth.Auth{
 				ID:         id,
@@ -873,6 +897,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth {
 					if hash := computeOpenAICompatModelsHash(compat.Models); hash != "" {
 						attrs["models_hash"] = hash
 					}
+					addConfigHeadersToAttrs(compat.Headers, attrs)
 					a := &coreauth.Auth{
 						ID:         id,
 						Provider:   providerName,
@@ -905,6 +930,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth {
 					if hash := computeOpenAICompatModelsHash(compat.Models); hash != "" {
 						attrs["models_hash"] = hash
 					}
+					addConfigHeadersToAttrs(compat.Headers, attrs)
 					a := &coreauth.Auth{
 						ID:         id,
 						Provider:   providerName,
@@ -930,6 +956,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth {
 				if hash := computeOpenAICompatModelsHash(compat.Models); hash != "" {
 					attrs["models_hash"] = hash
 				}
+				addConfigHeadersToAttrs(compat.Headers, attrs)
 				a := &coreauth.Auth{
 					ID:         id,
 					Provider:   providerName,
@@ -1131,13 +1158,16 @@ func describeOpenAICompatibilityUpdate(oldEntry, newEntry config.OpenAICompatibi
 	newKeyCount := countAPIKeys(newEntry)
 	oldModelCount := countOpenAIModels(oldEntry.Models)
 	newModelCount := countOpenAIModels(newEntry.Models)
-	details := make([]string, 0, 2)
+	details := make([]string, 0, 3)
 	if oldKeyCount != newKeyCount {
 		details = append(details, fmt.Sprintf("api-keys %d -> %d", oldKeyCount, newKeyCount))
 	}
 	if oldModelCount != newModelCount {
 		details = append(details, fmt.Sprintf("models %d -> %d", oldModelCount, newModelCount))
 	}
+	if !equalStringMap(oldEntry.Headers, newEntry.Headers) {
+		details = append(details, "headers updated")
+	}
 	if len(details) == 0 {
 		return ""
 	}
@@ -1303,6 +1333,9 @@ func buildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
 			if strings.TrimSpace(o.APIKey) != strings.TrimSpace(n.APIKey) {
 				changes = append(changes, fmt.Sprintf("claude[%d].api-key: updated", i))
 			}
+			if !equalStringMap(o.Headers, n.Headers) {
+				changes = append(changes, fmt.Sprintf("claude[%d].headers: updated", i))
+			}
 		}
 	}

@@ -1325,6 +1358,9 @@ func buildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
 			if strings.TrimSpace(o.APIKey) != strings.TrimSpace(n.APIKey) {
 				changes = append(changes, fmt.Sprintf("codex[%d].api-key: updated", i))
 			}
+			if !equalStringMap(o.Headers, n.Headers) {
+				changes = append(changes, fmt.Sprintf("codex[%d].headers: updated", i))
+			}
 		}
 	}

@@ -1357,6 +1393,20 @@ func buildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
 	return changes
 }

+func addConfigHeadersToAttrs(headers map[string]string, attrs map[string]string) {
+	if len(headers) == 0 || attrs == nil {
+		return
+	}
+	for hk, hv := range headers {
+		key := strings.TrimSpace(hk)
+		val := strings.TrimSpace(hv)
+		if key == "" || val == "" {
+			continue
+		}
+		attrs["header:"+key] = val
+	}
+}
+
 func trimStrings(in []string) []string {
 	out := make([]string, len(in))
 	for i := range in {
--- a/sdk/cliproxy/service.go
+++ b/sdk/cliproxy/service.go
@@ -305,6 +305,12 @@ func (s *Service) ensureExecutorsForAuth(a *coreauth.Auth) {
 	if s == nil || a == nil {
 		return
 	}
+	// Skip disabled auth entries when (re)binding executors.
+	// Disabled auths can linger during config reloads (e.g., removed OpenAI-compat entries)
+	// and must not override active provider executors (such as iFlow OAuth accounts).
+	if a.Disabled {
+		return
+	}
 	if compatProviderKey, _, isCompat := openAICompatInfoFromAuth(a); isCompat {
 		if compatProviderKey == "" {
 			compatProviderKey = strings.ToLower(strings.TrimSpace(a.Provider))
@@ -318,6 +324,8 @@ func (s *Service) ensureExecutorsForAuth(a *coreauth.Auth) {
 	switch strings.ToLower(a.Provider) {
 	case "gemini":
 		s.coreManager.RegisterExecutor(executor.NewGeminiExecutor(s.cfg))
+	case "vertex":
+		s.coreManager.RegisterExecutor(executor.NewGeminiVertexExecutor(s.cfg))
 	case "gemini-cli":
 		s.coreManager.RegisterExecutor(executor.NewGeminiCLIExecutor(s.cfg))
 	case "aistudio":
@@ -613,6 +621,9 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
 	switch provider {
 	case "gemini":
 		models = registry.GetGeminiModels()
+	case "vertex":
+		// Vertex AI Gemini supports the same model identifiers as Gemini.
+		models = registry.GetGeminiModels()
 	case "gemini-cli":
 		models = registry.GetGeminiCLIModels()
 	case "aistudio":
@@ -738,7 +749,7 @@ func (s *Service) resolveConfigClaudeKey(auth *coreauth.Auth) *config.ClaudeKey
 			continue
 		}
 		if attrKey != "" && strings.EqualFold(cfgKey, attrKey) {
-			if attrBase == "" || cfgBase == "" || strings.EqualFold(cfgBase, attrBase) {
+			if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) {
 				return entry
 			}
 		}
Author	SHA1	Message	Date
Luis Pater	dc804e96fb	fix(management): improve error handling and normalize YAML comment indentation Enhance error management for file operations and clean up temporary files. Add `NormalizeCommentIndentation` function to ensure YAML comments maintain consistent formatting.	2025-11-11 08:37:57 +08:00
Luis Pater	ab76cb3662	feat(management): add Vertex service account import and WebSocket auth management Introduce an endpoint for importing Vertex service account JSON keys and storing them as authentication records. Add handlers for managing WebSocket authentication configuration.	2025-11-10 20:48:31 +08:00
Luis Pater	2965bdadc1	fix(translator): remove debug print statement from OpenAI Gemini request processing	2025-11-10 18:37:05 +08:00
Luis Pater	40f7061b04	feat(watcher): debounce config reloads to prevent redundant operations Introduce `scheduleConfigReload` with debounce functionality for config reloads, ensuring efficient handling of frequent changes. Added `stopConfigReloadTimer` for stopping timers during watcher shutdown.	2025-11-10 12:57:40 +08:00
Luis Pater	8c947cafbe	Merge branch 'vertex' into dev	2025-11-10 12:24:07 +08:00
Luis Pater	717eadf128	feat(vertex): add support for Vertex AI Gemini authentication and execution Introduce Vertex AI Gemini integration with support for service account-based authentication, credential storage, and import functionality. Added new executor for Vertex AI requests, including execution and streaming paths, and integrated it into the core manager. Enhanced CLI with `--vertex-import` flag for importing service account keys.	2025-11-10 12:23:51 +08:00
Luis Pater	9e105738fd	fix(server): add PATCH method to CORS allowed methods	2025-11-10 12:12:05 +08:00
Luis Pater	5d806fcefc	fix(translator): support system instructions with parts and inline data in OpenAI Gemini requests Handle both `systemInstruction` and `system_instruction` keys, processing text and inline data parts (e.g., images) for system messages in Gemini.	2025-11-10 10:31:32 +08:00
Luis Pater	6ae1dd78ed	Merge pull request #230 from router-for-me/api fix(management): exclude disabled runtime-only auths from file entries	2025-11-10 08:34:47 +08:00
hkfires	43095de162	fix(management): exclude disabled runtime-only auths from file entries	2025-11-10 08:32:42 +08:00
Luis Pater	ef7e8206d3	fix(executor): ensure usage reporting for upstream responses lacking usage data Add `ensurePublished` to guarantee request counting even when usage fields (e.g., tokens) are absent in OpenAI-compatible executor responses, particularly for streaming paths.	2025-11-09 17:24:47 +08:00
Luis Pater	87291c0d75	Merge pull request #227 from router-for-me/api add headers support for api	2025-11-09 14:00:37 +08:00
hkfires	51d2766d5c	fix(management): sanitize keys and normalize headers	2025-11-09 12:13:02 +08:00
hkfires	a00ba77604	refactor(config): rename SyncGeminiKeys; use Sanitize* methods	2025-11-09 08:29:47 +08:00
Luis Pater	3264605c2d	Merge pull request #226 from router-for-me/headers feat(config): support HTTP headers across providers	2025-11-08 21:41:31 +08:00
hkfires	cfb9cb8951	feat(config): support HTTP headers across providers	2025-11-08 20:52:05 +08:00
Luis Pater	bb00436509	fix(service): skip disabled auth entries during executor binding Prevent disabled auth entries from overriding active provider executors, addressing lingering configs during reloads (e.g., removed OpenAI-compat entries).	2025-11-08 18:19:34 +08:00
Luis Pater	1afbc4dd96	fix(translator): separate tool calls from content in OpenAI Claude requests	2025-11-08 17:57:46 +08:00
Luis Pater	d745f07044	fix(registry): replace Gemini model list with updated stable and preview versions	2025-11-08 15:51:57 +08:00
Luis Pater	695eaa5450	docs(instructions): add Codex operational and review guidelines Added detailed operational instructions for Codex agents based on GPT-5, covering shell usage, editing constraints, sandboxing policies, and approval mechanisms. Also included comprehensive review process guidelines for flagging and communicating issues effectively.	2025-11-08 15:19:51 +08:00
Luis Pater	67ad26c35a	fix(executor): remove default reasoning effort for `gpt-5-codex-mini`	2025-11-08 11:56:32 +08:00
Luis Pater	30d448e73c	fix(executor): update model name from `codex-mini-latest` to `gpt-5-codex-mini`	2025-11-08 11:17:40 +08:00
Luis Pater	d4064e3df4	Merge pull request #225 from jeffnash/feat/codex-mini-variants feat(registry): add GPT-5 Codex Mini model variants	2025-11-08 11:11:04 +08:00
jeffnash	ec354f7a1a	add default medium reasoning case for gpt-5-codex-mini Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>	2025-11-07 17:12:10 -08:00
jeffnash	240e782606	add default medium reasoning case for gpt-5-codex-mini Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>	2025-11-07 17:11:40 -08:00
Jeff Nash	fcb0293c0d	feat(registry): add GPT-5 Codex Mini model variants Adds three new Codex Mini model variants (mini, mini-medium, mini-high) that map to codex-mini-latest. Codex Mini supports medium and high reasoning effort levels only (no low/minimal). Base model defaults to medium reasoning effort.	2025-11-07 17:07:39 -08:00
Luis Pater	682c4598ee	fix(translator): handle gjson strings in OpenAI response formatting	2025-11-08 00:41:56 +08:00
Luis Pater	a7d105bd69	Fixed: #223 fix(registry): add `MiniMax-M2` model to registry definitions	2025-11-08 00:10:51 +08:00
Luis Pater	b9eef45305	Merge pull request #222 from router-for-me/api Return auth info from memory	2025-11-07 22:41:12 +08:00
Luis Pater	c8f20a66a8	fix(executor): add logging and prompt cache key handling for OpenAI responses	2025-11-07 22:40:45 +08:00
hkfires	1f6a384c9a	fix(api): omit auth file entries lacking path unless runtime-only	2025-11-07 19:15:54 +08:00
hkfires	c9fc033cf5	feat(management): support in-memory auth listing with disk fallback	2025-11-07 19:04:54 +08:00
Luis Pater	32c964d310	Merge pull request #221 from router-for-me/gemini fix(translator): accept camelCase thinking config in OpenAI→Gemini	2025-11-07 17:00:07 +08:00
hkfires	d60040b222	fix(translator): accept camelCase thinking config in OpenAI→Gemini	2025-11-07 16:45:31 +08:00