Merge pull request #371 from ben-vargas/test-amp-tools

fix(amp): add /threads.rss root-level route for AMP CLI
Merge pull request #366 from router-for-me/blacklist
2026-02-06 22:40:51 +08:00 · 2025-11-30 15:18:23 +08:00 · 2025-11-30 15:17:46 +08:00 · 2025-11-30 13:38:23 +08:00 · 2025-11-30 11:55:47 +08:00 · 2025-11-30 08:02:00 +08:00
38 changed files with 2276 additions and 384 deletions
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -1,6 +1,12 @@
 # Server port
 port: 8317

+# TLS settings for HTTPS. When enabled, the server listens with the provided certificate and key.
+tls:
+  enable: false
+  cert: ""
+  key: ""
+
 # Management API settings
 remote-management:
  # Whether to allow remote (non-localhost) management access.
@@ -38,6 +44,9 @@ proxy-url: ""
 # Number of times to retry a request. Retries will occur if the HTTP response code is 403, 408, 500, 502, 503, or 504.
 request-retry: 3

+# Maximum wait time in seconds for a cooled-down credential before triggering a retry.
+max-retry-interval: 30
+
 # Quota exceeded behavior
 quota-exceeded:
  switch-project: true # Whether to automatically switch to another project when a quota is exceeded
@@ -53,6 +62,11 @@ ws-auth: false
 #    headers:
 #      X-Custom-Header: "custom-value"
 #    proxy-url: "socks5://proxy.example.com:1080"
+#    excluded-models:
+#      - "gemini-2.5-pro"     # exclude specific models from this provider (exact match)
+#      - "gemini-2.5-*"       # wildcard matching prefix (e.g. gemini-2.5-flash, gemini-2.5-pro)
+#      - "*-preview"          # wildcard matching suffix (e.g. gemini-3-pro-preview)
+#      - "*flash*"            # wildcard matching substring (e.g. gemini-2.5-flash-lite)
 #  - api-key: "AIzaSy...02"

 # API keys for official Generative Language API (legacy compatibility)
@@ -67,6 +81,11 @@ ws-auth: false
 #    headers:
 #      X-Custom-Header: "custom-value"
 #    proxy-url: "socks5://proxy.example.com:1080" # optional: per-key proxy override
+#    excluded-models:
+#      - "gpt-5.1"         # exclude specific models (exact match)
+#      - "gpt-5-*"         # wildcard matching prefix (e.g. gpt-5-medium, gpt-5-codex)
+#      - "*-mini"          # wildcard matching suffix (e.g. gpt-5-codex-mini)
+#      - "*codex*"         # wildcard matching substring (e.g. gpt-5-codex-low)

 # Claude API keys
 #claude-api-key:
@@ -79,6 +98,11 @@ ws-auth: false
 #    models:
 #      - name: "claude-3-5-sonnet-20241022" # upstream model name
 #        alias: "claude-sonnet-latest" # client alias mapped to the upstream model
+#    excluded-models:
+#      - "claude-opus-4-5-20251101" # exclude specific models (exact match)
+#      - "claude-3-*"               # wildcard matching prefix (e.g. claude-3-7-sonnet-20250219)
+#      - "*-thinking"               # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
+#      - "*haiku*"                  # wildcard matching substring (e.g. claude-3-5-haiku-20241022)

 # OpenAI compatibility providers
 #openai-compatibility:
@@ -112,3 +136,25 @@ ws-auth: false
 #          protocol: "codex" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex
 #      params: # JSON path (gjson/sjson syntax) -> value
 #        "reasoning.effort": "high"
+
+# OAuth provider excluded models
+#oauth-excluded-models:
+#  gemini-cli:
+#    - "gemini-2.5-pro"     # exclude specific models (exact match)
+#    - "gemini-2.5-*"       # wildcard matching prefix (e.g. gemini-2.5-flash, gemini-2.5-pro)
+#    - "*-preview"          # wildcard matching suffix (e.g. gemini-3-pro-preview)
+#    - "*flash*"            # wildcard matching substring (e.g. gemini-2.5-flash-lite)
+#  vertex:
+#    - "gemini-3-pro-preview"
+#  aistudio:
+#    - "gemini-3-pro-preview"
+#  antigravity:
+#    - "gemini-3-pro-preview"
+#  claude:
+#    - "claude-3-5-haiku-20241022"
+#  codex:
+#    - "gpt-5-codex-mini"
+#  qwen:
+#    - "vision-model"
+#  iflow:
+#    - "tstars2.0"
--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
@@ -235,7 +235,11 @@ func (h *Handler) managementCallbackURL(path string) (string, error) {
 	if !strings.HasPrefix(path, "/") {
 		path = "/" + path
 	}
-	return fmt.Sprintf("http://127.0.0.1:%d%s", h.cfg.Port, path), nil
+	scheme := "http"
+	if h.cfg.TLS.Enable {
+		scheme = "https"
+	}
+	return fmt.Sprintf("%s://127.0.0.1:%d%s", scheme, h.cfg.Port, path), nil
 }

 func (h *Handler) ListAuthFiles(c *gin.Context) {
--- a/internal/api/handlers/management/config_basic.go
+++ b/internal/api/handlers/management/config_basic.go
@@ -172,6 +172,14 @@ func (h *Handler) PutRequestRetry(c *gin.Context) {
 	h.updateIntField(c, func(v int) { h.cfg.RequestRetry = v })
 }

+// Max retry interval
+func (h *Handler) GetMaxRetryInterval(c *gin.Context) {
+	c.JSON(200, gin.H{"max-retry-interval": h.cfg.MaxRetryInterval})
+}
+func (h *Handler) PutMaxRetryInterval(c *gin.Context) {
+	h.updateIntField(c, func(v int) { h.cfg.MaxRetryInterval = v })
+}
+
 // Proxy URL
 func (h *Handler) GetProxyURL(c *gin.Context) { c.JSON(200, gin.H{"proxy-url": h.cfg.ProxyURL}) }
 func (h *Handler) PutProxyURL(c *gin.Context) {
--- a/internal/api/handlers/management/config_lists.go
+++ b/internal/api/handlers/management/config_lists.go
@@ -223,6 +223,7 @@ func (h *Handler) PatchGeminiKey(c *gin.Context) {
 	value.APIKey = strings.TrimSpace(value.APIKey)
 	value.BaseURL = strings.TrimSpace(value.BaseURL)
 	value.ProxyURL = strings.TrimSpace(value.ProxyURL)
+	value.ExcludedModels = config.NormalizeExcludedModels(value.ExcludedModels)
 	if value.APIKey == "" {
 		// Treat empty API key as delete.
 		if body.Index != nil && *body.Index >= 0 && *body.Index < len(h.cfg.GeminiKey) {
@@ -504,6 +505,91 @@ func (h *Handler) DeleteOpenAICompat(c *gin.Context) {
 	c.JSON(400, gin.H{"error": "missing name or index"})
 }

+// oauth-excluded-models: map[string][]string
+func (h *Handler) GetOAuthExcludedModels(c *gin.Context) {
+	c.JSON(200, gin.H{"oauth-excluded-models": config.NormalizeOAuthExcludedModels(h.cfg.OAuthExcludedModels)})
+}
+
+func (h *Handler) PutOAuthExcludedModels(c *gin.Context) {
+	data, err := c.GetRawData()
+	if err != nil {
+		c.JSON(400, gin.H{"error": "failed to read body"})
+		return
+	}
+	var entries map[string][]string
+	if err = json.Unmarshal(data, &entries); err != nil {
+		var wrapper struct {
+			Items map[string][]string `json:"items"`
+		}
+		if err2 := json.Unmarshal(data, &wrapper); err2 != nil {
+			c.JSON(400, gin.H{"error": "invalid body"})
+			return
+		}
+		entries = wrapper.Items
+	}
+	h.cfg.OAuthExcludedModels = config.NormalizeOAuthExcludedModels(entries)
+	h.persist(c)
+}
+
+func (h *Handler) PatchOAuthExcludedModels(c *gin.Context) {
+	var body struct {
+		Provider *string  `json:"provider"`
+		Models   []string `json:"models"`
+	}
+	if err := c.ShouldBindJSON(&body); err != nil || body.Provider == nil {
+		c.JSON(400, gin.H{"error": "invalid body"})
+		return
+	}
+	provider := strings.ToLower(strings.TrimSpace(*body.Provider))
+	if provider == "" {
+		c.JSON(400, gin.H{"error": "invalid provider"})
+		return
+	}
+	normalized := config.NormalizeExcludedModels(body.Models)
+	if len(normalized) == 0 {
+		if h.cfg.OAuthExcludedModels == nil {
+			c.JSON(404, gin.H{"error": "provider not found"})
+			return
+		}
+		if _, ok := h.cfg.OAuthExcludedModels[provider]; !ok {
+			c.JSON(404, gin.H{"error": "provider not found"})
+			return
+		}
+		delete(h.cfg.OAuthExcludedModels, provider)
+		if len(h.cfg.OAuthExcludedModels) == 0 {
+			h.cfg.OAuthExcludedModels = nil
+		}
+		h.persist(c)
+		return
+	}
+	if h.cfg.OAuthExcludedModels == nil {
+		h.cfg.OAuthExcludedModels = make(map[string][]string)
+	}
+	h.cfg.OAuthExcludedModels[provider] = normalized
+	h.persist(c)
+}
+
+func (h *Handler) DeleteOAuthExcludedModels(c *gin.Context) {
+	provider := strings.ToLower(strings.TrimSpace(c.Query("provider")))
+	if provider == "" {
+		c.JSON(400, gin.H{"error": "missing provider"})
+		return
+	}
+	if h.cfg.OAuthExcludedModels == nil {
+		c.JSON(404, gin.H{"error": "provider not found"})
+		return
+	}
+	if _, ok := h.cfg.OAuthExcludedModels[provider]; !ok {
+		c.JSON(404, gin.H{"error": "provider not found"})
+		return
+	}
+	delete(h.cfg.OAuthExcludedModels, provider)
+	if len(h.cfg.OAuthExcludedModels) == 0 {
+		h.cfg.OAuthExcludedModels = nil
+	}
+	h.persist(c)
+}
+
 // codex-api-key: []CodexKey
 func (h *Handler) GetCodexKeys(c *gin.Context) {
 	c.JSON(200, gin.H{"codex-api-key": h.cfg.CodexKey})
@@ -533,6 +619,7 @@ func (h *Handler) PutCodexKeys(c *gin.Context) {
 		entry.BaseURL = strings.TrimSpace(entry.BaseURL)
 		entry.ProxyURL = strings.TrimSpace(entry.ProxyURL)
 		entry.Headers = config.NormalizeHeaders(entry.Headers)
+		entry.ExcludedModels = config.NormalizeExcludedModels(entry.ExcludedModels)
 		if entry.BaseURL == "" {
 			continue
 		}
@@ -557,6 +644,7 @@ func (h *Handler) PatchCodexKey(c *gin.Context) {
 	value.BaseURL = strings.TrimSpace(value.BaseURL)
 	value.ProxyURL = strings.TrimSpace(value.ProxyURL)
 	value.Headers = config.NormalizeHeaders(value.Headers)
+	value.ExcludedModels = config.NormalizeExcludedModels(value.ExcludedModels)
 	// If base-url becomes empty, delete instead of update
 	if value.BaseURL == "" {
 		if body.Index != nil && *body.Index >= 0 && *body.Index < len(h.cfg.CodexKey) {
@@ -694,6 +782,7 @@ func normalizeClaudeKey(entry *config.ClaudeKey) {
 	entry.BaseURL = strings.TrimSpace(entry.BaseURL)
 	entry.ProxyURL = strings.TrimSpace(entry.ProxyURL)
 	entry.Headers = config.NormalizeHeaders(entry.Headers)
+	entry.ExcludedModels = config.NormalizeExcludedModels(entry.ExcludedModels)
 	if len(entry.Models) == 0 {
 		return
 	}
--- a/internal/api/handlers/management/logs.go
+++ b/internal/api/handlers/management/logs.go
@@ -58,8 +58,14 @@ func (h *Handler) GetLogs(c *gin.Context) {
 		return
 	}

+	limit, errLimit := parseLimit(c.Query("limit"))
+	if errLimit != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("invalid limit: %v", errLimit)})
+		return
+	}
+
 	cutoff := parseCutoff(c.Query("after"))
-	acc := newLogAccumulator(cutoff)
+	acc := newLogAccumulator(cutoff, limit)
 	for i := range files {
 		if errProcess := acc.consumeFile(files[i]); errProcess != nil {
 			c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to read log file %s: %v", files[i], errProcess)})
@@ -139,6 +145,126 @@ func (h *Handler) DeleteLogs(c *gin.Context) {
 	})
 }

+// GetRequestErrorLogs lists error request log files when RequestLog is disabled.
+// It returns an empty list when RequestLog is enabled.
+func (h *Handler) GetRequestErrorLogs(c *gin.Context) {
+	if h == nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "handler unavailable"})
+		return
+	}
+	if h.cfg == nil {
+		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "configuration unavailable"})
+		return
+	}
+	if h.cfg.RequestLog {
+		c.JSON(http.StatusOK, gin.H{"files": []any{}})
+		return
+	}
+
+	dir := h.logDirectory()
+	if strings.TrimSpace(dir) == "" {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "log directory not configured"})
+		return
+	}
+
+	entries, err := os.ReadDir(dir)
+	if err != nil {
+		if os.IsNotExist(err) {
+			c.JSON(http.StatusOK, gin.H{"files": []any{}})
+			return
+		}
+		c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to list request error logs: %v", err)})
+		return
+	}
+
+	type errorLog struct {
+		Name     string `json:"name"`
+		Size     int64  `json:"size"`
+		Modified int64  `json:"modified"`
+	}
+
+	files := make([]errorLog, 0, len(entries))
+	for _, entry := range entries {
+		if entry.IsDir() {
+			continue
+		}
+		name := entry.Name()
+		if !strings.HasPrefix(name, "error-") || !strings.HasSuffix(name, ".log") {
+			continue
+		}
+		info, errInfo := entry.Info()
+		if errInfo != nil {
+			c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to read log info for %s: %v", name, errInfo)})
+			return
+		}
+		files = append(files, errorLog{
+			Name:     name,
+			Size:     info.Size(),
+			Modified: info.ModTime().Unix(),
+		})
+	}
+
+	sort.Slice(files, func(i, j int) bool { return files[i].Modified > files[j].Modified })
+
+	c.JSON(http.StatusOK, gin.H{"files": files})
+}
+
+// DownloadRequestErrorLog downloads a specific error request log file by name.
+func (h *Handler) DownloadRequestErrorLog(c *gin.Context) {
+	if h == nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "handler unavailable"})
+		return
+	}
+	if h.cfg == nil {
+		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "configuration unavailable"})
+		return
+	}
+
+	dir := h.logDirectory()
+	if strings.TrimSpace(dir) == "" {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "log directory not configured"})
+		return
+	}
+
+	name := strings.TrimSpace(c.Param("name"))
+	if name == "" || strings.Contains(name, "/") || strings.Contains(name, "\\") {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid log file name"})
+		return
+	}
+	if !strings.HasPrefix(name, "error-") || !strings.HasSuffix(name, ".log") {
+		c.JSON(http.StatusNotFound, gin.H{"error": "log file not found"})
+		return
+	}
+
+	dirAbs, errAbs := filepath.Abs(dir)
+	if errAbs != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to resolve log directory: %v", errAbs)})
+		return
+	}
+	fullPath := filepath.Clean(filepath.Join(dirAbs, name))
+	prefix := dirAbs + string(os.PathSeparator)
+	if !strings.HasPrefix(fullPath, prefix) {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid log file path"})
+		return
+	}
+
+	info, errStat := os.Stat(fullPath)
+	if errStat != nil {
+		if os.IsNotExist(errStat) {
+			c.JSON(http.StatusNotFound, gin.H{"error": "log file not found"})
+			return
+		}
+		c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to read log file: %v", errStat)})
+		return
+	}
+	if info.IsDir() {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid log file"})
+		return
+	}
+
+	c.FileAttachment(fullPath, name)
+}
+
 func (h *Handler) logDirectory() string {
 	if h == nil {
 		return ""
@@ -194,16 +320,22 @@ func (h *Handler) collectLogFiles(dir string) ([]string, error) {

 type logAccumulator struct {
 	cutoff  int64
+	limit   int
 	lines   []string
 	total   int
 	latest  int64
 	include bool
 }

-func newLogAccumulator(cutoff int64) *logAccumulator {
+func newLogAccumulator(cutoff int64, limit int) *logAccumulator {
+	capacity := 256
+	if limit > 0 && limit < capacity {
+		capacity = limit
+	}
 	return &logAccumulator{
 		cutoff: cutoff,
-		lines:  make([]string, 0, 256),
+		limit:  limit,
+		lines:  make([]string, 0, capacity),
 	}
 }

@@ -215,7 +347,9 @@ func (acc *logAccumulator) consumeFile(path string) error {
 		}
 		return err
 	}
-	defer file.Close()
+	defer func() {
+		_ = file.Close()
+	}()

 	scanner := bufio.NewScanner(file)
 	buf := make([]byte, 0, logScannerInitialBuffer)
@@ -239,12 +373,19 @@ func (acc *logAccumulator) addLine(raw string) {
 	if ts > 0 {
 		acc.include = acc.cutoff == 0 || ts > acc.cutoff
 		if acc.cutoff == 0 || acc.include {
-			acc.lines = append(acc.lines, line)
+			acc.append(line)
 		}
 		return
 	}
 	if acc.cutoff == 0 || acc.include {
-		acc.lines = append(acc.lines, line)
+		acc.append(line)
+	}
+}
+
+func (acc *logAccumulator) append(line string) {
+	acc.lines = append(acc.lines, line)
+	if acc.limit > 0 && len(acc.lines) > acc.limit {
+		acc.lines = acc.lines[len(acc.lines)-acc.limit:]
 	}
 }

@@ -267,6 +408,21 @@ func parseCutoff(raw string) int64 {
 	return ts
 }

+func parseLimit(raw string) (int, error) {
+	value := strings.TrimSpace(raw)
+	if value == "" {
+		return 0, nil
+	}
+	limit, err := strconv.Atoi(value)
+	if err != nil {
+		return 0, fmt.Errorf("must be a positive integer")
+	}
+	if limit <= 0 {
+		return 0, fmt.Errorf("must be greater than zero")
+	}
+	return limit, nil
+}
+
 func parseTimestamp(line string) int64 {
 	if strings.HasPrefix(line, "[") {
 		line = line[1:]
--- a/internal/api/middleware/request_logging.go
+++ b/internal/api/middleware/request_logging.go
@@ -6,6 +6,7 @@ package middleware
 import (
 	"bytes"
 	"io"
+	"net/http"
 	"strings"

 	"github.com/gin-gonic/gin"
@@ -15,8 +16,8 @@ import (

 // RequestLoggingMiddleware creates a Gin middleware that logs HTTP requests and responses.
 // It captures detailed information about the request and response, including headers and body,
-// and uses the provided RequestLogger to record this data. If logging is disabled in the
-// logger, the middleware has minimal overhead.
+// and uses the provided RequestLogger to record this data. When logging is disabled in the
+// logger, it still captures data so that upstream errors can be persisted.
 func RequestLoggingMiddleware(logger logging.RequestLogger) gin.HandlerFunc {
 	return func(c *gin.Context) {
 		if logger == nil {
@@ -24,14 +25,13 @@ func RequestLoggingMiddleware(logger logging.RequestLogger) gin.HandlerFunc {
 			return
 		}

-		path := c.Request.URL.Path
-		if !shouldLogRequest(path) {
+		if c.Request.Method == http.MethodGet {
 			c.Next()
 			return
 		}

-		// Early return if logging is disabled (zero overhead)
-		if !logger.IsEnabled() {
+		path := c.Request.URL.Path
+		if !shouldLogRequest(path) {
 			c.Next()
 			return
 		}
@@ -47,6 +47,9 @@ func RequestLoggingMiddleware(logger logging.RequestLogger) gin.HandlerFunc {

 		// Create response writer wrapper
 		wrapper := NewResponseWriterWrapper(c.Writer, logger, requestInfo)
+		if !logger.IsEnabled() {
+			wrapper.logOnErrorOnly = true
+		}
 		c.Writer = wrapper

 		// Process the request
--- a/internal/api/middleware/response_writer.go
+++ b/internal/api/middleware/response_writer.go
@@ -5,6 +5,7 @@ package middleware

 import (
 	"bytes"
+	"net/http"
 	"strings"

 	"github.com/gin-gonic/gin"
@@ -24,15 +25,16 @@ type RequestInfo struct {
 // It is designed to handle both standard and streaming responses, ensuring that logging operations do not block the client response.
 type ResponseWriterWrapper struct {
 	gin.ResponseWriter
-	body         *bytes.Buffer              // body is a buffer to store the response body for non-streaming responses.
-	isStreaming  bool                       // isStreaming indicates whether the response is a streaming type (e.g., text/event-stream).
-	streamWriter logging.StreamingLogWriter // streamWriter is a writer for handling streaming log entries.
-	chunkChannel chan []byte                // chunkChannel is a channel for asynchronously passing response chunks to the logger.
-	streamDone   chan struct{}              // streamDone signals when the streaming goroutine completes.
-	logger       logging.RequestLogger      // logger is the instance of the request logger service.
-	requestInfo  *RequestInfo               // requestInfo holds the details of the original request.
-	statusCode   int                        // statusCode stores the HTTP status code of the response.
-	headers      map[string][]string        // headers stores the response headers.
+	body           *bytes.Buffer              // body is a buffer to store the response body for non-streaming responses.
+	isStreaming    bool                       // isStreaming indicates whether the response is a streaming type (e.g., text/event-stream).
+	streamWriter   logging.StreamingLogWriter // streamWriter is a writer for handling streaming log entries.
+	chunkChannel   chan []byte                // chunkChannel is a channel for asynchronously passing response chunks to the logger.
+	streamDone     chan struct{}              // streamDone signals when the streaming goroutine completes.
+	logger         logging.RequestLogger      // logger is the instance of the request logger service.
+	requestInfo    *RequestInfo               // requestInfo holds the details of the original request.
+	statusCode     int                        // statusCode stores the HTTP status code of the response.
+	headers        map[string][]string        // headers stores the response headers.
+	logOnErrorOnly bool                       // logOnErrorOnly enables logging only when an error response is detected.
 }

 // NewResponseWriterWrapper creates and initializes a new ResponseWriterWrapper.
@@ -192,12 +194,34 @@ func (w *ResponseWriterWrapper) processStreamingChunks(done chan struct{}) {
 // For non-streaming responses, it logs the complete request and response details,
 // including any API-specific request/response data stored in the Gin context.
 func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
-	if !w.logger.IsEnabled() {
+	if w.logger == nil {
+		return nil
+	}
+
+	finalStatusCode := w.statusCode
+	if finalStatusCode == 0 {
+		if statusWriter, ok := w.ResponseWriter.(interface{ Status() int }); ok {
+			finalStatusCode = statusWriter.Status()
+		} else {
+			finalStatusCode = 200
+		}
+	}
+
+	var slicesAPIResponseError []*interfaces.ErrorMessage
+	apiResponseError, isExist := c.Get("API_RESPONSE_ERROR")
+	if isExist {
+		if apiErrors, ok := apiResponseError.([]*interfaces.ErrorMessage); ok {
+			slicesAPIResponseError = apiErrors
+		}
+	}
+
+	hasAPIError := len(slicesAPIResponseError) > 0 || finalStatusCode >= http.StatusBadRequest
+	forceLog := w.logOnErrorOnly && hasAPIError && !w.logger.IsEnabled()
+	if !w.logger.IsEnabled() && !forceLog {
 		return nil
 	}

 	if w.isStreaming {
-		// Close streaming channel and writer
 		if w.chunkChannel != nil {
 			close(w.chunkChannel)
 			w.chunkChannel = nil
@@ -209,80 +233,98 @@ func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
 		}

 		if w.streamWriter != nil {
-			err := w.streamWriter.Close()
+			if err := w.streamWriter.Close(); err != nil {
+				w.streamWriter = nil
+				return err
+			}
 			w.streamWriter = nil
-			return err
 		}
-	} else {
-		// Capture final status code and headers if not already captured
-		finalStatusCode := w.statusCode
-		if finalStatusCode == 0 {
-			// Get status from underlying ResponseWriter if available
-			if statusWriter, ok := w.ResponseWriter.(interface{ Status() int }); ok {
-				finalStatusCode = statusWriter.Status()
-			} else {
-				finalStatusCode = 200 // Default
-			}
+		if forceLog {
+			return w.logRequest(finalStatusCode, w.cloneHeaders(), w.body.Bytes(), w.extractAPIRequest(c), w.extractAPIResponse(c), slicesAPIResponseError, forceLog)
 		}
+		return nil
+	}

-		// Ensure we have the latest headers before finalizing
-		w.ensureHeadersCaptured()
+	return w.logRequest(finalStatusCode, w.cloneHeaders(), w.body.Bytes(), w.extractAPIRequest(c), w.extractAPIResponse(c), slicesAPIResponseError, forceLog)
+}

-		// Use the captured headers as the final headers
-		finalHeaders := make(map[string][]string)
-		for key, values := range w.headers {
-			// Make a copy of the values slice to avoid reference issues
-			headerValues := make([]string, len(values))
-			copy(headerValues, values)
-			finalHeaders[key] = headerValues
-		}
+func (w *ResponseWriterWrapper) cloneHeaders() map[string][]string {
+	w.ensureHeadersCaptured()

-		var apiRequestBody []byte
-		apiRequest, isExist := c.Get("API_REQUEST")
-		if isExist {
-			var ok bool
-			apiRequestBody, ok = apiRequest.([]byte)
-			if !ok {
-				apiRequestBody = nil
-			}
-		}
+	finalHeaders := make(map[string][]string, len(w.headers))
+	for key, values := range w.headers {
+		headerValues := make([]string, len(values))
+		copy(headerValues, values)
+		finalHeaders[key] = headerValues
+	}

-		var apiResponseBody []byte
-		apiResponse, isExist := c.Get("API_RESPONSE")
-		if isExist {
-			var ok bool
-			apiResponseBody, ok = apiResponse.([]byte)
-			if !ok {
-				apiResponseBody = nil
-			}
-		}
+	return finalHeaders
+}

-		var slicesAPIResponseError []*interfaces.ErrorMessage
-		apiResponseError, isExist := c.Get("API_RESPONSE_ERROR")
-		if isExist {
-			var ok bool
-			slicesAPIResponseError, ok = apiResponseError.([]*interfaces.ErrorMessage)
-			if !ok {
-				slicesAPIResponseError = nil
-			}
-		}
+func (w *ResponseWriterWrapper) extractAPIRequest(c *gin.Context) []byte {
+	apiRequest, isExist := c.Get("API_REQUEST")
+	if !isExist {
+		return nil
+	}
+	data, ok := apiRequest.([]byte)
+	if !ok || len(data) == 0 {
+		return nil
+	}
+	return data
+}

-		// Log complete non-streaming response
-		return w.logger.LogRequest(
+func (w *ResponseWriterWrapper) extractAPIResponse(c *gin.Context) []byte {
+	apiResponse, isExist := c.Get("API_RESPONSE")
+	if !isExist {
+		return nil
+	}
+	data, ok := apiResponse.([]byte)
+	if !ok || len(data) == 0 {
+		return nil
+	}
+	return data
+}
+
+func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]string, body []byte, apiRequestBody, apiResponseBody []byte, apiResponseErrors []*interfaces.ErrorMessage, forceLog bool) error {
+	if w.requestInfo == nil {
+		return nil
+	}
+
+	var requestBody []byte
+	if len(w.requestInfo.Body) > 0 {
+		requestBody = w.requestInfo.Body
+	}
+
+	if loggerWithOptions, ok := w.logger.(interface {
+		LogRequestWithOptions(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []*interfaces.ErrorMessage, bool) error
+	}); ok {
+		return loggerWithOptions.LogRequestWithOptions(
 			w.requestInfo.URL,
 			w.requestInfo.Method,
 			w.requestInfo.Headers,
-			w.requestInfo.Body,
-			finalStatusCode,
-			finalHeaders,
-			w.body.Bytes(),
+			requestBody,
+			statusCode,
+			headers,
+			body,
 			apiRequestBody,
 			apiResponseBody,
-			slicesAPIResponseError,
+			apiResponseErrors,
+			forceLog,
 		)
 	}

-	return nil
+	return w.logger.LogRequest(
+		w.requestInfo.URL,
+		w.requestInfo.Method,
+		w.requestInfo.Headers,
+		requestBody,
+		statusCode,
+		headers,
+		body,
+		apiRequestBody,
+		apiResponseBody,
+		apiResponseErrors,
+	)
 }

 // Status returns the HTTP response status code captured by the wrapper.
--- a/internal/api/modules/amp/amp.go
+++ b/internal/api/modules/amp/amp.go
@@ -181,5 +181,3 @@ func (m *AmpModule) OnConfigUpdated(cfg *config.Config) error {
 	log.Debug("Amp config updated (restart required for URL changes)")
 	return nil
 }
-
-
--- a/internal/api/modules/amp/proxy.go
+++ b/internal/api/modules/amp/proxy.go
@@ -83,7 +83,7 @@ func createReverseProxy(upstreamURL string, secretSource SecretSource) (*httputi
 		// Peek at first 2 bytes to detect gzip magic bytes
 		header := make([]byte, 2)
 		n, _ := io.ReadFull(originalBody, header)
-		
+
 		// Check for gzip magic bytes (0x1f 0x8b)
 		// If n < 2, we didn't get enough bytes, so it's not gzip
 		if n >= 2 && header[0] == 0x1f && header[1] == 0x8b {
@@ -97,7 +97,7 @@ func createReverseProxy(upstreamURL string, secretSource SecretSource) (*httputi
 				}
 				return nil
 			}
-			
+
 			// Reconstruct complete gzipped data
 			gzippedData := append(header[:n], rest...)

@@ -129,8 +129,8 @@ func createReverseProxy(upstreamURL string, secretSource SecretSource) (*httputi
 			resp.ContentLength = int64(len(decompressed))

 			// Update headers to reflect decompressed state
-			resp.Header.Del("Content-Encoding")                                      // No longer compressed
-			resp.Header.Del("Content-Length")                                        // Remove stale compressed length
+			resp.Header.Del("Content-Encoding")                                          // No longer compressed
+			resp.Header.Del("Content-Length")                                            // Remove stale compressed length
 			resp.Header.Set("Content-Length", strconv.FormatInt(resp.ContentLength, 10)) // Set decompressed length

 			log.Debugf("amp proxy: decompressed gzip response (%d -> %d bytes)", len(gzippedData), len(decompressed))
--- a/internal/api/modules/amp/proxy_test.go
+++ b/internal/api/modules/amp/proxy_test.go
@@ -440,52 +440,52 @@ func TestIsStreamingResponse(t *testing.T) {

 func TestFilterBetaFeatures(t *testing.T) {
 	tests := []struct {
-		name           string
-		header         string
+		name            string
+		header          string
 		featureToRemove string
-		expected       string
+		expected        string
 	}{
 		{
-			name:           "Remove context-1m from middle",
-			header:         "fine-grained-tool-streaming-2025-05-14,context-1m-2025-08-07,oauth-2025-04-20",
+			name:            "Remove context-1m from middle",
+			header:          "fine-grained-tool-streaming-2025-05-14,context-1m-2025-08-07,oauth-2025-04-20",
 			featureToRemove: "context-1m-2025-08-07",
-			expected:       "fine-grained-tool-streaming-2025-05-14,oauth-2025-04-20",
+			expected:        "fine-grained-tool-streaming-2025-05-14,oauth-2025-04-20",
 		},
 		{
-			name:           "Remove context-1m from start",
-			header:         "context-1m-2025-08-07,fine-grained-tool-streaming-2025-05-14",
+			name:            "Remove context-1m from start",
+			header:          "context-1m-2025-08-07,fine-grained-tool-streaming-2025-05-14",
 			featureToRemove: "context-1m-2025-08-07",
-			expected:       "fine-grained-tool-streaming-2025-05-14",
+			expected:        "fine-grained-tool-streaming-2025-05-14",
 		},
 		{
-			name:           "Remove context-1m from end",
-			header:         "fine-grained-tool-streaming-2025-05-14,context-1m-2025-08-07",
+			name:            "Remove context-1m from end",
+			header:          "fine-grained-tool-streaming-2025-05-14,context-1m-2025-08-07",
 			featureToRemove: "context-1m-2025-08-07",
-			expected:       "fine-grained-tool-streaming-2025-05-14",
+			expected:        "fine-grained-tool-streaming-2025-05-14",
 		},
 		{
-			name:           "Feature not present",
-			header:         "fine-grained-tool-streaming-2025-05-14,oauth-2025-04-20",
+			name:            "Feature not present",
+			header:          "fine-grained-tool-streaming-2025-05-14,oauth-2025-04-20",
 			featureToRemove: "context-1m-2025-08-07",
-			expected:       "fine-grained-tool-streaming-2025-05-14,oauth-2025-04-20",
+			expected:        "fine-grained-tool-streaming-2025-05-14,oauth-2025-04-20",
 		},
 		{
-			name:           "Only feature to remove",
-			header:         "context-1m-2025-08-07",
+			name:            "Only feature to remove",
+			header:          "context-1m-2025-08-07",
 			featureToRemove: "context-1m-2025-08-07",
-			expected:       "",
+			expected:        "",
 		},
 		{
-			name:           "Empty header",
-			header:         "",
+			name:            "Empty header",
+			header:          "",
 			featureToRemove: "context-1m-2025-08-07",
-			expected:       "",
+			expected:        "",
 		},
 		{
-			name:           "Header with spaces",
-			header:         "fine-grained-tool-streaming-2025-05-14, context-1m-2025-08-07 , oauth-2025-04-20",
+			name:            "Header with spaces",
+			header:          "fine-grained-tool-streaming-2025-05-14, context-1m-2025-08-07 , oauth-2025-04-20",
 			featureToRemove: "context-1m-2025-08-07",
-			expected:       "fine-grained-tool-streaming-2025-05-14,oauth-2025-04-20",
+			expected:        "fine-grained-tool-streaming-2025-05-14,oauth-2025-04-20",
 		},
 	}

--- a/internal/api/modules/amp/routes.go
+++ b/internal/api/modules/amp/routes.go
@@ -6,11 +6,11 @@ import (
 	"strings"

 	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/claude"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/gemini"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/openai"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	log "github.com/sirupsen/logrus"
 )

@@ -111,6 +111,14 @@ func (m *AmpModule) registerManagementRoutes(engine *gin.Engine, baseHandler *ha
 	ampAPI.Any("/otel", proxyHandler)
 	ampAPI.Any("/otel/*path", proxyHandler)

+	// Root-level routes that AMP CLI expects without /api prefix
+	// These need the same security middleware as the /api/* routes
+	rootMiddleware := []gin.HandlerFunc{noCORSMiddleware()}
+	if restrictToLocalhost {
+		rootMiddleware = append(rootMiddleware, localhostOnlyMiddleware())
+	}
+	engine.GET("/threads.rss", append(rootMiddleware, proxyHandler)...)
+
 	// Google v1beta1 passthrough with OAuth fallback
 	// AMP CLI uses non-standard paths like /publishers/google/models/...
 	// We bridge these to our standard Gemini handler to enable local OAuth.
--- a/internal/api/modules/amp/routes_test.go
+++ b/internal/api/modules/amp/routes_test.go
@@ -37,6 +37,7 @@ func TestRegisterManagementRoutes(t *testing.T) {
 		{"/api/meta", http.MethodGet},
 		{"/api/telemetry", http.MethodGet},
 		{"/api/threads", http.MethodGet},
+		{"/threads.rss", http.MethodGet}, // Root-level route (no /api prefix)
 		{"/api/otel", http.MethodGet},
 		// Google v1beta1 bridge should still proxy non-model requests (GET) and allow POST
 		{"/api/provider/google/v1beta1/models", http.MethodGet},
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -247,6 +247,9 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk
 	// Save initial YAML snapshot
 	s.oldConfigYaml, _ = yaml.Marshal(cfg)
 	s.applyAccessConfig(nil, cfg)
+	if authManager != nil {
+		authManager.SetRetryConfig(cfg.RequestRetry, time.Duration(cfg.MaxRetryInterval)*time.Second)
+	}
 	managementasset.SetCurrentConfig(cfg)
 	auth.SetQuotaCooldownDisabled(cfg.DisableCooling)
 	// Initialize management handler
@@ -509,6 +512,8 @@ func (s *Server) registerManagementRoutes() {

 		mgmt.GET("/logs", s.mgmt.GetLogs)
 		mgmt.DELETE("/logs", s.mgmt.DeleteLogs)
+		mgmt.GET("/request-error-logs", s.mgmt.GetRequestErrorLogs)
+		mgmt.GET("/request-error-logs/:name", s.mgmt.DownloadRequestErrorLog)
 		mgmt.GET("/request-log", s.mgmt.GetRequestLog)
 		mgmt.PUT("/request-log", s.mgmt.PutRequestLog)
 		mgmt.PATCH("/request-log", s.mgmt.PutRequestLog)
@@ -519,6 +524,9 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.GET("/request-retry", s.mgmt.GetRequestRetry)
 		mgmt.PUT("/request-retry", s.mgmt.PutRequestRetry)
 		mgmt.PATCH("/request-retry", s.mgmt.PutRequestRetry)
+		mgmt.GET("/max-retry-interval", s.mgmt.GetMaxRetryInterval)
+		mgmt.PUT("/max-retry-interval", s.mgmt.PutMaxRetryInterval)
+		mgmt.PATCH("/max-retry-interval", s.mgmt.PutMaxRetryInterval)

 		mgmt.GET("/claude-api-key", s.mgmt.GetClaudeKeys)
 		mgmt.PUT("/claude-api-key", s.mgmt.PutClaudeKeys)
@@ -535,6 +543,11 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.PATCH("/openai-compatibility", s.mgmt.PatchOpenAICompat)
 		mgmt.DELETE("/openai-compatibility", s.mgmt.DeleteOpenAICompat)

+		mgmt.GET("/oauth-excluded-models", s.mgmt.GetOAuthExcludedModels)
+		mgmt.PUT("/oauth-excluded-models", s.mgmt.PutOAuthExcludedModels)
+		mgmt.PATCH("/oauth-excluded-models", s.mgmt.PatchOAuthExcludedModels)
+		mgmt.DELETE("/oauth-excluded-models", s.mgmt.DeleteOAuthExcludedModels)
+
 		mgmt.GET("/auth-files", s.mgmt.ListAuthFiles)
 		mgmt.GET("/auth-files/download", s.mgmt.DownloadAuthFile)
 		mgmt.POST("/auth-files", s.mgmt.UploadAuthFile)
@@ -686,17 +699,33 @@ func (s *Server) unifiedModelsHandler(openaiHandler *openai.OpenAIAPIHandler, cl
 	}
 }

-// Start begins listening for and serving HTTP requests.
+// Start begins listening for and serving HTTP or HTTPS requests.
 // It's a blocking call and will only return on an unrecoverable error.
 //
 // Returns:
 //   - error: An error if the server fails to start
 func (s *Server) Start() error {
-	log.Debugf("Starting API server on %s", s.server.Addr)
+	if s == nil || s.server == nil {
+		return fmt.Errorf("failed to start HTTP server: server not initialized")
+	}

-	// Start the HTTP server.
-	if err := s.server.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
-		return fmt.Errorf("failed to start HTTP server: %v", err)
+	useTLS := s.cfg != nil && s.cfg.TLS.Enable
+	if useTLS {
+		cert := strings.TrimSpace(s.cfg.TLS.Cert)
+		key := strings.TrimSpace(s.cfg.TLS.Key)
+		if cert == "" || key == "" {
+			return fmt.Errorf("failed to start HTTPS server: tls.cert or tls.key is empty")
+		}
+		log.Debugf("Starting API server on %s with TLS", s.server.Addr)
+		if errServeTLS := s.server.ListenAndServeTLS(cert, key); errServeTLS != nil && !errors.Is(errServeTLS, http.ErrServerClosed) {
+			return fmt.Errorf("failed to start HTTPS server: %v", errServeTLS)
+		}
+		return nil
+	}
+
+	log.Debugf("Starting API server on %s", s.server.Addr)
+	if errServe := s.server.ListenAndServe(); errServe != nil && !errors.Is(errServe, http.ErrServerClosed) {
+		return fmt.Errorf("failed to start HTTP server: %v", errServe)
 	}

 	return nil
@@ -814,6 +843,9 @@ func (s *Server) UpdateClients(cfg *config.Config) {
 			log.Debugf("disable_cooling toggled to %t", cfg.DisableCooling)
 		}
 	}
+	if s.handlers != nil && s.handlers.AuthManager != nil {
+		s.handlers.AuthManager.SetRetryConfig(cfg.RequestRetry, time.Duration(cfg.MaxRetryInterval)*time.Second)
+	}

 	// Update log level dynamically when debug flag changes
 	if oldCfg == nil || oldCfg.Debug != cfg.Debug {
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -23,6 +23,9 @@ type Config struct {
 	// Port is the network port on which the API server will listen.
 	Port int `yaml:"port" json:"-"`

+	// TLS config controls HTTPS server settings.
+	TLS TLSConfig `yaml:"tls" json:"tls"`
+
 	// AmpUpstreamURL defines the upstream Amp control plane used for non-provider calls.
 	AmpUpstreamURL string `yaml:"amp-upstream-url" json:"amp-upstream-url"`

@@ -63,6 +66,8 @@ type Config struct {

 	// RequestRetry defines the retry times when the request failed.
 	RequestRetry int `yaml:"request-retry" json:"request-retry"`
+	// MaxRetryInterval defines the maximum wait time in seconds before retrying a cooled-down credential.
+	MaxRetryInterval int `yaml:"max-retry-interval" json:"max-retry-interval"`

 	// ClaudeKey defines a list of Claude API key configurations as specified in the YAML configuration file.
 	ClaudeKey []ClaudeKey `yaml:"claude-api-key" json:"claude-api-key"`
@@ -78,6 +83,19 @@ type Config struct {

 	// Payload defines default and override rules for provider payload parameters.
 	Payload PayloadConfig `yaml:"payload" json:"payload"`
+
+	// OAuthExcludedModels defines per-provider global model exclusions applied to OAuth/file-backed auth entries.
+	OAuthExcludedModels map[string][]string `yaml:"oauth-excluded-models,omitempty" json:"oauth-excluded-models,omitempty"`
+}
+
+// TLSConfig holds the HTTPS server settings read from the top-level `tls` key of the configuration.
+type TLSConfig struct {
+	// Enable toggles HTTPS server mode; when false, Server.Start serves plain HTTP and ignores Cert and Key.
+	Enable bool `yaml:"enable" json:"enable"`
+	// Cert is the path to the TLS certificate file; Server.Start trims it and refuses to start HTTPS when it is empty.
+	Cert string `yaml:"cert" json:"cert"`
+	// Key is the path to the TLS private key file; Server.Start trims it and refuses to start HTTPS when it is empty.
+	Key string `yaml:"key" json:"key"`
 }

 // RemoteManagement holds management API configuration under 'remote-management'.
@@ -142,6 +160,9 @@ type ClaudeKey struct {

 	// Headers optionally adds extra HTTP headers for requests sent with this key.
 	Headers map[string]string `yaml:"headers,omitempty" json:"headers,omitempty"`
+
+	// ExcludedModels lists model IDs that should be excluded for this provider.
+	ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"`
 }

 // ClaudeModel describes a mapping between an alias and the actual upstream model name.
@@ -168,6 +189,9 @@ type CodexKey struct {

 	// Headers optionally adds extra HTTP headers for requests sent with this key.
 	Headers map[string]string `yaml:"headers,omitempty" json:"headers,omitempty"`
+
+	// ExcludedModels lists model IDs that should be excluded for this provider.
+	ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"`
 }

 // GeminiKey represents the configuration for a Gemini API key,
@@ -184,6 +208,9 @@ type GeminiKey struct {

 	// Headers optionally adds extra HTTP headers for requests sent with this key.
 	Headers map[string]string `yaml:"headers,omitempty" json:"headers,omitempty"`
+
+	// ExcludedModels lists model IDs that should be excluded for this provider.
+	ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"`
 }

 // OpenAICompatibility represents the configuration for OpenAI API compatibility
@@ -307,6 +334,9 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 	// Sanitize OpenAI compatibility providers: drop entries without base-url
 	cfg.SanitizeOpenAICompatibility()

+	// Normalize OAuth provider model exclusion map.
+	cfg.OAuthExcludedModels = NormalizeOAuthExcludedModels(cfg.OAuthExcludedModels)
+
 	// Return the populated configuration struct.
 	return &cfg, nil
 }
@@ -344,6 +374,7 @@ func (cfg *Config) SanitizeCodexKeys() {
 		e := cfg.CodexKey[i]
 		e.BaseURL = strings.TrimSpace(e.BaseURL)
 		e.Headers = NormalizeHeaders(e.Headers)
+		e.ExcludedModels = NormalizeExcludedModels(e.ExcludedModels)
 		if e.BaseURL == "" {
 			continue
 		}
@@ -360,6 +391,7 @@ func (cfg *Config) SanitizeClaudeKeys() {
 	for i := range cfg.ClaudeKey {
 		entry := &cfg.ClaudeKey[i]
 		entry.Headers = NormalizeHeaders(entry.Headers)
+		entry.ExcludedModels = NormalizeExcludedModels(entry.ExcludedModels)
 	}
 }

@@ -380,6 +412,7 @@ func (cfg *Config) SanitizeGeminiKeys() {
 		entry.BaseURL = strings.TrimSpace(entry.BaseURL)
 		entry.ProxyURL = strings.TrimSpace(entry.ProxyURL)
 		entry.Headers = NormalizeHeaders(entry.Headers)
+		entry.ExcludedModels = NormalizeExcludedModels(entry.ExcludedModels)
 		if _, exists := seen[entry.APIKey]; exists {
 			continue
 		}
@@ -442,6 +475,55 @@ func NormalizeHeaders(headers map[string]string) map[string]string {
 	return clean
 }

+// NormalizeExcludedModels trims, lowercases, and deduplicates model exclusion patterns; characters such as "*" are kept verbatim.
+// It preserves the order of first occurrences, drops empty entries, and returns nil (never an empty slice) when nothing remains.
+func NormalizeExcludedModels(models []string) []string {
+	if len(models) == 0 {
+		return nil
+	}
+	seen := make(map[string]struct{}, len(models))
+	out := make([]string, 0, len(models))
+	for _, raw := range models {
+		trimmed := strings.ToLower(strings.TrimSpace(raw)) // normalized form: trimmed and lowercased
+		if trimmed == "" {
+			continue
+		}
+		if _, exists := seen[trimmed]; exists { // duplicates are detected on the normalized form
+			continue
+		}
+		seen[trimmed] = struct{}{}
+		out = append(out, trimmed)
+	}
+	if len(out) == 0 { // every input entry was blank
+		return nil
+	}
+	return out
+}
+
+// NormalizeOAuthExcludedModels cleans provider -> excluded models mappings by trimming and lowercasing provider keys
+// and applying NormalizeExcludedModels to each list; blank providers and providers left without models are dropped, and nil is returned when nothing remains.
+func NormalizeOAuthExcludedModels(entries map[string][]string) map[string][]string {
+	if len(entries) == 0 {
+		return nil
+	}
+	out := make(map[string][]string, len(entries))
+	for provider, models := range entries {
+		key := strings.ToLower(strings.TrimSpace(provider))
+		if key == "" {
+			continue
+		}
+		normalized := NormalizeExcludedModels(models)
+		if len(normalized) == 0 {
+			continue
+		}
+		out[key] = normalized // NOTE(review): keys that collide after normalization overwrite each other (lists are not merged); map iteration order decides which one survives
+	}
+	if len(out) == 0 {
+		return nil
+	}
+	return out
+}
+
 // hashSecret hashes the given secret using bcrypt.
 func hashSecret(secret string) (string, error) {
 	// Use default cost for simplicity.
--- a/internal/logging/request_logger.go
+++ b/internal/logging/request_logger.go
@@ -12,6 +12,7 @@ import (
 	"os"
 	"path/filepath"
 	"regexp"
+	"sort"
 	"strings"
 	"time"

@@ -156,17 +157,30 @@ func (l *FileRequestLogger) SetEnabled(enabled bool) {
 // Returns:
 //   - error: An error if logging fails, nil otherwise
 func (l *FileRequestLogger) LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage) error {
-	if !l.enabled {
+	return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, false)
+}
+
+// LogRequestWithOptions logs a request like LogRequest, but lets the caller force the write.
+// With force set while request logging is disabled, the entry is still written, under an "error-"-prefixed filename.
+func (l *FileRequestLogger) LogRequestWithOptions(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool) error {
+	return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, force)
+}
+
+func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool) error {
+	if !l.enabled && !force {
 		return nil
 	}

 	// Ensure logs directory exists
-	if err := l.ensureLogsDir(); err != nil {
-		return fmt.Errorf("failed to create logs directory: %w", err)
+	if errEnsure := l.ensureLogsDir(); errEnsure != nil {
+		return fmt.Errorf("failed to create logs directory: %w", errEnsure)
 	}

 	// Generate filename
 	filename := l.generateFilename(url)
+	if force && !l.enabled {
+		filename = l.generateErrorFilename(url)
+	}
 	filePath := filepath.Join(l.logsDir, filename)

 	// Decompress response if needed
@@ -184,6 +198,12 @@ func (l *FileRequestLogger) LogRequest(url, method string, requestHeaders map[st
 		return fmt.Errorf("failed to write log file: %w", err)
 	}

+	if force && !l.enabled {
+		if errCleanup := l.cleanupOldErrorLogs(); errCleanup != nil {
+			log.WithError(errCleanup).Warn("failed to clean up old error logs")
+		}
+	}
+
 	return nil
 }

@@ -239,6 +259,11 @@ func (l *FileRequestLogger) LogStreamingRequest(url, method string, headers map[
 	return writer, nil
 }

+// generateErrorFilename returns generateFilename(url) prefixed with "error-" to mark a forced error log; cleanupOldErrorLogs selects files by this prefix.
+func (l *FileRequestLogger) generateErrorFilename(url string) string {
+	return fmt.Sprintf("error-%s", l.generateFilename(url))
+}
+
 // ensureLogsDir creates the logs directory if it doesn't exist.
 //
 // Returns:
@@ -312,6 +337,52 @@ func (l *FileRequestLogger) sanitizeForFilename(path string) string {
 	return sanitized
 }

+// cleanupOldErrorLogs keeps only the 10 most recently modified forced error logs ("error-*.log") in l.logsDir; only a directory read failure is returned, other failures are logged.
+func (l *FileRequestLogger) cleanupOldErrorLogs() error {
+	entries, errRead := os.ReadDir(l.logsDir)
+	if errRead != nil {
+		return errRead
+	}
+
+	type logFile struct {
+		name    string
+		modTime time.Time
+	}
+
+	var files []logFile
+	for _, entry := range entries {
+		if entry.IsDir() {
+			continue
+		}
+		name := entry.Name()
+		if !strings.HasPrefix(name, "error-") || !strings.HasSuffix(name, ".log") { // only files named like forced error logs are candidates
+			continue
+		}
+		info, errInfo := entry.Info()
+		if errInfo != nil { // the file is skipped, so it is neither counted nor removed
+			log.WithError(errInfo).Warn("failed to read error log info")
+			continue
+		}
+		files = append(files, logFile{name: name, modTime: info.ModTime()})
+	}
+
+	if len(files) <= 10 { // at or below the retention limit: nothing to prune
+		return nil
+	}
+
+	sort.Slice(files, func(i, j int) bool { // newest modification time first
+		return files[i].modTime.After(files[j].modTime)
+	})
+
+	for _, file := range files[10:] { // everything after the 10 newest
+		if errRemove := os.Remove(filepath.Join(l.logsDir, file.name)); errRemove != nil { // best effort: log and continue with the remaining files
+			log.WithError(errRemove).Warnf("failed to remove old error log: %s", file.name)
+		}
+	}
+
+	return nil
+}
+
 // formatLogContent creates the complete log content for non-streaming requests.
 //
 // Parameters:
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -8,60 +8,140 @@ func GetClaudeModels() []*ModelInfo {
 	return []*ModelInfo{

 		{
-			ID:          "claude-haiku-4-5-20251001",
-			Object:      "model",
-			Created:     1759276800, // 2025-10-01
-			OwnedBy:     "anthropic",
-			Type:        "claude",
-			DisplayName: "Claude 4.5 Haiku",
+			ID:                  "claude-haiku-4-5-20251001",
+			Object:              "model",
+			Created:             1759276800, // 2025-10-01
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Haiku",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
 		},
 		{
-			ID:          "claude-sonnet-4-5-20250929",
-			Object:      "model",
-			Created:     1759104000, // 2025-09-29
-			OwnedBy:     "anthropic",
-			Type:        "claude",
-			DisplayName: "Claude 4.5 Sonnet",
+			ID:                  "claude-sonnet-4-5-20250929",
+			Object:              "model",
+			Created:             1759104000, // 2025-09-29
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Sonnet",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
 		},
 		{
-			ID:          "claude-opus-4-1-20250805",
-			Object:      "model",
-			Created:     1722945600, // 2025-08-05
-			OwnedBy:     "anthropic",
-			Type:        "claude",
-			DisplayName: "Claude 4.1 Opus",
+			ID:                  "claude-sonnet-4-5-thinking",
+			Object:              "model",
+			Created:             1759104000, // 2025-09-29
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Sonnet Thinking",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
-			ID:          "claude-opus-4-20250514",
-			Object:      "model",
-			Created:     1715644800, // 2025-05-14
-			OwnedBy:     "anthropic",
-			Type:        "claude",
-			DisplayName: "Claude 4 Opus",
+			ID:                  "claude-opus-4-5-thinking",
+			Object:              "model",
+			Created:             1761955200, // 2025-11-01
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Opus Thinking",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
-			ID:          "claude-sonnet-4-20250514",
-			Object:      "model",
-			Created:     1715644800, // 2025-05-14
-			OwnedBy:     "anthropic",
-			Type:        "claude",
-			DisplayName: "Claude 4 Sonnet",
+			ID:                  "claude-opus-4-5-thinking-low",
+			Object:              "model",
+			Created:             1761955200, // 2025-11-01
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Opus Thinking Low",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
-			ID:          "claude-3-7-sonnet-20250219",
-			Object:      "model",
-			Created:     1708300800, // 2025-02-19
-			OwnedBy:     "anthropic",
-			Type:        "claude",
-			DisplayName: "Claude 3.7 Sonnet",
+			ID:                  "claude-opus-4-5-thinking-medium",
+			Object:              "model",
+			Created:             1761955200, // 2025-11-01
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Opus Thinking Medium",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
-			ID:          "claude-3-5-haiku-20241022",
-			Object:      "model",
-			Created:     1729555200, // 2024-10-22
-			OwnedBy:     "anthropic",
-			Type:        "claude",
-			DisplayName: "Claude 3.5 Haiku",
+			ID:                  "claude-opus-4-5-thinking-high",
+			Object:              "model",
+			Created:             1761955200, // 2025-11-01
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Opus Thinking High",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                  "claude-opus-4-5-20251101",
+			Object:              "model",
+			Created:             1761955200, // 2025-11-01
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Opus",
+			Description:         "Premium model combining maximum intelligence with practical performance",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+		},
+		{
+			ID:                  "claude-opus-4-1-20250805",
+			Object:              "model",
+			Created:             1722945600, // 2025-08-05 per the model ID; NOTE(review): 1722945600 decodes to 2024-08-06 12:00 UTC — confirm intended value
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.1 Opus",
+			ContextLength:       200000,
+			MaxCompletionTokens: 32000,
+		},
+		{
+			ID:                  "claude-opus-4-20250514",
+			Object:              "model",
+			Created:             1715644800, // 2025-05-14 per the model ID; NOTE(review): 1715644800 decodes to 2024-05-14 00:00 UTC — confirm intended value
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4 Opus",
+			ContextLength:       200000,
+			MaxCompletionTokens: 32000,
+		},
+		{
+			ID:                  "claude-sonnet-4-20250514",
+			Object:              "model",
+			Created:             1715644800, // 2025-05-14 per the model ID; NOTE(review): 1715644800 decodes to 2024-05-14 00:00 UTC — confirm intended value
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4 Sonnet",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+		},
+		{
+			ID:                  "claude-3-7-sonnet-20250219",
+			Object:              "model",
+			Created:             1708300800, // 2025-02-19 per the model ID; NOTE(review): 1708300800 decodes to 2024-02-19 00:00 UTC — confirm intended value
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 3.7 Sonnet",
+			ContextLength:       128000,
+			MaxCompletionTokens: 8192,
+		},
+		{
+			ID:                  "claude-3-5-haiku-20241022",
+			Object:              "model",
+			Created:             1729555200, // 2024-10-22
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 3.5 Haiku",
+			ContextLength:       128000,
+			MaxCompletionTokens: 8192,
 		},
 	}
 }
@@ -129,6 +209,20 @@ func GetGeminiModels() []*ModelInfo {
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
+		{
+			ID:                         "gemini-3-pro-image-preview",
+			Object:                     "model",
+			Created:                    1737158400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-image-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Image Preview",
+			Description:                "Gemini 3 Pro Image Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+		},
 	}
 }

--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -826,7 +826,6 @@ func (r *ModelRegistry) CleanupExpiredQuotas() {
 	}
 }

-
 // GetFirstAvailableModel returns the first available model for the given handler type.
 // It prioritizes models by their creation timestamp (newest first) and checks if they have
 // available clients that are not suspended or over quota.
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -319,6 +319,9 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
 	payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
 	payload = fixGeminiImageAspectRatio(req.Model, payload)
 	payload = applyPayloadConfig(e.cfg, req.Model, payload)
+	payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens")
+	payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseMimeType")
+	payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseJsonSchema")
 	metadataAction := "generateContent"
 	if req.Metadata != nil {
 		if action, _ := req.Metadata["action"].(string); action == "countTokens" {
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -26,16 +26,18 @@ import (
 )

 const (
-	antigravityBaseURL          = "https://daily-cloudcode-pa.sandbox.googleapis.com"
-	antigravityStreamPath       = "/v1internal:streamGenerateContent"
-	antigravityGeneratePath     = "/v1internal:generateContent"
-	antigravityModelsPath       = "/v1internal:fetchAvailableModels"
-	antigravityClientID         = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
-	antigravityClientSecret     = "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"
-	defaultAntigravityAgent     = "antigravity/1.11.3 windows/amd64"
-	antigravityAuthType         = "antigravity"
-	refreshSkew                 = 5 * time.Minute
-	streamScannerBuffer     int = 20_971_520
+	antigravityBaseURLDaily        = "https://daily-cloudcode-pa.sandbox.googleapis.com"
+	antigravityBaseURLAutopush     = "https://autopush-cloudcode-pa.sandbox.googleapis.com"
+	antigravityBaseURLProd         = "https://cloudcode-pa.googleapis.com"
+	antigravityStreamPath          = "/v1internal:streamGenerateContent"
+	antigravityGeneratePath        = "/v1internal:generateContent"
+	antigravityModelsPath          = "/v1internal:fetchAvailableModels"
+	antigravityClientID            = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
+	antigravityClientSecret        = "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"
+	defaultAntigravityAgent        = "antigravity/1.11.5 windows/amd64"
+	antigravityAuthType            = "antigravity"
+	refreshSkew                    = 3000 * time.Second
+	streamScannerBuffer        int = 20_971_520
 )

 var randSource = rand.New(rand.NewSource(time.Now().UnixNano()))
@@ -73,43 +75,76 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 	to := sdktranslator.FromString("antigravity")
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)

-	httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, false, opts.Alt)
-	if errReq != nil {
-		return resp, errReq
-	}
-
+	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
-	httpResp, errDo := httpClient.Do(httpReq)
-	if errDo != nil {
-		recordAPIResponseError(ctx, e.cfg, errDo)
-		return resp, errDo
-	}
-	defer func() {
+
+	var lastStatus int
+	var lastBody []byte
+	var lastErr error
+
+	for idx, baseURL := range baseURLs {
+		httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, false, opts.Alt, baseURL)
+		if errReq != nil {
+			err = errReq
+			return resp, err
+		}
+
+		httpResp, errDo := httpClient.Do(httpReq)
+		if errDo != nil {
+			recordAPIResponseError(ctx, e.cfg, errDo)
+			lastStatus = 0
+			lastBody = nil
+			lastErr = errDo
+			if idx+1 < len(baseURLs) {
+				log.Debugf("antigravity executor: request error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+				continue
+			}
+			err = errDo
+			return resp, err
+		}
+
+		recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+		bodyBytes, errRead := io.ReadAll(httpResp.Body)
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("antigravity executor: close response body error: %v", errClose)
 		}
-	}()
+		if errRead != nil {
+			recordAPIResponseError(ctx, e.cfg, errRead)
+			err = errRead
+			return resp, err
+		}
+		appendAPIResponseChunk(ctx, e.cfg, bodyBytes)

-	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
-	bodyBytes, errRead := io.ReadAll(httpResp.Body)
-	if errRead != nil {
-		recordAPIResponseError(ctx, e.cfg, errRead)
-		return resp, errRead
-	}
-	appendAPIResponseChunk(ctx, e.cfg, bodyBytes)
+		if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
+			log.Debugf("antigravity executor: upstream error status: %d, body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), bodyBytes))
+			lastStatus = httpResp.StatusCode
+			lastBody = append([]byte(nil), bodyBytes...)
+			lastErr = nil
+			if httpResp.StatusCode == http.StatusTooManyRequests && idx+1 < len(baseURLs) {
+				log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+				continue
+			}
+			err = statusErr{code: httpResp.StatusCode, msg: string(bodyBytes)}
+			return resp, err
+		}

-	if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
-		log.Debugf("antigravity executor: upstream error status: %d, body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), bodyBytes))
-		err = statusErr{code: httpResp.StatusCode, msg: string(bodyBytes)}
-		return resp, err
+		reporter.publish(ctx, parseAntigravityUsage(bodyBytes))
+		var param any
+		converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bodyBytes, &param)
+		resp = cliproxyexecutor.Response{Payload: []byte(converted)}
+		reporter.ensurePublished(ctx)
+		return resp, nil
 	}

-	reporter.publish(ctx, parseAntigravityUsage(bodyBytes))
-	var param any
-	converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bodyBytes, &param)
-	resp = cliproxyexecutor.Response{Payload: []byte(converted)}
-	reporter.ensurePublished(ctx)
-	return resp, nil
+	switch {
+	case lastStatus != 0:
+		err = statusErr{code: lastStatus, msg: string(lastBody)}
+	case lastErr != nil:
+		err = lastErr
+	default:
+		err = statusErr{code: http.StatusServiceUnavailable, msg: "antigravity executor: no base url available"}
+	}
+	return resp, err
 }

 // ExecuteStream handles streaming requests via the antigravity upstream.
@@ -131,75 +166,121 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 	to := sdktranslator.FromString("antigravity")
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)

-	httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, true, opts.Alt)
-	if errReq != nil {
-		return nil, errReq
-	}
-
+	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
-	httpResp, errDo := httpClient.Do(httpReq)
-	if errDo != nil {
-		recordAPIResponseError(ctx, e.cfg, errDo)
-		return nil, errDo
-	}
-	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
-	if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
-		bodyBytes, _ := io.ReadAll(httpResp.Body)
-		appendAPIResponseChunk(ctx, e.cfg, bodyBytes)
-		if errClose := httpResp.Body.Close(); errClose != nil {
-			log.Errorf("antigravity executor: close response body error: %v", errClose)
-		}
-		err = statusErr{code: httpResp.StatusCode, msg: string(bodyBytes)}
-		return nil, err
-	}

-	out := make(chan cliproxyexecutor.StreamChunk)
-	stream = out
-	go func() {
-		defer close(out)
-		defer func() {
+	var lastStatus int
+	var lastBody []byte
+	var lastErr error
+
+	for idx, baseURL := range baseURLs {
+		httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, true, opts.Alt, baseURL)
+		if errReq != nil {
+			err = errReq
+			return nil, err
+		}
+
+		httpResp, errDo := httpClient.Do(httpReq)
+		if errDo != nil {
+			recordAPIResponseError(ctx, e.cfg, errDo)
+			lastStatus = 0
+			lastBody = nil
+			lastErr = errDo
+			if idx+1 < len(baseURLs) {
+				log.Debugf("antigravity executor: request error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+				continue
+			}
+			err = errDo
+			return nil, err
+		}
+		recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+		if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
+			bodyBytes, errRead := io.ReadAll(httpResp.Body)
 			if errClose := httpResp.Body.Close(); errClose != nil {
 				log.Errorf("antigravity executor: close response body error: %v", errClose)
 			}
-		}()
-		scanner := bufio.NewScanner(httpResp.Body)
-		scanner.Buffer(nil, streamScannerBuffer)
-		var param any
-		for scanner.Scan() {
-			line := scanner.Bytes()
-			appendAPIResponseChunk(ctx, e.cfg, line)
-
-			// Filter usage metadata for all models
-			// Only retain usage statistics in the terminal chunk
-			line = FilterSSEUsageMetadata(line)
-
-			payload := jsonPayload(line)
-			if payload == nil {
+			if errRead != nil {
+				recordAPIResponseError(ctx, e.cfg, errRead)
+				lastStatus = 0
+				lastBody = nil
+				lastErr = errRead
+				if idx+1 < len(baseURLs) {
+					log.Debugf("antigravity executor: read error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+					continue
+				}
+				err = errRead
+				return nil, err
+			}
+			appendAPIResponseChunk(ctx, e.cfg, bodyBytes)
+			lastStatus = httpResp.StatusCode
+			lastBody = append([]byte(nil), bodyBytes...)
+			lastErr = nil
+			if httpResp.StatusCode == http.StatusTooManyRequests && idx+1 < len(baseURLs) {
+				log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
 				continue
 			}
+			err = statusErr{code: httpResp.StatusCode, msg: string(bodyBytes)}
+			return nil, err
+		}

-			if detail, ok := parseAntigravityStreamUsage(payload); ok {
-				reporter.publish(ctx, detail)
-			}
+		out := make(chan cliproxyexecutor.StreamChunk)
+		stream = out
+		go func(resp *http.Response) {
+			defer close(out)
+			defer func() {
+				if errClose := resp.Body.Close(); errClose != nil {
+					log.Errorf("antigravity executor: close response body error: %v", errClose)
+				}
+			}()
+			scanner := bufio.NewScanner(resp.Body)
+			scanner.Buffer(nil, streamScannerBuffer)
+			var param any
+			for scanner.Scan() {
+				line := scanner.Bytes()
+				appendAPIResponseChunk(ctx, e.cfg, line)

-			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bytes.Clone(payload), &param)
-			for i := range chunks {
-				out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
+				// Filter usage metadata for all models
+				// Only retain usage statistics in the terminal chunk
+				line = FilterSSEUsageMetadata(line)
+
+				payload := jsonPayload(line)
+				if payload == nil {
+					continue
+				}
+
+				if detail, ok := parseAntigravityStreamUsage(payload); ok {
+					reporter.publish(ctx, detail)
+				}
+
+				chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bytes.Clone(payload), &param)
+				for i := range chunks {
+					out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
+				}
 			}
-		}
-		tail := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, []byte("[DONE]"), &param)
-		for i := range tail {
-			out <- cliproxyexecutor.StreamChunk{Payload: []byte(tail[i])}
-		}
-		if errScan := scanner.Err(); errScan != nil {
-			recordAPIResponseError(ctx, e.cfg, errScan)
-			reporter.publishFailure(ctx)
-			out <- cliproxyexecutor.StreamChunk{Err: errScan}
-		} else {
-			reporter.ensurePublished(ctx)
-		}
-	}()
-	return stream, nil
+			tail := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, []byte("[DONE]"), &param)
+			for i := range tail {
+				out <- cliproxyexecutor.StreamChunk{Payload: []byte(tail[i])}
+			}
+			if errScan := scanner.Err(); errScan != nil {
+				recordAPIResponseError(ctx, e.cfg, errScan)
+				reporter.publishFailure(ctx)
+				out <- cliproxyexecutor.StreamChunk{Err: errScan}
+			} else {
+				reporter.ensurePublished(ctx)
+			}
+		}(httpResp)
+		return stream, nil
+	}
+
+	switch {
+	case lastStatus != 0:
+		err = statusErr{code: lastStatus, msg: string(lastBody)}
+	case lastErr != nil:
+		err = lastErr
+	default:
+		err = statusErr{code: http.StatusServiceUnavailable, msg: "antigravity executor: no base url available"}
+	}
+	return nil, err
 }

 // Refresh refreshes the OAuth token using the refresh token.
@@ -230,54 +311,86 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 		auth = updatedAuth
 	}

-	modelsURL := buildBaseURL(auth) + antigravityModelsPath
-	httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodPost, modelsURL, bytes.NewReader([]byte(`{}`)))
-	if errReq != nil {
-		return nil
-	}
-	httpReq.Header.Set("Content-Type", "application/json")
-	httpReq.Header.Set("Authorization", "Bearer "+token)
-	httpReq.Header.Set("User-Agent", resolveUserAgent(auth))
-	if host := resolveHost(auth); host != "" {
-		httpReq.Host = host
-	}
-
+	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, cfg, auth, 0)
-	httpResp, errDo := httpClient.Do(httpReq)
-	if errDo != nil {
-		return nil
-	}
-	defer func() {
+
+	for idx, baseURL := range baseURLs {
+		modelsURL := baseURL + antigravityModelsPath
+		httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodPost, modelsURL, bytes.NewReader([]byte(`{}`)))
+		if errReq != nil {
+			return nil
+		}
+		httpReq.Header.Set("Content-Type", "application/json")
+		httpReq.Header.Set("Authorization", "Bearer "+token)
+		httpReq.Header.Set("User-Agent", resolveUserAgent(auth))
+		if host := resolveHost(baseURL); host != "" {
+			httpReq.Host = host
+		}
+
+		httpResp, errDo := httpClient.Do(httpReq)
+		if errDo != nil {
+			if idx+1 < len(baseURLs) {
+				log.Debugf("antigravity executor: models request error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+				continue
+			}
+			return nil
+		}
+
+		bodyBytes, errRead := io.ReadAll(httpResp.Body)
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("antigravity executor: close response body error: %v", errClose)
 		}
-	}()
+		if errRead != nil {
+			if idx+1 < len(baseURLs) {
+				log.Debugf("antigravity executor: models read error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+				continue
+			}
+			return nil
+		}
+		if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
+			if httpResp.StatusCode == http.StatusTooManyRequests && idx+1 < len(baseURLs) {
+				log.Debugf("antigravity executor: models request rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+				continue
+			}
+			return nil
+		}

-	bodyBytes, errRead := io.ReadAll(httpResp.Body)
-	if errRead != nil {
-		return nil
-	}
-	if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
-		return nil
-	}
+		result := gjson.GetBytes(bodyBytes, "models")
+		if !result.Exists() {
+			return nil
+		}

-	result := gjson.GetBytes(bodyBytes, "models")
-	if !result.Exists() {
-		return nil
+		now := time.Now().Unix()
+		models := make([]*registry.ModelInfo, 0, len(result.Map()))
+		for id := range result.Map() {
+			id = modelName2Alias(id)
+			if id != "" {
+				modelInfo := &registry.ModelInfo{
+					ID:          id,
+					Name:        id,
+					Description: id,
+					DisplayName: id,
+					Version:     id,
+					Object:      "model",
+					Created:     now,
+					OwnedBy:     antigravityAuthType,
+					Type:        antigravityAuthType,
+				}
+				// Add Thinking support for thinking models
+				if strings.HasSuffix(id, "-thinking") || strings.Contains(id, "-thinking-") {
+					modelInfo.Thinking = &registry.ThinkingSupport{
+						Min:            1024,
+						Max:            100000,
+						ZeroAllowed:    false,
+						DynamicAllowed: true,
+					}
+				}
+				models = append(models, modelInfo)
+			}
+		}
+		return models
 	}
-
-	now := time.Now().Unix()
-	models := make([]*registry.ModelInfo, 0, len(result.Map()))
-	for id := range result.Map() {
-		models = append(models, &registry.ModelInfo{
-			ID:      id,
-			Object:  "model",
-			Created: now,
-			OwnedBy: antigravityAuthType,
-			Type:    antigravityAuthType,
-		})
-	}
-	return models
+	return nil
 }

 func (e *AntigravityExecutor) ensureAccessToken(ctx context.Context, auth *cliproxyauth.Auth) (string, *cliproxyauth.Auth, error) {
@@ -363,12 +476,15 @@ func (e *AntigravityExecutor) refreshToken(ctx context.Context, auth *cliproxyau
 	return auth, nil
 }

-func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyauth.Auth, token, modelName string, payload []byte, stream bool, alt string) (*http.Request, error) {
+func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyauth.Auth, token, modelName string, payload []byte, stream bool, alt, baseURL string) (*http.Request, error) {
 	if token == "" {
 		return nil, statusErr{code: http.StatusUnauthorized, msg: "missing access token"}
 	}

-	base := buildBaseURL(auth)
+	base := strings.TrimSuffix(baseURL, "/")
+	if base == "" {
+		base = buildBaseURL(auth)
+	}
 	path := antigravityGeneratePath
 	if stream {
 		path = antigravityStreamPath
@@ -389,6 +505,7 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 	}

 	payload = geminiToAntigravity(modelName, payload)
+	payload, _ = sjson.SetBytes(payload, "model", alias2ModelName(modelName))
 	httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodPost, requestURL.String(), bytes.NewReader(payload))
 	if errReq != nil {
 		return nil, errReq
@@ -401,7 +518,7 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 	} else {
 		httpReq.Header.Set("Accept", "application/json")
 	}
-	if host := resolveHost(auth); host != "" {
+	if host := resolveHost(base); host != "" {
 		httpReq.Host = host
 	}

@@ -485,26 +602,13 @@ func int64Value(value any) (int64, bool) {
 }

 func buildBaseURL(auth *cliproxyauth.Auth) string {
-	if auth != nil {
-		if auth.Attributes != nil {
-			if v := strings.TrimSpace(auth.Attributes["base_url"]); v != "" {
-				return strings.TrimSuffix(v, "/")
-			}
-		}
-		if auth.Metadata != nil {
-			if v, ok := auth.Metadata["base_url"].(string); ok {
-				v = strings.TrimSpace(v)
-				if v != "" {
-					return strings.TrimSuffix(v, "/")
-				}
-			}
-		}
+	if baseURLs := antigravityBaseURLFallbackOrder(auth); len(baseURLs) > 0 {
+		return baseURLs[0]
 	}
-	return antigravityBaseURL
+	return antigravityBaseURLAutopush
 }

-func resolveHost(auth *cliproxyauth.Auth) string {
-	base := buildBaseURL(auth)
+func resolveHost(base string) string {
 	parsed, errParse := url.Parse(base)
 	if errParse != nil {
 		return ""
@@ -531,6 +635,37 @@ func resolveUserAgent(auth *cliproxyauth.Auth) string {
 	return defaultAntigravityAgent
 }

+// antigravityBaseURLFallbackOrder returns the base URLs to try, in order, for
+// the given auth. When the auth carries a custom base URL it is the only entry;
+// otherwise antigravityBaseURLDaily is tried first, then
+// antigravityBaseURLAutopush.
+func antigravityBaseURLFallbackOrder(auth *cliproxyauth.Auth) []string {
+	custom := resolveCustomAntigravityBaseURL(auth)
+	if custom == "" {
+		// antigravityBaseURLProd is currently left out of the fallback order.
+		return []string{antigravityBaseURLDaily, antigravityBaseURLAutopush}
+	}
+	return []string{custom}
+}
+
+// resolveCustomAntigravityBaseURL returns the "base_url" override stored on
+// auth, with surrounding whitespace and one trailing "/" removed. A non-blank
+// value in auth.Attributes takes precedence over a string value in
+// auth.Metadata. It returns "" when auth is nil or neither source holds a
+// non-blank value.
+func resolveCustomAntigravityBaseURL(auth *cliproxyauth.Auth) string {
+	if auth == nil {
+		return ""
+	}
+
+	var candidate string
+	if auth.Attributes != nil {
+		candidate = strings.TrimSpace(auth.Attributes["base_url"])
+	}
+	// Metadata is consulted only when Attributes yielded nothing usable.
+	if candidate == "" && auth.Metadata != nil {
+		if raw, isString := auth.Metadata["base_url"].(string); isString {
+			candidate = strings.TrimSpace(raw)
+		}
+	}
+	return strings.TrimSuffix(candidate, "/")
+}
+
 func geminiToAntigravity(modelName string, payload []byte) []byte {
 	template, _ := sjson.Set(string(payload), "model", modelName)
 	template, _ = sjson.Set(template, "userAgent", "antigravity")
@@ -540,18 +675,27 @@ func geminiToAntigravity(modelName string, payload []byte) []byte {

 	template, _ = sjson.Delete(template, "request.safetySettings")
 	template, _ = sjson.Set(template, "request.toolConfig.functionCallingConfig.mode", "VALIDATED")
+	template, _ = sjson.Delete(template, "request.generationConfig.maxOutputTokens")
+	if !strings.HasPrefix(modelName, "gemini-3-") {
+		if thinkingLevel := gjson.Get(template, "request.generationConfig.thinkingConfig.thinkingLevel"); thinkingLevel.Exists() {
+			template, _ = sjson.Delete(template, "request.generationConfig.thinkingConfig.thinkingLevel")
+			template, _ = sjson.Set(template, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
+		}
+	}

-	gjson.Get(template, "request.contents").ForEach(func(key, content gjson.Result) bool {
-		if content.Get("role").String() == "model" {
-			content.Get("parts").ForEach(func(partKey, part gjson.Result) bool {
-				if part.Get("functionCall").Exists() {
-					template, _ = sjson.Set(template, fmt.Sprintf("request.contents.%d.parts.%d.thoughtSignature", key.Int(), partKey.Int()), "skip_thought_signature_validator")
+	if strings.HasPrefix(modelName, "claude-sonnet-") {
+		gjson.Get(template, "request.tools").ForEach(func(key, tool gjson.Result) bool {
+			tool.Get("functionDeclarations").ForEach(func(funKey, funcDecl gjson.Result) bool {
+				if funcDecl.Get("parametersJsonSchema").Exists() {
+					template, _ = sjson.SetRaw(template, fmt.Sprintf("request.tools.%d.functionDeclarations.%d.parameters", key.Int(), funKey.Int()), funcDecl.Get("parametersJsonSchema").Raw)
+					template, _ = sjson.Delete(template, fmt.Sprintf("request.tools.%d.functionDeclarations.%d.parameters.$schema", key.Int(), funKey.Int()))
+					template, _ = sjson.Delete(template, fmt.Sprintf("request.tools.%d.functionDeclarations.%d.parametersJsonSchema", key.Int(), funKey.Int()))
 				}
 				return true
 			})
-		}
-		return true
-	})
+			return true
+		})
+	}

 	return []byte(template)
 }
@@ -573,3 +717,39 @@ func generateProjectID() string {
 	randomPart := strings.ToLower(uuid.NewString())[:5]
 	return adj + "-" + noun + "-" + randomPart
 }
+
+// antigravityUpstreamAliases maps upstream antigravity model names to the
+// names exposed to clients. An empty alias marks a model that
+// FetchAntigravityModels leaves out of its result.
+var antigravityUpstreamAliases = map[string]string{
+	"rev19-uic3-1p":              "gemini-2.5-computer-use-preview-10-2025",
+	"gemini-3-pro-image":         "gemini-3-pro-image-preview",
+	"gemini-3-pro-high":          "gemini-3-pro-preview",
+	"claude-sonnet-4-5":          "gemini-claude-sonnet-4-5",
+	"claude-sonnet-4-5-thinking": "gemini-claude-sonnet-4-5-thinking",
+
+	// Models mapped to the empty alias.
+	"chat_20706":                "",
+	"chat_23310":                "",
+	"gemini-2.5-flash-thinking": "",
+	"gemini-3-pro-low":          "",
+	"gemini-2.5-pro":            "",
+}
+
+// modelName2Alias converts an upstream antigravity model name into its
+// client-facing alias. Names without a table entry are returned unchanged;
+// names mapped to "" yield the empty string.
+func modelName2Alias(modelName string) string {
+	if alias, known := antigravityUpstreamAliases[modelName]; known {
+		return alias
+	}
+	return modelName
+}
+
+// antigravityAliasTargets maps client-facing model aliases back to the model
+// names written into the upstream antigravity request.
+var antigravityAliasTargets = map[string]string{
+	"gemini-2.5-computer-use-preview-10-2025": "rev19-uic3-1p",
+	"gemini-3-pro-image-preview":              "gemini-3-pro-image",
+	"gemini-3-pro-preview":                    "gemini-3-pro-high",
+	"gemini-claude-sonnet-4-5":                "claude-sonnet-4-5",
+	"gemini-claude-sonnet-4-5-thinking":       "claude-sonnet-4-5-thinking",
+}
+
+// alias2ModelName converts a client-facing alias into the upstream antigravity
+// model name. Names without a table entry are returned unchanged.
+func alias2ModelName(modelName string) string {
+	if upstream, known := antigravityAliasTargets[modelName]; known {
+		return upstream
+	}
+	return modelName
+}
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -17,6 +17,7 @@ import (
 	claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -58,18 +59,27 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 		body, _ = sjson.SetBytes(body, "model", modelOverride)
 		modelForUpstream = modelOverride
 	}
+	// Inject thinking config based on model suffix for thinking variants
+	body = e.injectThinkingConfig(req.Model, body)

 	if !strings.HasPrefix(modelForUpstream, "claude-3-5-haiku") {
-		body, _ = sjson.SetRawBytes(body, "system", []byte(misc.ClaudeCodeInstructions))
+		body = checkSystemInstructions(body)
 	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)

+	// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
+	body = ensureMaxTokensForThinking(req.Model, body)
+
+	// Extract betas from body and convert to header
+	var extraBetas []string
+	extraBetas, body = extractAndRemoveBetas(body)
+
 	url := fmt.Sprintf("%s/v1/messages?beta=true", baseURL)
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
 	if err != nil {
 		return resp, err
 	}
-	applyClaudeHeaders(httpReq, auth, apiKey, false)
+	applyClaudeHeaders(httpReq, auth, apiKey, false, extraBetas)
 	var authID, authLabel, authType, authValue string
 	if auth != nil {
 		authID = auth.ID
@@ -154,15 +164,24 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
 		body, _ = sjson.SetBytes(body, "model", modelOverride)
 	}
-	body, _ = sjson.SetRawBytes(body, "system", []byte(misc.ClaudeCodeInstructions))
+	// Inject thinking config based on model suffix for thinking variants
+	body = e.injectThinkingConfig(req.Model, body)
+	body = checkSystemInstructions(body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)

+	// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
+	body = ensureMaxTokensForThinking(req.Model, body)
+
+	// Extract betas from body and convert to header
+	var extraBetas []string
+	extraBetas, body = extractAndRemoveBetas(body)
+
 	url := fmt.Sprintf("%s/v1/messages?beta=true", baseURL)
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
 	if err != nil {
 		return nil, err
 	}
-	applyClaudeHeaders(httpReq, auth, apiKey, true)
+	applyClaudeHeaders(httpReq, auth, apiKey, true, extraBetas)
 	var authID, authLabel, authType, authValue string
 	if auth != nil {
 		authID = auth.ID
@@ -283,15 +302,19 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	}

 	if !strings.HasPrefix(modelForUpstream, "claude-3-5-haiku") {
-		body, _ = sjson.SetRawBytes(body, "system", []byte(misc.ClaudeCodeInstructions))
+		body = checkSystemInstructions(body)
 	}

+	// Extract betas from body and convert to header (for count_tokens too)
+	var extraBetas []string
+	extraBetas, body = extractAndRemoveBetas(body)
+
 	url := fmt.Sprintf("%s/v1/messages/count_tokens?beta=true", baseURL)
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
 	}
-	applyClaudeHeaders(httpReq, auth, apiKey, false)
+	applyClaudeHeaders(httpReq, auth, apiKey, false, extraBetas)
 	var authID, authLabel, authType, authValue string
 	if auth != nil {
 		authID = auth.ID
@@ -383,10 +406,101 @@ func (e *ClaudeExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (
 	return auth, nil
 }

+// extractAndRemoveBetas pulls the top-level "betas" field out of body.
+// It returns the non-blank, whitespace-trimmed beta names together with body
+// stripped of the "betas" key. When the key is absent it returns nil and the
+// body untouched.
+func extractAndRemoveBetas(body []byte) ([]string, []byte) {
+	raw := gjson.GetBytes(body, "betas")
+	if !raw.Exists() {
+		return nil, body
+	}
+
+	// A non-array "betas" value is handled as a one-element list.
+	candidates := []gjson.Result{raw}
+	if raw.IsArray() {
+		candidates = raw.Array()
+	}
+
+	var betas []string
+	for _, candidate := range candidates {
+		name := strings.TrimSpace(candidate.String())
+		if name == "" {
+			continue
+		}
+		betas = append(betas, name)
+	}
+
+	stripped, _ := sjson.DeleteBytes(body, "betas")
+	return betas, stripped
+}
+
+// injectThinkingConfig enables extended thinking on body when modelName ends
+// in one of the recognised "-thinking" suffixes, choosing budget_tokens from
+// the suffix. A body that already contains a "thinking" field, or a model name
+// without such a suffix, is returned unchanged.
+func (e *ClaudeExecutor) injectThinkingConfig(modelName string, body []byte) []byte {
+	// A thinking block already present in the request always wins.
+	if gjson.GetBytes(body, "thinking").Exists() {
+		return body
+	}
+
+	tiers := [...]struct {
+		suffix string
+		budget int
+	}{
+		{"-thinking-low", 1024},
+		{"-thinking-medium", 8192},
+		{"-thinking-high", 24576},
+		// A bare "-thinking" suffix uses the medium budget.
+		{"-thinking", 8192},
+	}
+
+	for _, tier := range tiers {
+		if !strings.HasSuffix(modelName, tier.suffix) {
+			continue
+		}
+		body, _ = sjson.SetBytes(body, "thinking.type", "enabled")
+		body, _ = sjson.SetBytes(body, "thinking.budget_tokens", tier.budget)
+		return body
+	}
+	return body
+}
+
+// ensureMaxTokensForThinking raises max_tokens so that it leaves room above
+// thinking.budget_tokens when thinking is enabled. The Anthropic API requires
+// max_tokens > thinking.budget_tokens and answers 400 otherwise, so call this
+// once the thinking configuration is final.
+//
+// The minimum it enforces is the model's MaxCompletionTokens from the global
+// registry when that is positive, and budget_tokens + 4000 otherwise. Bodies
+// with thinking disabled, a non-positive budget, or max_tokens already at or
+// above that minimum are returned unchanged.
+//
+// NOTE(review): when the registry value is itself <= budget_tokens, the
+// resulting max_tokens still does not exceed the budget — confirm whether
+// that combination can occur.
+func ensureMaxTokensForThinking(modelName string, body []byte) []byte {
+	if gjson.GetBytes(body, "thinking.type").String() != "enabled" {
+		return body
+	}
+	budget := gjson.GetBytes(body, "thinking.budget_tokens").Int()
+	if budget <= 0 {
+		return body
+	}
+
+	// Start from budget + buffer; a positive registry value replaces it.
+	const fallbackBuffer = 4000
+	minMaxTokens := budget + fallbackBuffer
+	if info := registry.GetGlobalRegistry().GetModelInfo(modelName); info != nil && info.MaxCompletionTokens > 0 {
+		minMaxTokens = int64(info.MaxCompletionTokens)
+	}
+
+	if gjson.GetBytes(body, "max_tokens").Int() >= minMaxTokens {
+		return body
+	}
+	body, _ = sjson.SetBytes(body, "max_tokens", minMaxTokens)
+	return body
+}
+
 func (e *ClaudeExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
 	if alias == "" {
 		return ""
 	}
+	// Hardcoded mappings for thinking models to actual Claude model names
+	switch alias {
+	case "claude-opus-4-5-thinking", "claude-opus-4-5-thinking-low", "claude-opus-4-5-thinking-medium", "claude-opus-4-5-thinking-high":
+		return "claude-opus-4-5-20251101"
+	case "claude-sonnet-4-5-thinking":
+		return "claude-sonnet-4-5-20250929"
+	}
 	entry := e.resolveClaudeConfig(auth)
 	if entry == nil {
 		return ""
@@ -530,7 +644,7 @@ func decodeResponseBody(body io.ReadCloser, contentEncoding string) (io.ReadClos
 	return body, nil
 }

-func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string, stream bool) {
+func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string, stream bool, extraBetas []string) {
 	r.Header.Set("Authorization", "Bearer "+apiKey)
 	r.Header.Set("Content-Type", "application/json")

@@ -539,15 +653,30 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
 		ginHeaders = ginCtx.Request.Header
 	}

+	baseBetas := "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14"
 	if val := strings.TrimSpace(ginHeaders.Get("Anthropic-Beta")); val != "" {
+		baseBetas = val
 		if !strings.Contains(val, "oauth") {
-			val += ",oauth-2025-04-20"
+			baseBetas += ",oauth-2025-04-20"
 		}
-		r.Header.Set("Anthropic-Beta", val)
-	} else {
-		r.Header.Set("Anthropic-Beta", "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14")
 	}

+	// Merge extra betas from request body
+	if len(extraBetas) > 0 {
+		existingSet := make(map[string]bool)
+		for _, b := range strings.Split(baseBetas, ",") {
+			existingSet[strings.TrimSpace(b)] = true
+		}
+		for _, beta := range extraBetas {
+			beta = strings.TrimSpace(beta)
+			if beta != "" && !existingSet[beta] {
+				baseBetas += "," + beta
+				existingSet[beta] = true
+			}
+		}
+	}
+	r.Header.Set("Anthropic-Beta", baseBetas)
+
 	misc.EnsureHeader(r.Header, ginHeaders, "Anthropic-Version", "2023-06-01")
 	misc.EnsureHeader(r.Header, ginHeaders, "Anthropic-Dangerous-Direct-Browser-Access", "true")
 	misc.EnsureHeader(r.Header, ginHeaders, "X-App", "cli")
@@ -590,3 +719,22 @@ func claudeCreds(a *cliproxyauth.Auth) (apiKey, baseURL string) {
 	}
 	return
 }
+
+// checkSystemInstructions makes the Claude Code identity prompt the first
+// entry of the request's "system" field while keeping system text supplied by
+// the caller after it.
+//
+//   - system is an array whose first text already is the identity prompt:
+//     payload is returned unchanged.
+//   - system is any other array: it is rebuilt as the identity prompt followed
+//     by every part whose type is "text"; parts of other types are dropped.
+//   - system is a non-blank string: it is converted to array form, with the
+//     string kept as a text block after the identity prompt.
+//   - anything else (missing, null, blank string, other types): system is set
+//     to the identity prompt alone.
+func checkSystemInstructions(payload []byte) []byte {
+	const identityPrompt = "You are Claude Code, Anthropic's official CLI for Claude."
+	claudeCodeInstructions := `[{"type":"text","text":"You are Claude Code, Anthropic's official CLI for Claude."}]`
+
+	system := gjson.GetBytes(payload, "system")
+	switch {
+	case system.IsArray():
+		if gjson.GetBytes(payload, "system.0.text").String() == identityPrompt {
+			return payload
+		}
+		system.ForEach(func(_, part gjson.Result) bool {
+			if part.Get("type").String() == "text" {
+				claudeCodeInstructions, _ = sjson.SetRaw(claudeCodeInstructions, "-1", part.Raw)
+			}
+			return true
+		})
+	case system.Type == gjson.String:
+		// A plain-string system prompt used to be discarded here; keep it as a
+		// text block so string and array forms are treated alike.
+		if text := system.String(); strings.TrimSpace(text) != "" && text != identityPrompt {
+			if part, errSet := sjson.Set(`{"type":"text"}`, "text", text); errSet == nil {
+				claudeCodeInstructions, _ = sjson.SetRaw(claudeCodeInstructions, "-1", part)
+			}
+		}
+	}
+
+	payload, _ = sjson.SetRawBytes(payload, "system", []byte(claudeCodeInstructions))
+	return payload
+}
--- a/internal/translator/antigravity/gemini/antigravity_gemini_request.go
+++ b/internal/translator/antigravity/gemini/antigravity_gemini_request.go
@@ -98,6 +98,20 @@ func ConvertGeminiRequestToAntigravity(_ string, inputRawJSON []byte, _ bool) []
 		}
 	}

+	gjson.GetBytes(rawJSON, "request.contents").ForEach(func(key, content gjson.Result) bool {
+		if content.Get("role").String() == "model" {
+			content.Get("parts").ForEach(func(partKey, part gjson.Result) bool {
+				if part.Get("functionCall").Exists() {
+					rawJSON, _ = sjson.SetBytes(rawJSON, fmt.Sprintf("request.contents.%d.parts.%d.thoughtSignature", key.Int(), partKey.Int()), "skip_thought_signature_validator")
+				} else if part.Get("thoughtSignature").Exists() {
+					rawJSON, _ = sjson.SetBytes(rawJSON, fmt.Sprintf("request.contents.%d.parts.%d.thoughtSignature", key.Int(), partKey.Int()), "skip_thought_signature_validator")
+				}
+				return true
+			})
+		}
+		return true
+	})
+
 	return common.AttachDefaultSafetySettings(rawJSON, "request.safetySettings")
 }

--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
@@ -271,7 +271,15 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 								if resp == "" {
 									resp = "{}"
 								}
-								toolNode, _ = sjson.SetBytes(toolNode, "parts."+itoa(pp)+".functionResponse.response.result", []byte(resp))
+								// Handle non-JSON output gracefully (matches dev branch approach)
+								if resp != "null" {
+									parsed := gjson.Parse(resp)
+									if parsed.Type == gjson.JSON {
+										toolNode, _ = sjson.SetRawBytes(toolNode, "parts."+itoa(pp)+".functionResponse.response.result", []byte(parsed.Raw))
+									} else {
+										toolNode, _ = sjson.SetBytes(toolNode, "parts."+itoa(pp)+".functionResponse.response.result", resp)
+									}
+								}
 								pp++
 							}
 						}
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go
@@ -105,14 +105,19 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq
 			partTextResult := partResult.Get("text")
 			functionCallResult := partResult.Get("functionCall")
 			thoughtSignatureResult := partResult.Get("thoughtSignature")
+			if !thoughtSignatureResult.Exists() {
+				thoughtSignatureResult = partResult.Get("thought_signature")
+			}
 			inlineDataResult := partResult.Get("inlineData")
 			if !inlineDataResult.Exists() {
 				inlineDataResult = partResult.Get("inline_data")
 			}

-			// Handle thoughtSignature - this is encrypted reasoning content that should not be exposed to the client
-			if thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != "" {
-				// Skip thoughtSignature processing - it's internal encrypted data
+			hasThoughtSignature := thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != ""
+			hasContentPayload := partTextResult.Exists() || functionCallResult.Exists() || inlineDataResult.Exists()
+
+			// Ignore encrypted thoughtSignature but keep any actual content in the same part.
+			if hasThoughtSignature && !hasContentPayload {
 				continue
 			}

--- a/internal/translator/gemini-cli/gemini/gemini-cli_gemini_request.go
+++ b/internal/translator/gemini-cli/gemini/gemini-cli_gemini_request.go
@@ -98,6 +98,20 @@ func ConvertGeminiRequestToGeminiCLI(_ string, inputRawJSON []byte, _ bool) []by
 		}
 	}

+	gjson.GetBytes(rawJSON, "request.contents").ForEach(func(key, content gjson.Result) bool {
+		if content.Get("role").String() == "model" {
+			content.Get("parts").ForEach(func(partKey, part gjson.Result) bool {
+				if part.Get("functionCall").Exists() {
+					rawJSON, _ = sjson.SetBytes(rawJSON, fmt.Sprintf("request.contents.%d.parts.%d.thoughtSignature", key.Int(), partKey.Int()), "skip_thought_signature_validator")
+				} else if part.Get("thoughtSignature").Exists() {
+					rawJSON, _ = sjson.SetBytes(rawJSON, fmt.Sprintf("request.contents.%d.parts.%d.thoughtSignature", key.Int(), partKey.Int()), "skip_thought_signature_validator")
+				}
+				return true
+			})
+		}
+		return true
+	})
+
 	return common.AttachDefaultSafetySettings(rawJSON, "request.safetySettings")
 }

--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
@@ -105,14 +105,19 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
 			partTextResult := partResult.Get("text")
 			functionCallResult := partResult.Get("functionCall")
 			thoughtSignatureResult := partResult.Get("thoughtSignature")
+			if !thoughtSignatureResult.Exists() {
+				thoughtSignatureResult = partResult.Get("thought_signature")
+			}
 			inlineDataResult := partResult.Get("inlineData")
 			if !inlineDataResult.Exists() {
 				inlineDataResult = partResult.Get("inline_data")
 			}

-			// Handle thoughtSignature - this is encrypted reasoning content that should not be exposed to the client
-			if thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != "" {
-				// Skip thoughtSignature processing - it's internal encrypted data
+			hasThoughtSignature := thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != ""
+			hasContentPayload := partTextResult.Exists() || functionCallResult.Exists() || inlineDataResult.Exists()
+
+			// Ignore encrypted thoughtSignature but keep any actual content in the same part.
+			if hasThoughtSignature && !hasContentPayload {
 				continue
 			}

--- a/internal/translator/gemini/gemini-cli/gemini_gemini-cli_request.go
+++ b/internal/translator/gemini/gemini-cli/gemini_gemini-cli_request.go
@@ -46,5 +46,19 @@ func ConvertGeminiCLIRequestToGemini(_ string, inputRawJSON []byte, _ bool) []by
 		}
 	}

+	gjson.GetBytes(rawJSON, "contents").ForEach(func(key, content gjson.Result) bool {
+		if content.Get("role").String() == "model" {
+			content.Get("parts").ForEach(func(partKey, part gjson.Result) bool {
+				if part.Get("functionCall").Exists() {
+					rawJSON, _ = sjson.SetBytes(rawJSON, fmt.Sprintf("contents.%d.parts.%d.thoughtSignature", key.Int(), partKey.Int()), "skip_thought_signature_validator")
+				} else if part.Get("thoughtSignature").Exists() {
+					rawJSON, _ = sjson.SetBytes(rawJSON, fmt.Sprintf("contents.%d.parts.%d.thoughtSignature", key.Int(), partKey.Int()), "skip_thought_signature_validator")
+				}
+				return true
+			})
+		}
+		return true
+	})
+
 	return common.AttachDefaultSafetySettings(rawJSON, "safetySettings")
 }
--- a/internal/translator/gemini/gemini/gemini_gemini_request.go
+++ b/internal/translator/gemini/gemini/gemini_gemini_request.go
@@ -30,6 +30,11 @@ func ConvertGeminiRequestToGemini(_ string, inputRawJSON []byte, _ bool) []byte
 	if toolsResult.Exists() && toolsResult.IsArray() {
 		toolResults := toolsResult.Array()
 		for i := 0; i < len(toolResults); i++ {
+			if gjson.GetBytes(rawJSON, fmt.Sprintf("tools.%d.functionDeclarations", i)).Exists() {
+				strJson, _ := util.RenameKey(string(rawJSON), fmt.Sprintf("tools.%d.functionDeclarations", i), fmt.Sprintf("tools.%d.function_declarations", i))
+				rawJSON = []byte(strJson)
+			}
+
 			functionDeclarationsResult := gjson.GetBytes(rawJSON, fmt.Sprintf("tools.%d.function_declarations", i))
 			if functionDeclarationsResult.Exists() && functionDeclarationsResult.IsArray() {
 				functionDeclarationsResults := functionDeclarationsResult.Array()
@@ -72,7 +77,20 @@ func ConvertGeminiRequestToGemini(_ string, inputRawJSON []byte, _ bool) []byte
 		return true
 	})

-	out = common.AttachDefaultSafetySettings(out, "safetySettings")
+	gjson.GetBytes(out, "contents").ForEach(func(key, content gjson.Result) bool {
+		if content.Get("role").String() == "model" {
+			content.Get("parts").ForEach(func(partKey, part gjson.Result) bool {
+				if part.Get("functionCall").Exists() {
+					out, _ = sjson.SetBytes(out, fmt.Sprintf("contents.%d.parts.%d.thoughtSignature", key.Int(), partKey.Int()), "skip_thought_signature_validator")
+				} else if part.Get("thoughtSignature").Exists() {
+					out, _ = sjson.SetBytes(out, fmt.Sprintf("contents.%d.parts.%d.thoughtSignature", key.Int(), partKey.Int()), "skip_thought_signature_validator")
+				}
+				return true
+			})
+		}
+		return true
+	})

+	out = common.AttachDefaultSafetySettings(out, "safetySettings")
 	return out
 }
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
@@ -116,8 +116,11 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
 				thoughtSignatureResult = partResult.Get("thought_signature")
 			}

-			// Skip thoughtSignature parts (encrypted reasoning not exposed downstream).
-			if thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != "" {
+			hasThoughtSignature := thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != ""
+			hasContentPayload := partTextResult.Exists() || functionCallResult.Exists() || inlineDataResult.Exists()
+
+			// Skip pure thoughtSignature parts but keep any actual payload in the same part.
+			if hasThoughtSignature && !hasContentPayload {
 				continue
 			}

--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
@@ -33,7 +33,83 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte

 	// Convert input messages to Gemini contents format
 	if input := root.Get("input"); input.Exists() && input.IsArray() {
-		input.ForEach(func(_, item gjson.Result) bool {
+		items := input.Array()
+
+		// Normalize consecutive function calls and outputs so each call is immediately followed by its response
+		normalized := make([]gjson.Result, 0, len(items))
+		for i := 0; i < len(items); {
+			item := items[i]
+			itemType := item.Get("type").String()
+			itemRole := item.Get("role").String()
+			if itemType == "" && itemRole != "" {
+				itemType = "message"
+			}
+
+			if itemType == "function_call" {
+				var calls []gjson.Result
+				var outputs []gjson.Result
+
+				for i < len(items) {
+					next := items[i]
+					nextType := next.Get("type").String()
+					nextRole := next.Get("role").String()
+					if nextType == "" && nextRole != "" {
+						nextType = "message"
+					}
+					if nextType != "function_call" {
+						break
+					}
+					calls = append(calls, next)
+					i++
+				}
+
+				for i < len(items) {
+					next := items[i]
+					nextType := next.Get("type").String()
+					nextRole := next.Get("role").String()
+					if nextType == "" && nextRole != "" {
+						nextType = "message"
+					}
+					if nextType != "function_call_output" {
+						break
+					}
+					outputs = append(outputs, next)
+					i++
+				}
+
+				if len(calls) > 0 {
+					outputMap := make(map[string]gjson.Result, len(outputs))
+					for _, out := range outputs {
+						outputMap[out.Get("call_id").String()] = out
+					}
+					for _, call := range calls {
+						normalized = append(normalized, call)
+						callID := call.Get("call_id").String()
+						if resp, ok := outputMap[callID]; ok {
+							normalized = append(normalized, resp)
+							delete(outputMap, callID)
+						}
+					}
+					for _, out := range outputs {
+						if _, ok := outputMap[out.Get("call_id").String()]; ok {
+							normalized = append(normalized, out)
+						}
+					}
+					continue
+				}
+			}
+
+			if itemType == "function_call_output" {
+				normalized = append(normalized, item)
+				i++
+				continue
+			}
+
+			normalized = append(normalized, item)
+			i++
+		}
+
+		for _, item := range normalized {
 			itemType := item.Get("type").String()
 			itemRole := item.Get("role").String()
 			if itemType == "" && itemRole != "" {
@@ -59,7 +135,7 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 							out, _ = sjson.SetRaw(out, "system_instruction", systemInstr)
 						}
 					}
-					return true
+					continue
 				}

 				// Handle regular messages
@@ -186,7 +262,8 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 			case "function_call_output":
 				// Handle function call outputs - convert to function message with functionResponse
 				callID := item.Get("call_id").String()
-				output := item.Get("output").String()
+				// Read the output as its decoded string value (.Str); a non-string output yields "" and is skipped below
+				outputRaw := item.Get("output").Str

 				functionContent := `{"role":"function","parts":[]}`
 				functionResponse := `{"functionResponse":{"name":"","response":{}}}`
@@ -209,18 +286,19 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte

 				functionResponse, _ = sjson.Set(functionResponse, "functionResponse.name", functionName)

-				// Parse output JSON string and set as response content
-				if output != "" {
-					outputResult := gjson.Parse(output)
-					functionResponse, _ = sjson.Set(functionResponse, "functionResponse.response.result", outputResult.Raw)
+				// Embed the output as raw JSON when it parses as an object/array; otherwise store it as a plain string
+				if outputRaw != "" && outputRaw != "null" {
+					output := gjson.Parse(outputRaw)
+					if output.Type == gjson.JSON {
+						functionResponse, _ = sjson.SetRaw(functionResponse, "functionResponse.response.result", output.Raw)
+					} else {
+						functionResponse, _ = sjson.Set(functionResponse, "functionResponse.response.result", outputRaw)
+					}
 				}
-
 				functionContent, _ = sjson.SetRaw(functionContent, "parts.-1", functionResponse)
 				out, _ = sjson.SetRaw(out, "contents.-1", functionContent)
 			}
-
-			return true
-		})
+		}
 	} else if input.Exists() && input.Type == gjson.String {
 		// Simple string input conversion to user message
 		userContent := `{"role":"user","parts":[{"text":""}]}`
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_response.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_response.go
@@ -433,12 +433,18 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
 			// output tokens
 			if v := um.Get("candidatesTokenCount"); v.Exists() {
 				completed, _ = sjson.Set(completed, "response.usage.output_tokens", v.Int())
+			} else {
+				completed, _ = sjson.Set(completed, "response.usage.output_tokens", 0)
 			}
 			if v := um.Get("thoughtsTokenCount"); v.Exists() {
 				completed, _ = sjson.Set(completed, "response.usage.output_tokens_details.reasoning_tokens", v.Int())
+			} else {
+				completed, _ = sjson.Set(completed, "response.usage.output_tokens_details.reasoning_tokens", 0)
 			}
 			if v := um.Get("totalTokenCount"); v.Exists() {
 				completed, _ = sjson.Set(completed, "response.usage.total_tokens", v.Int())
+			} else {
+				completed, _ = sjson.Set(completed, "response.usage.total_tokens", 0)
 			}
 		}

--- a/internal/translator/openai/openai/responses/openai_openai-responses_request.go
+++ b/internal/translator/openai/openai/responses/openai_openai-responses_request.go
@@ -202,6 +202,8 @@ func ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName string, inpu
 			out, _ = sjson.Set(out, "reasoning_effort", "medium")
 		case "high":
 			out, _ = sjson.Set(out, "reasoning_effort", "high")
+		case "xhigh":
+			out, _ = sjson.Set(out, "reasoning_effort", "xhigh")
 		default:
 			out, _ = sjson.Set(out, "reasoning_effort", "auto")
 		}
--- a/internal/watcher/watcher.go
+++ b/internal/watcher/watcher.go
@@ -30,6 +30,16 @@ import (
 	log "github.com/sirupsen/logrus"
 )

+func matchProvider(provider string, targets []string) (string, bool) {
+	p := strings.ToLower(strings.TrimSpace(provider))
+	for _, t := range targets {
+		if strings.EqualFold(p, strings.TrimSpace(t)) {
+			return p, true
+		}
+	}
+	return p, false
+}
+
 // storePersister captures persistence-capable token store methods used by the watcher.
 type storePersister interface {
 	PersistConfig(ctx context.Context) error
@@ -54,6 +64,7 @@ type Watcher struct {
 	lastConfigHash    string
 	authQueue         chan<- AuthUpdate
 	currentAuths      map[string]*coreauth.Auth
+	runtimeAuths      map[string]*coreauth.Auth
 	dispatchMu        sync.Mutex
 	dispatchCond      *sync.Cond
 	pendingUpdates    map[string]AuthUpdate
@@ -169,7 +180,7 @@ func (w *Watcher) Start(ctx context.Context) error {
 	go w.processEvents(ctx)

 	// Perform an initial full reload based on current config and auth dir
-	w.reloadClients(true)
+	w.reloadClients(true, nil)
 	return nil
 }

@@ -221,9 +232,57 @@ func (w *Watcher) SetAuthUpdateQueue(queue chan<- AuthUpdate) {
 	}
 }

+// DispatchRuntimeAuthUpdate allows external runtime providers (e.g., websocket-driven auths)
+// to push auth updates through the same queue used by file/config watchers.
+// Returns true if the update was enqueued; false if the watcher is nil or no queue is configured.
+func (w *Watcher) DispatchRuntimeAuthUpdate(update AuthUpdate) bool {
+	if w == nil {
+		return false
+	}
+	w.clientsMutex.Lock()
+	if w.runtimeAuths == nil {
+		w.runtimeAuths = make(map[string]*coreauth.Auth)
+	}
+	switch update.Action {
+	case AuthUpdateActionAdd, AuthUpdateActionModify:
+		if update.Auth != nil && update.Auth.ID != "" {
+			clone := update.Auth.Clone()
+			w.runtimeAuths[clone.ID] = clone
+			if w.currentAuths == nil {
+				w.currentAuths = make(map[string]*coreauth.Auth)
+			}
+			w.currentAuths[clone.ID] = clone.Clone()
+		}
+	case AuthUpdateActionDelete:
+		id := update.ID
+		if id == "" && update.Auth != nil {
+			id = update.Auth.ID
+		}
+		if id != "" {
+			delete(w.runtimeAuths, id)
+			if w.currentAuths != nil {
+				delete(w.currentAuths, id)
+			}
+		}
+	}
+	w.clientsMutex.Unlock()
+	if w.getAuthQueue() == nil {
+		return false
+	}
+	w.dispatchAuthUpdates([]AuthUpdate{update})
+	return true
+}
+
 func (w *Watcher) refreshAuthState() {
 	auths := w.SnapshotCoreAuths()
 	w.clientsMutex.Lock()
+	if len(w.runtimeAuths) > 0 {
+		for _, a := range w.runtimeAuths {
+			if a != nil {
+				auths = append(auths, a.Clone())
+			}
+		}
+	}
 	updates := w.prepareAuthUpdatesLocked(auths)
 	w.clientsMutex.Unlock()
 	w.dispatchAuthUpdates(updates)
@@ -450,6 +509,142 @@ func computeClaudeModelsHash(models []config.ClaudeModel) string {
 	return hex.EncodeToString(sum[:])
 }

+func computeExcludedModelsHash(excluded []string) string {
+	if len(excluded) == 0 {
+		return ""
+	}
+	normalized := make([]string, 0, len(excluded))
+	for _, entry := range excluded {
+		if trimmed := strings.TrimSpace(entry); trimmed != "" {
+			normalized = append(normalized, strings.ToLower(trimmed))
+		}
+	}
+	if len(normalized) == 0 {
+		return ""
+	}
+	sort.Strings(normalized)
+	data, err := json.Marshal(normalized)
+	if err != nil || len(data) == 0 {
+		return ""
+	}
+	sum := sha256.Sum256(data)
+	return hex.EncodeToString(sum[:])
+}
+
+type excludedModelsSummary struct {
+	hash  string
+	count int
+}
+
+func summarizeExcludedModels(list []string) excludedModelsSummary {
+	if len(list) == 0 {
+		return excludedModelsSummary{}
+	}
+	seen := make(map[string]struct{}, len(list))
+	normalized := make([]string, 0, len(list))
+	for _, entry := range list {
+		if trimmed := strings.ToLower(strings.TrimSpace(entry)); trimmed != "" {
+			if _, exists := seen[trimmed]; exists {
+				continue
+			}
+			seen[trimmed] = struct{}{}
+			normalized = append(normalized, trimmed)
+		}
+	}
+	sort.Strings(normalized)
+	return excludedModelsSummary{
+		hash:  computeExcludedModelsHash(normalized),
+		count: len(normalized),
+	}
+}
+
+func summarizeOAuthExcludedModels(entries map[string][]string) map[string]excludedModelsSummary {
+	if len(entries) == 0 {
+		return nil
+	}
+	out := make(map[string]excludedModelsSummary, len(entries))
+	for k, v := range entries {
+		key := strings.ToLower(strings.TrimSpace(k))
+		if key == "" {
+			continue
+		}
+		out[key] = summarizeExcludedModels(v)
+	}
+	return out
+}
+
+func diffOAuthExcludedModelChanges(oldMap, newMap map[string][]string) ([]string, []string) {
+	oldSummary := summarizeOAuthExcludedModels(oldMap)
+	newSummary := summarizeOAuthExcludedModels(newMap)
+	keys := make(map[string]struct{}, len(oldSummary)+len(newSummary))
+	for k := range oldSummary {
+		keys[k] = struct{}{}
+	}
+	for k := range newSummary {
+		keys[k] = struct{}{}
+	}
+	changes := make([]string, 0, len(keys))
+	affected := make([]string, 0, len(keys))
+	for key := range keys {
+		oldInfo, okOld := oldSummary[key]
+		newInfo, okNew := newSummary[key]
+		switch {
+		case okOld && !okNew:
+			changes = append(changes, fmt.Sprintf("oauth-excluded-models[%s]: removed", key))
+			affected = append(affected, key)
+		case !okOld && okNew:
+			changes = append(changes, fmt.Sprintf("oauth-excluded-models[%s]: added (%d entries)", key, newInfo.count))
+			affected = append(affected, key)
+		case okOld && okNew && oldInfo.hash != newInfo.hash:
+			changes = append(changes, fmt.Sprintf("oauth-excluded-models[%s]: updated (%d -> %d entries)", key, oldInfo.count, newInfo.count))
+			affected = append(affected, key)
+		}
+	}
+	sort.Strings(changes)
+	sort.Strings(affected)
+	return changes, affected
+}
+
+func applyAuthExcludedModelsMeta(auth *coreauth.Auth, cfg *config.Config, perKey []string, authKind string) {
+	if auth == nil || cfg == nil {
+		return
+	}
+	authKindKey := strings.ToLower(strings.TrimSpace(authKind))
+	seen := make(map[string]struct{})
+	add := func(list []string) {
+		for _, entry := range list {
+			if trimmed := strings.TrimSpace(entry); trimmed != "" {
+				key := strings.ToLower(trimmed)
+				if _, exists := seen[key]; exists {
+					continue
+				}
+				seen[key] = struct{}{}
+			}
+		}
+	}
+	if authKindKey == "apikey" {
+		add(perKey)
+	} else if cfg.OAuthExcludedModels != nil {
+		providerKey := strings.ToLower(strings.TrimSpace(auth.Provider))
+		add(cfg.OAuthExcludedModels[providerKey])
+	}
+	combined := make([]string, 0, len(seen))
+	for k := range seen {
+		combined = append(combined, k)
+	}
+	sort.Strings(combined)
+	hash := computeExcludedModelsHash(combined)
+	if auth.Attributes == nil {
+		auth.Attributes = make(map[string]string)
+	}
+	if hash != "" {
+		auth.Attributes["excluded_models_hash"] = hash
+	}
+	if authKind != "" {
+		auth.Attributes["auth_kind"] = authKind
+	}
+}
+
 // SetClients sets the file-based clients.
 // SetClients removed
 // SetAPIKeyClients removed
@@ -474,6 +669,33 @@ func (w *Watcher) processEvents(ctx context.Context) {
 	}
 }

+func (w *Watcher) authFileUnchanged(path string) (bool, error) {
+	data, errRead := os.ReadFile(path)
+	if errRead != nil {
+		return false, errRead
+	}
+	if len(data) == 0 {
+		return false, nil
+	}
+	sum := sha256.Sum256(data)
+	curHash := hex.EncodeToString(sum[:])
+
+	w.clientsMutex.RLock()
+	prevHash, ok := w.lastAuthHashes[path]
+	w.clientsMutex.RUnlock()
+	if ok && prevHash == curHash {
+		return true, nil
+	}
+	return false, nil
+}
+
+func (w *Watcher) isKnownAuthFile(path string) bool {
+	w.clientsMutex.RLock()
+	defer w.clientsMutex.RUnlock()
+	_, ok := w.lastAuthHashes[path]
+	return ok
+}
+
 // handleEvent processes individual file system events
 func (w *Watcher) handleEvent(event fsnotify.Event) {
 	// Filter only relevant events: config file or auth-dir JSON files.
@@ -497,19 +719,33 @@ func (w *Watcher) handleEvent(event fsnotify.Event) {
 	}

 	// Handle auth directory changes incrementally (.json only)
-	fmt.Printf("auth file changed (%s): %s, processing incrementally\n", event.Op.String(), filepath.Base(event.Name))
 	if event.Op&(fsnotify.Remove|fsnotify.Rename) != 0 {
 		// Atomic replace on some platforms may surface as Rename (or Remove) before the new file is ready.
 		// Wait briefly; if the path exists again, treat as an update instead of removal.
 		time.Sleep(replaceCheckDelay)
 		if _, statErr := os.Stat(event.Name); statErr == nil {
+			if unchanged, errSame := w.authFileUnchanged(event.Name); errSame == nil && unchanged {
+				log.Debugf("auth file unchanged (hash match), skipping reload: %s", filepath.Base(event.Name))
+				return
+			}
+			fmt.Printf("auth file changed (%s): %s, processing incrementally\n", event.Op.String(), filepath.Base(event.Name))
 			w.addOrUpdateClient(event.Name)
 			return
 		}
+		if !w.isKnownAuthFile(event.Name) {
+			log.Debugf("ignoring remove for unknown auth file: %s", filepath.Base(event.Name))
+			return
+		}
+		fmt.Printf("auth file changed (%s): %s, processing incrementally\n", event.Op.String(), filepath.Base(event.Name))
 		w.removeClient(event.Name)
 		return
 	}
 	if event.Op&(fsnotify.Create|fsnotify.Write) != 0 {
+		if unchanged, errSame := w.authFileUnchanged(event.Name); errSame == nil && unchanged {
+			log.Debugf("auth file unchanged (hash match), skipping reload: %s", filepath.Base(event.Name))
+			return
+		}
+		fmt.Printf("auth file changed (%s): %s, processing incrementally\n", event.Op.String(), filepath.Base(event.Name))
 		w.addOrUpdateClient(event.Name)
 	}
 }
@@ -593,6 +829,11 @@ func (w *Watcher) reloadConfig() bool {
 	w.config = newConfig
 	w.clientsMutex.Unlock()

+	var affectedOAuthProviders []string
+	if oldConfig != nil {
+		_, affectedOAuthProviders = diffOAuthExcludedModelChanges(oldConfig.OAuthExcludedModels, newConfig.OAuthExcludedModels)
+	}
+
 	// Always apply the current log level based on the latest config.
 	// This ensures logrus reflects the desired level even if change detection misses.
 	util.SetLogLevel(newConfig)
@@ -618,12 +859,12 @@ func (w *Watcher) reloadConfig() bool {

 	log.Infof("config successfully reloaded, triggering client reload")
 	// Reload clients with new config
-	w.reloadClients(authDirChanged)
+	w.reloadClients(authDirChanged, affectedOAuthProviders)
 	return true
 }

 // reloadClients performs a full scan and reload of all clients.
-func (w *Watcher) reloadClients(rescanAuth bool) {
+func (w *Watcher) reloadClients(rescanAuth bool, affectedOAuthProviders []string) {
 	log.Debugf("starting full client load process")

 	w.clientsMutex.RLock()
@@ -635,6 +876,28 @@ func (w *Watcher) reloadClients(rescanAuth bool) {
 		return
 	}

+	if len(affectedOAuthProviders) > 0 {
+		w.clientsMutex.Lock()
+		if w.currentAuths != nil {
+			filtered := make(map[string]*coreauth.Auth, len(w.currentAuths))
+			for id, auth := range w.currentAuths {
+				if auth == nil {
+					continue
+				}
+				provider := strings.ToLower(strings.TrimSpace(auth.Provider))
+				if _, match := matchProvider(provider, affectedOAuthProviders); match {
+					continue
+				}
+				filtered[id] = auth
+			}
+			w.currentAuths = filtered
+			log.Debugf("applying oauth-excluded-models to providers %v", affectedOAuthProviders)
+		} else {
+			w.currentAuths = nil
+		}
+		w.clientsMutex.Unlock()
+	}
+
 	// Unregister all old API key clients before creating new ones
 	// no legacy clients to unregister

@@ -808,6 +1071,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth {
 				CreatedAt:  now,
 				UpdatedAt:  now,
 			}
+			applyAuthExcludedModelsMeta(a, cfg, entry.ExcludedModels, "apikey")
 			out = append(out, a)
 		}
 		// Claude API keys -> synthesize auths
@@ -841,6 +1105,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth {
 				CreatedAt:  now,
 				UpdatedAt:  now,
 			}
+			applyAuthExcludedModelsMeta(a, cfg, ck.ExcludedModels, "apikey")
 			out = append(out, a)
 		}
 		// Codex API keys -> synthesize auths
@@ -870,6 +1135,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth {
 				CreatedAt:  now,
 				UpdatedAt:  now,
 			}
+			applyAuthExcludedModelsMeta(a, cfg, ck.ExcludedModels, "apikey")
 			out = append(out, a)
 		}
 		for i := range cfg.OpenAICompatibility {
@@ -1030,8 +1296,12 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth {
 			CreatedAt: now,
 			UpdatedAt: now,
 		}
+		applyAuthExcludedModelsMeta(a, cfg, nil, "oauth")
 		if provider == "gemini-cli" {
 			if virtuals := synthesizeGeminiVirtualAuths(a, metadata, now); len(virtuals) > 0 {
+				for _, v := range virtuals {
+					applyAuthExcludedModelsMeta(v, cfg, nil, "oauth")
+				}
 				out = append(out, a)
 				out = append(out, virtuals...)
 				continue
@@ -1378,6 +1648,9 @@ func buildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
 	if oldCfg.RequestRetry != newCfg.RequestRetry {
 		changes = append(changes, fmt.Sprintf("request-retry: %d -> %d", oldCfg.RequestRetry, newCfg.RequestRetry))
 	}
+	if oldCfg.MaxRetryInterval != newCfg.MaxRetryInterval {
+		changes = append(changes, fmt.Sprintf("max-retry-interval: %d -> %d", oldCfg.MaxRetryInterval, newCfg.MaxRetryInterval))
+	}
 	if oldCfg.ProxyURL != newCfg.ProxyURL {
 		changes = append(changes, fmt.Sprintf("proxy-url: %s -> %s", oldCfg.ProxyURL, newCfg.ProxyURL))
 	}
@@ -1420,6 +1693,11 @@ func buildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
 			if !equalStringMap(o.Headers, n.Headers) {
 				changes = append(changes, fmt.Sprintf("gemini[%d].headers: updated", i))
 			}
+			oldExcluded := summarizeExcludedModels(o.ExcludedModels)
+			newExcluded := summarizeExcludedModels(n.ExcludedModels)
+			if oldExcluded.hash != newExcluded.hash {
+				changes = append(changes, fmt.Sprintf("gemini[%d].excluded-models: updated (%d -> %d entries)", i, oldExcluded.count, newExcluded.count))
+			}
 		}
 		if !reflect.DeepEqual(trimStrings(oldCfg.GlAPIKey), trimStrings(newCfg.GlAPIKey)) {
 			changes = append(changes, "generative-language-api-key: values updated (legacy view, redacted)")
@@ -1448,6 +1726,11 @@ func buildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
 			if !equalStringMap(o.Headers, n.Headers) {
 				changes = append(changes, fmt.Sprintf("claude[%d].headers: updated", i))
 			}
+			oldExcluded := summarizeExcludedModels(o.ExcludedModels)
+			newExcluded := summarizeExcludedModels(n.ExcludedModels)
+			if oldExcluded.hash != newExcluded.hash {
+				changes = append(changes, fmt.Sprintf("claude[%d].excluded-models: updated (%d -> %d entries)", i, oldExcluded.count, newExcluded.count))
+			}
 		}
 	}

@@ -1473,9 +1756,18 @@ func buildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
 			if !equalStringMap(o.Headers, n.Headers) {
 				changes = append(changes, fmt.Sprintf("codex[%d].headers: updated", i))
 			}
+			oldExcluded := summarizeExcludedModels(o.ExcludedModels)
+			newExcluded := summarizeExcludedModels(n.ExcludedModels)
+			if oldExcluded.hash != newExcluded.hash {
+				changes = append(changes, fmt.Sprintf("codex[%d].excluded-models: updated (%d -> %d entries)", i, oldExcluded.count, newExcluded.count))
+			}
 		}
 	}

+	if entries, _ := diffOAuthExcludedModelChanges(oldCfg.OAuthExcludedModels, newCfg.OAuthExcludedModels); len(entries) > 0 {
+		changes = append(changes, entries...)
+	}
+
 	// Remote management (never print the key)
 	if oldCfg.RemoteManagement.AllowRemote != newCfg.RemoteManagement.AllowRemote {
 		changes = append(changes, fmt.Sprintf("remote-management.allow-remote: %t -> %t", oldCfg.RemoteManagement.AllowRemote, newCfg.RemoteManagement.AllowRemote))
--- a/sdk/api/handlers/gemini/gemini_handlers.go
+++ b/sdk/api/handlers/gemini/gemini_handlers.go
@@ -69,6 +69,27 @@ func (h *GeminiAPIHandler) GeminiGetHandler(c *gin.Context) {
 		return
 	}
 	switch request.Action {
+	case "gemini-3-pro-preview":
+		c.JSON(http.StatusOK, gin.H{
+			"name":             "models/gemini-3-pro-preview",
+			"version":          "3",
+			"displayName":      "Gemini 3 Pro Preview",
+			"description":      "Gemini 3 Pro Preview",
+			"inputTokenLimit":  1048576,
+			"outputTokenLimit": 65536,
+			"supportedGenerationMethods": []string{
+				"generateContent",
+				"countTokens",
+				"createCachedContent",
+				"batchGenerateContent",
+			},
+			"temperature":    1,
+			"topP":           0.95,
+			"topK":           64,
+			"maxTemperature": 2,
+			"thinking":       true,
+		},
+		)
 	case "gemini-2.5-pro":
 		c.JSON(http.StatusOK, gin.H{
 			"name":             "models/gemini-2.5-pro",
--- a/sdk/api/handlers/handlers.go
+++ b/sdk/api/handlers/handlers.go
@@ -4,6 +4,7 @@
 package handlers

 import (
+	"bytes"
 	"fmt"
 	"net/http"
 	"strings"
@@ -120,11 +121,11 @@ func (h *BaseAPIHandler) GetContextWithCancel(handler interfaces.APIHandler, c *
 				data := params[0]
 				switch data.(type) {
 				case []byte:
-					c.Set("API_RESPONSE", data.([]byte))
+					appendAPIResponse(c, data.([]byte))
 				case error:
-					c.Set("API_RESPONSE", []byte(data.(error).Error()))
+					appendAPIResponse(c, []byte(data.(error).Error()))
 				case string:
-					c.Set("API_RESPONSE", []byte(data.(string)))
+					appendAPIResponse(c, []byte(data.(string)))
 				case bool:
 				case nil:
 				}
@@ -135,6 +136,28 @@ func (h *BaseAPIHandler) GetContextWithCancel(handler interfaces.APIHandler, c *
 	}
 }

+// appendAPIResponse appends data to any API response already captured on c, newline-separating chunks; empty data is ignored.
+func appendAPIResponse(c *gin.Context, data []byte) {
+	if c == nil || len(data) == 0 {
+		return
+	}
+
+	if existing, exists := c.Get("API_RESPONSE"); exists {
+		if existingBytes, ok := existing.([]byte); ok && len(existingBytes) > 0 {
+			combined := make([]byte, 0, len(existingBytes)+len(data)+1)
+			combined = append(combined, existingBytes...)
+			if existingBytes[len(existingBytes)-1] != '\n' {
+				combined = append(combined, '\n')
+			}
+			combined = append(combined, data...)
+			c.Set("API_RESPONSE", combined)
+			return
+		}
+	}
+
+	c.Set("API_RESPONSE", bytes.Clone(data))
+}
+
 // ExecuteWithAuthManager executes a non-streaming request via the core auth manager.
 // This path is the only supported execution route.
 func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) {
@@ -297,7 +320,7 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
 func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string, normalizedModel string, metadata map[string]any, err *interfaces.ErrorMessage) {
 	// Resolve "auto" model to an actual available model first
 	resolvedModelName := util.ResolveAutoModel(modelName)
-	
+
 	providerName, extractedModelName, isDynamic := h.parseDynamicModel(resolvedModelName)

 	// First, normalize the model name to handle suffixes like "-thinking-128"
--- a/sdk/cliproxy/auth/manager.go
+++ b/sdk/cliproxy/auth/manager.go
@@ -106,6 +106,10 @@ type Manager struct {
 	// providerOffsets tracks per-model provider rotation state for multi-provider routing.
 	providerOffsets map[string]int

+	// Retry controls request retry behavior.
+	requestRetry     atomic.Int32
+	maxRetryInterval atomic.Int64
+
 	// Optional HTTP RoundTripper provider injected by host.
 	rtProvider RoundTripperProvider

@@ -145,6 +149,21 @@ func (m *Manager) SetRoundTripperProvider(p RoundTripperProvider) {
 	m.mu.Unlock()
 }

+// SetRetryConfig updates retry attempts and cooldown wait interval; negative values are clamped to zero.
+func (m *Manager) SetRetryConfig(retry int, maxRetryInterval time.Duration) {
+	if m == nil {
+		return
+	}
+	if retry < 0 {
+		retry = 0
+	}
+	if maxRetryInterval < 0 {
+		maxRetryInterval = 0
+	}
+	m.requestRetry.Store(int32(retry))
+	m.maxRetryInterval.Store(maxRetryInterval.Nanoseconds())
+}
+
 // RegisterExecutor registers a provider executor with the manager.
 func (m *Manager) RegisterExecutor(executor ProviderExecutor) {
 	if executor == nil {
@@ -188,8 +207,12 @@ func (m *Manager) Update(ctx context.Context, auth *Auth) (*Auth, error) {
 	if auth == nil || auth.ID == "" {
 		return nil, nil
 	}
-	auth.EnsureIndex()
 	m.mu.Lock()
+	if existing, ok := m.auths[auth.ID]; ok && existing != nil && !auth.indexAssigned && auth.Index == 0 {
+		auth.Index = existing.Index
+		auth.indexAssigned = existing.indexAssigned
+	}
+	auth.EnsureIndex()
 	m.auths[auth.ID] = auth.Clone()
 	m.mu.Unlock()
 	_ = m.persist(ctx, auth)
@@ -229,13 +252,28 @@ func (m *Manager) Execute(ctx context.Context, providers []string, req cliproxye
 	rotated := m.rotateProviders(req.Model, normalized)
 	defer m.advanceProviderCursor(req.Model, normalized)

+	retryTimes, maxWait := m.retrySettings()
+	attempts := retryTimes + 1
+	if attempts < 1 {
+		attempts = 1
+	}
+
 	var lastErr error
-	for _, provider := range rotated {
-		resp, errExec := m.executeWithProvider(ctx, provider, req, opts)
+	for attempt := 0; attempt < attempts; attempt++ {
+		resp, errExec := m.executeProvidersOnce(ctx, rotated, func(execCtx context.Context, provider string) (cliproxyexecutor.Response, error) {
+			return m.executeWithProvider(execCtx, provider, req, opts)
+		})
 		if errExec == nil {
 			return resp, nil
 		}
 		lastErr = errExec
+		wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, attempts, rotated, req.Model, maxWait)
+		if !shouldRetry {
+			break
+		}
+		if errWait := waitForCooldown(ctx, wait); errWait != nil {
+			return cliproxyexecutor.Response{}, errWait
+		}
 	}
 	if lastErr != nil {
 		return cliproxyexecutor.Response{}, lastErr
@@ -253,13 +291,28 @@ func (m *Manager) ExecuteCount(ctx context.Context, providers []string, req clip
 	rotated := m.rotateProviders(req.Model, normalized)
 	defer m.advanceProviderCursor(req.Model, normalized)

+	retryTimes, maxWait := m.retrySettings()
+	attempts := retryTimes + 1
+	if attempts < 1 {
+		attempts = 1
+	}
+
 	var lastErr error
-	for _, provider := range rotated {
-		resp, errExec := m.executeCountWithProvider(ctx, provider, req, opts)
+	for attempt := 0; attempt < attempts; attempt++ {
+		resp, errExec := m.executeProvidersOnce(ctx, rotated, func(execCtx context.Context, provider string) (cliproxyexecutor.Response, error) {
+			return m.executeCountWithProvider(execCtx, provider, req, opts)
+		})
 		if errExec == nil {
 			return resp, nil
 		}
 		lastErr = errExec
+		wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, attempts, rotated, req.Model, maxWait)
+		if !shouldRetry {
+			break
+		}
+		if errWait := waitForCooldown(ctx, wait); errWait != nil {
+			return cliproxyexecutor.Response{}, errWait
+		}
 	}
 	if lastErr != nil {
 		return cliproxyexecutor.Response{}, lastErr
@@ -277,13 +330,28 @@ func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cli
 	rotated := m.rotateProviders(req.Model, normalized)
 	defer m.advanceProviderCursor(req.Model, normalized)

+	retryTimes, maxWait := m.retrySettings()
+	attempts := retryTimes + 1
+	if attempts < 1 {
+		attempts = 1
+	}
+
 	var lastErr error
-	for _, provider := range rotated {
-		chunks, errStream := m.executeStreamWithProvider(ctx, provider, req, opts)
+	for attempt := 0; attempt < attempts; attempt++ {
+		chunks, errStream := m.executeStreamProvidersOnce(ctx, rotated, func(execCtx context.Context, provider string) (<-chan cliproxyexecutor.StreamChunk, error) {
+			return m.executeStreamWithProvider(execCtx, provider, req, opts)
+		})
 		if errStream == nil {
 			return chunks, nil
 		}
 		lastErr = errStream
+		wait, shouldRetry := m.shouldRetryAfterError(errStream, attempt, attempts, rotated, req.Model, maxWait)
+		if !shouldRetry {
+			break
+		}
+		if errWait := waitForCooldown(ctx, wait); errWait != nil {
+			return nil, errWait
+		}
 	}
 	if lastErr != nil {
 		return nil, lastErr
@@ -507,6 +575,123 @@ func (m *Manager) advanceProviderCursor(model string, providers []string) {
 	m.mu.Unlock()
 }

+func (m *Manager) retrySettings() (int, time.Duration) { // retrySettings returns the stored retry count and maximum cooldown wait.
+	if m == nil {
+		return 0, 0 // nil manager: zero retries and zero wait
+	}
+	return int(m.requestRetry.Load()), time.Duration(m.maxRetryInterval.Load()) // the interval is stored as nanoseconds by SetRetryConfig
+}
+
+func (m *Manager) closestCooldownWait(providers []string, model string) (time.Duration, bool) { // closestCooldownWait returns the shortest remaining wait among blocked auths of the given providers; the bool is false when none qualifies.
+	if m == nil || len(providers) == 0 {
+		return 0, false
+	}
+	now := time.Now()
+	providerSet := make(map[string]struct{}, len(providers)) // lookup set of trimmed, lower-cased provider names
+	for i := range providers {
+		key := strings.TrimSpace(strings.ToLower(providers[i]))
+		if key == "" {
+			continue
+		}
+		providerSet[key] = struct{}{}
+	}
+	m.mu.RLock() // read lock held while iterating m.auths
+	defer m.mu.RUnlock()
+	var (
+		found   bool
+		minWait time.Duration
+	)
+	for _, auth := range m.auths {
+		if auth == nil {
+			continue
+		}
+		providerKey := strings.TrimSpace(strings.ToLower(auth.Provider))
+		if _, ok := providerSet[providerKey]; !ok {
+			continue // auth belongs to a provider that was not requested
+		}
+		blocked, reason, next := isAuthBlockedForModel(auth, model, now)
+		if !blocked || next.IsZero() || reason == blockReasonDisabled { // skip auths that are not blocked, carry no next time, or are blocked as disabled
+			continue
+		}
+		wait := next.Sub(now)
+		if wait < 0 {
+			continue // next is already in the past
+		}
+		if !found || wait < minWait { // keep the smallest wait seen so far
+			minWait = wait
+			found = true
+		}
+	}
+	return minWait, found
+}
+
+func (m *Manager) shouldRetryAfterError(err error, attempt, maxAttempts int, providers []string, model string, maxWait time.Duration) (time.Duration, bool) { // shouldRetryAfterError reports whether another attempt should follow and how long to wait before it.
+	if err == nil || attempt >= maxAttempts-1 { // nothing failed, or this was the final attempt
+		return 0, false
+	}
+	if maxWait <= 0 { // a non-positive maxWait turns cooldown-based retries off
+		return 0, false
+	}
+	if status := statusCodeFromError(err); status == http.StatusOK { // an error that carries HTTP 200 is never retried
+		return 0, false
+	}
+	wait, found := m.closestCooldownWait(providers, model)
+	if !found || wait > maxWait { // no cooldown to wait out, or the nearest one exceeds the allowed wait
+		return 0, false
+	}
+	return wait, true
+}
+
+func waitForCooldown(ctx context.Context, wait time.Duration) error { // waitForCooldown blocks for wait or until ctx is done; it returns ctx.Err() in the latter case and nil otherwise.
+	if wait <= 0 {
+		return nil // nothing to wait for
+	}
+	timer := time.NewTimer(wait)
+	defer timer.Stop() // stop the timer on return, including when ctx finishes first
+	select {
+	case <-ctx.Done():
+		return ctx.Err()
+	case <-timer.C:
+		return nil
+	}
+}
+
+func (m *Manager) executeProvidersOnce(ctx context.Context, providers []string, fn func(context.Context, string) (cliproxyexecutor.Response, error)) (cliproxyexecutor.Response, error) { // executeProvidersOnce calls fn for each provider in order and returns the first success, or the last error when all fail.
+	if len(providers) == 0 {
+		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
+	}
+	var lastErr error
+	for _, provider := range providers {
+		resp, errExec := fn(ctx, provider)
+		if errExec == nil {
+			return resp, nil
+		}
+		lastErr = errExec // remember the failure and try the next provider
+	}
+	if lastErr != nil {
+		return cliproxyexecutor.Response{}, lastErr
+	}
+	return cliproxyexecutor.Response{}, &Error{Code: "auth_not_found", Message: "no auth available"} // defensive fallback: with a non-empty providers slice every iteration either returns or sets lastErr
+}
+
+func (m *Manager) executeStreamProvidersOnce(ctx context.Context, providers []string, fn func(context.Context, string) (<-chan cliproxyexecutor.StreamChunk, error)) (<-chan cliproxyexecutor.StreamChunk, error) { // executeStreamProvidersOnce calls fn for each provider in order and returns the first chunk channel obtained without error, or the last error when all fail.
+	if len(providers) == 0 {
+		return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"}
+	}
+	var lastErr error
+	for _, provider := range providers {
+		chunks, errExec := fn(ctx, provider)
+		if errExec == nil {
+			return chunks, nil
+		}
+		lastErr = errExec // remember the failure and try the next provider
+	}
+	if lastErr != nil {
+		return nil, lastErr
+	}
+	return nil, &Error{Code: "auth_not_found", Message: "no auth available"} // defensive fallback: with a non-empty providers slice every iteration either returns or sets lastErr
+}
+
 // MarkResult records an execution result and notifies hooks.
 func (m *Manager) MarkResult(ctx context.Context, result Result) {
 	if result.AuthID == "" {
@@ -762,6 +947,20 @@ func cloneError(err *Error) *Error {
 	}
 }

+func statusCodeFromError(err error) int { // statusCodeFromError returns StatusCode() from the first error in err's chain that provides it, or 0 when none does.
+	if err == nil {
+		return 0
+	}
+	type statusCoder interface { // local interface: any error type exposing StatusCode() int qualifies
+		StatusCode() int
+	}
+	var sc statusCoder
+	if errors.As(err, &sc) && sc != nil {
+		return sc.StatusCode()
+	}
+	return 0
+}
+
 func retryAfterFromError(err error) *time.Duration {
 	if err == nil {
 		return nil
--- a/sdk/cliproxy/service.go
+++ b/sdk/cliproxy/service.go
@@ -146,6 +146,27 @@ func (s *Service) consumeAuthUpdates(ctx context.Context) {
 	}
 }

+func (s *Service) emitAuthUpdate(ctx context.Context, update watcher.AuthUpdate) { // emitAuthUpdate delivers a runtime auth update: first via the watcher, then via the authUpdates channel, and finally by handling it inline.
+	if s == nil {
+		return
+	}
+	if ctx == nil {
+		ctx = context.Background() // tolerate callers that pass a nil context
+	}
+	if s.watcher != nil && s.watcher.DispatchRuntimeAuthUpdate(update) {
+		return // the watcher accepted the update
+	}
+	if s.authUpdates != nil {
+		select { // non-blocking send
+		case s.authUpdates <- update:
+			return
+		default:
+			log.Debugf("auth update queue saturated, applying inline action=%v id=%s", update.Action, update.ID)
+		}
+	}
+	s.handleAuthUpdate(ctx, update) // reached when there is no queue or the send would have blocked
+}
+
 func (s *Service) handleAuthUpdate(ctx context.Context, update watcher.AuthUpdate) {
 	if s == nil {
 		return
@@ -220,7 +241,11 @@ func (s *Service) wsOnConnected(channelID string) {
 		Metadata:   map[string]any{"email": channelID}, // metadata drives logging and usage tracking
 	}
 	log.Infof("websocket provider connected: %s", channelID)
-	s.applyCoreAuthAddOrUpdate(context.Background(), auth)
+	s.emitAuthUpdate(context.Background(), watcher.AuthUpdate{
+		Action: watcher.AuthUpdateActionAdd,
+		ID:     auth.ID,
+		Auth:   auth,
+	})
 }

 func (s *Service) wsOnDisconnected(channelID string, reason error) {
@@ -237,7 +262,10 @@ func (s *Service) wsOnDisconnected(channelID string, reason error) {
 		log.Infof("websocket provider disconnected: %s", channelID)
 	}
 	ctx := context.Background()
-	s.applyCoreAuthRemoval(ctx, channelID)
+	s.emitAuthUpdate(ctx, watcher.AuthUpdate{
+		Action: watcher.AuthUpdateActionDelete,
+		ID:     channelID,
+	})
 }

 func (s *Service) applyCoreAuthAddOrUpdate(ctx context.Context, auth *coreauth.Auth) {
@@ -281,6 +309,14 @@ func (s *Service) applyCoreAuthRemoval(ctx context.Context, id string) {
 	}
 }

+func (s *Service) applyRetryConfig(cfg *config.Config) { // applyRetryConfig passes cfg's retry count and maximum retry interval to the core manager.
+	if s == nil || s.coreManager == nil || cfg == nil {
+		return
+	}
+	maxInterval := time.Duration(cfg.MaxRetryInterval) * time.Second // MaxRetryInterval is treated as a number of seconds
+	s.coreManager.SetRetryConfig(cfg.RequestRetry, maxInterval)
+}
+
 func openAICompatInfoFromAuth(a *coreauth.Auth) (providerKey string, compatName string, ok bool) {
 	if a == nil {
 		return "", "", false
@@ -394,6 +430,8 @@ func (s *Service) Run(ctx context.Context) error {
 		return err
 	}

+	s.applyRetryConfig(s.cfg)
+
 	if s.coreManager != nil {
 		if errLoad := s.coreManager.Load(ctx); errLoad != nil {
 			log.Warnf("failed to load auth store: %v", errLoad)
@@ -476,6 +514,7 @@ func (s *Service) Run(ctx context.Context) error {
 		if newCfg == nil {
 			return
 		}
+		s.applyRetryConfig(newCfg)
 		if s.server != nil {
 			s.server.UpdateClients(newCfg)
 		}
@@ -606,6 +645,7 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
 	if a == nil || a.ID == "" {
 		return
 	}
+	authKind := strings.ToLower(strings.TrimSpace(a.Attributes["auth_kind"]))
 	if a.Attributes != nil {
 		if v := strings.TrimSpace(a.Attributes["gemini_virtual_primary"]); strings.EqualFold(v, "true") {
 			GlobalModelRegistry().UnregisterClient(a.ID)
@@ -625,32 +665,57 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
 	if compatDetected {
 		provider = "openai-compatibility"
 	}
+	excluded := s.oauthExcludedModels(provider, authKind)
 	var models []*ModelInfo
 	switch provider {
 	case "gemini":
 		models = registry.GetGeminiModels()
+		if entry := s.resolveConfigGeminiKey(a); entry != nil {
+			if authKind == "apikey" {
+				excluded = entry.ExcludedModels
+			}
+		}
+		models = applyExcludedModels(models, excluded)
 	case "vertex":
 		// Vertex AI Gemini supports the same model identifiers as Gemini.
 		models = registry.GetGeminiVertexModels()
+		models = applyExcludedModels(models, excluded)
 	case "gemini-cli":
 		models = registry.GetGeminiCLIModels()
+		models = applyExcludedModels(models, excluded)
 	case "aistudio":
 		models = registry.GetAIStudioModels()
+		models = applyExcludedModels(models, excluded)
 	case "antigravity":
 		ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
 		models = executor.FetchAntigravityModels(ctx, a, s.cfg)
 		cancel()
+		models = applyExcludedModels(models, excluded)
 	case "claude":
 		models = registry.GetClaudeModels()
-		if entry := s.resolveConfigClaudeKey(a); entry != nil && len(entry.Models) > 0 {
-			models = buildClaudeConfigModels(entry)
+		if entry := s.resolveConfigClaudeKey(a); entry != nil {
+			if len(entry.Models) > 0 {
+				models = buildClaudeConfigModels(entry)
+			}
+			if authKind == "apikey" {
+				excluded = entry.ExcludedModels
+			}
 		}
+		models = applyExcludedModels(models, excluded)
 	case "codex":
 		models = registry.GetOpenAIModels()
+		if entry := s.resolveConfigCodexKey(a); entry != nil {
+			if authKind == "apikey" {
+				excluded = entry.ExcludedModels
+			}
+		}
+		models = applyExcludedModels(models, excluded)
 	case "qwen":
 		models = registry.GetQwenModels()
+		models = applyExcludedModels(models, excluded)
 	case "iflow":
 		models = registry.GetIFlowModels()
+		models = applyExcludedModels(models, excluded)
 	default:
 		// Handle OpenAI-compatibility providers by name using config
 		if s.cfg != nil {
@@ -738,7 +803,10 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
 			key = strings.ToLower(strings.TrimSpace(a.Provider))
 		}
 		GlobalModelRegistry().RegisterClient(a.ID, key, models)
+		return
 	}
+
+	GlobalModelRegistry().UnregisterClient(a.ID)
 }

 func (s *Service) resolveConfigClaudeKey(auth *coreauth.Auth) *config.ClaudeKey {
@@ -780,6 +848,150 @@ func (s *Service) resolveConfigClaudeKey(auth *coreauth.Auth) *config.ClaudeKey
 	return nil
 }

+func (s *Service) resolveConfigGeminiKey(auth *coreauth.Auth) *config.GeminiKey { // resolveConfigGeminiKey returns the GeminiKey config entry matching auth's "api_key"/"base_url" attributes, or nil when none matches.
+	if auth == nil || s.cfg == nil {
+		return nil
+	}
+	var attrKey, attrBase string
+	if auth.Attributes != nil {
+		attrKey = strings.TrimSpace(auth.Attributes["api_key"])
+		attrBase = strings.TrimSpace(auth.Attributes["base_url"])
+	}
+	for i := range s.cfg.GeminiKey {
+		entry := &s.cfg.GeminiKey[i] // pointer into the config slice, not a copy
+		cfgKey := strings.TrimSpace(entry.APIKey)
+		cfgBase := strings.TrimSpace(entry.BaseURL)
+		if attrKey != "" && strings.EqualFold(cfgKey, attrKey) { // NOTE(review): the API key comparison ignores case — confirm this is intended
+			if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) { // an entry without a base URL matches any auth base URL
+				return entry
+			}
+			continue // key matched but the base URL differs; keep scanning
+		}
+		if attrKey == "" && attrBase != "" && strings.EqualFold(cfgBase, attrBase) { // auth has no API key: match on base URL alone
+			return entry
+		}
+	}
+	return nil
+}
+
+func (s *Service) resolveConfigCodexKey(auth *coreauth.Auth) *config.CodexKey { // resolveConfigCodexKey returns the CodexKey config entry matching auth's "api_key"/"base_url" attributes, or nil when none matches.
+	if auth == nil || s.cfg == nil {
+		return nil
+	}
+	var attrKey, attrBase string
+	if auth.Attributes != nil {
+		attrKey = strings.TrimSpace(auth.Attributes["api_key"])
+		attrBase = strings.TrimSpace(auth.Attributes["base_url"])
+	}
+	for i := range s.cfg.CodexKey {
+		entry := &s.cfg.CodexKey[i] // pointer into the config slice, not a copy
+		cfgKey := strings.TrimSpace(entry.APIKey)
+		cfgBase := strings.TrimSpace(entry.BaseURL)
+		if attrKey != "" && strings.EqualFold(cfgKey, attrKey) { // NOTE(review): the API key comparison ignores case — confirm this is intended
+			if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) { // an entry without a base URL matches any auth base URL
+				return entry
+			}
+			continue // key matched but the base URL differs; keep scanning
+		}
+		if attrKey == "" && attrBase != "" && strings.EqualFold(cfgBase, attrBase) { // auth has no API key: match on base URL alone
+			return entry
+		}
+	}
+	return nil
+}
+
+func (s *Service) oauthExcludedModels(provider, authKind string) []string { // oauthExcludedModels returns the OAuthExcludedModels config entry for provider, or nil for "apikey" auths or when no config is set.
+	cfg := s.cfg // NOTE(review): s itself is not nil-checked here, unlike emitAuthUpdate/applyRetryConfig
+	if cfg == nil {
+		return nil
+	}
+	authKindKey := strings.ToLower(strings.TrimSpace(authKind))
+	providerKey := strings.ToLower(strings.TrimSpace(provider))
+	if authKindKey == "apikey" {
+		return nil // API-key auths get no exclusions from this map
+	}
+	return cfg.OAuthExcludedModels[providerKey] // looked up by the trimmed, lower-cased provider name
+}
+
+func applyExcludedModels(models []*ModelInfo, excluded []string) []*ModelInfo { // applyExcludedModels returns models minus those whose ID matches any excluded pattern; IDs and patterns are compared trimmed and lower-cased.
+	if len(models) == 0 || len(excluded) == 0 {
+		return models // nothing to filter; the input slice is returned unchanged
+	}
+
+	patterns := make([]string, 0, len(excluded)) // blank patterns are dropped, the rest are lower-cased
+	for _, item := range excluded {
+		if trimmed := strings.TrimSpace(item); trimmed != "" {
+			patterns = append(patterns, strings.ToLower(trimmed))
+		}
+	}
+	if len(patterns) == 0 {
+		return models
+	}
+
+	filtered := make([]*ModelInfo, 0, len(models))
+	for _, model := range models {
+		if model == nil {
+			continue // nil entries are left out of the filtered result
+		}
+		modelID := strings.ToLower(strings.TrimSpace(model.ID))
+		blocked := false
+		for _, pattern := range patterns {
+			if matchWildcard(pattern, modelID) {
+				blocked = true
+				break // one matching pattern is enough
+			}
+		}
+		if !blocked {
+			filtered = append(filtered, model)
+		}
+	}
+	return filtered
+}
+
+// matchWildcard reports whether value matches pattern, where '*' matches any substring (including the empty one). The comparison itself is case-sensitive; applyExcludedModels lower-cases both arguments before calling. An empty pattern never matches.
+func matchWildcard(pattern, value string) bool {
+	if pattern == "" {
+		return false
+	}
+
+	// Fast path for exact match (no wildcard present).
+	if !strings.Contains(pattern, "*") {
+		return pattern == value
+	}
+
+	parts := strings.Split(pattern, "*") // at least two parts here, since pattern contains a '*'
+	// Handle prefix.
+	if prefix := parts[0]; prefix != "" {
+		if !strings.HasPrefix(value, prefix) {
+			return false
+		}
+		value = value[len(prefix):] // consume the prefix so later parts cannot overlap it
+	}
+
+	// Handle suffix.
+	if suffix := parts[len(parts)-1]; suffix != "" {
+		if !strings.HasSuffix(value, suffix) {
+			return false
+		}
+		value = value[:len(value)-len(suffix)] // consume the suffix as well
+	}
+
+	// Handle middle segments in order.
+	for i := 1; i < len(parts)-1; i++ {
+		segment := parts[i]
+		if segment == "" {
+			continue // consecutive '*' yield empty segments
+		}
+		idx := strings.Index(value, segment) // the leftmost occurrence leaves the most room for the remaining segments
+		if idx < 0 {
+			return false
+		}
+		value = value[idx+len(segment):]
+	}
+
+	return true
+}
+
 func buildClaudeConfigModels(entry *config.ClaudeKey) []*ModelInfo {
 	if entry == nil || len(entry.Models) == 0 {
 		return nil
--- a/sdk/cliproxy/types.go
+++ b/sdk/cliproxy/types.go
@@ -83,9 +83,10 @@ type WatcherWrapper struct {
 	start func(ctx context.Context) error
 	stop  func() error

-	setConfig      func(cfg *config.Config)
-	snapshotAuths  func() []*coreauth.Auth
-	setUpdateQueue func(queue chan<- watcher.AuthUpdate)
+	setConfig             func(cfg *config.Config)
+	snapshotAuths         func() []*coreauth.Auth
+	setUpdateQueue        func(queue chan<- watcher.AuthUpdate)
+	dispatchRuntimeUpdate func(update watcher.AuthUpdate) bool
 }

 // Start proxies to the underlying watcher Start implementation.
@@ -112,6 +113,16 @@ func (w *WatcherWrapper) SetConfig(cfg *config.Config) {
 	w.setConfig(cfg)
 }

+// DispatchRuntimeAuthUpdate forwards runtime auth updates (e.g., websocket providers)
+// into the watcher-managed auth update queue when available.
+// Returns true if the update was enqueued successfully; false when the wrapper is nil or has no dispatch callback.
+func (w *WatcherWrapper) DispatchRuntimeAuthUpdate(update watcher.AuthUpdate) bool {
+	if w == nil || w.dispatchRuntimeUpdate == nil {
+		return false
+	}
+	return w.dispatchRuntimeUpdate(update) // the result comes straight from the injected callback
+}
+
 // SetClients updates the watcher file-backed clients registry.
 // SetClients and SetAPIKeyClients removed; watcher manages its own caches

--- a/sdk/cliproxy/watcher.go
+++ b/sdk/cliproxy/watcher.go
@@ -28,5 +28,8 @@ func defaultWatcherFactory(configPath, authDir string, reload func(*config.Confi
 		setUpdateQueue: func(queue chan<- watcher.AuthUpdate) {
 			w.SetAuthUpdateQueue(queue)
 		},
+		dispatchRuntimeUpdate: func(update watcher.AuthUpdate) bool {
+			return w.DispatchRuntimeAuthUpdate(update)
+		},
 	}, nil
 }
Author	SHA1	Message	Date
Luis Pater	54a9c4c3c7	Merge pull request #371 from ben-vargas/test-amp-tools fix(amp): add /threads.rss root-level route for AMP CLI	2025-11-30 15:18:23 +08:00
Luis Pater	18b5c35dea	Merge pull request #366 from router-for-me/blacklist Add Model Blacklist	2025-11-30 15:17:46 +08:00
hkfires	7b7871ede2	feat(api): add oauth excluded model management	2025-11-30 13:38:23 +08:00
hkfires	c4e3646b75	docs(config): expand model exclusion examples	2025-11-30 11:55:47 +08:00
hkfires	022aa81be1	feat(cliproxy): support wildcard exclusions for models	2025-11-30 08:02:00 +08:00
hkfires	c43f0ea7b1	refactor(config): rename model blacklist fields to excluded models	2025-11-29 21:23:47 +08:00
hkfires	6a191358af	fix(auth): fix runtime auth reload on oauth blacklist change	2025-11-29 20:30:11 +08:00
Ben Vargas	db1119dd78	fix(amp): add /threads.rss root-level route for AMP CLI AMP CLI requests /threads.rss at the root level, but the AMP module only registered routes under /api/*. This caused a 404 error during AMP CLI startup. Add the missing root-level route with the same security middleware (noCORS, optional localhost restriction) as other management routes.	2025-11-29 05:01:19 -07:00
hkfires	5983e3ec87	feat(auth): add oauth provider model blacklist	2025-11-28 10:37:10 +08:00
hkfires	f8cebb9343	feat(config): add per-key model blacklist for providers	2025-11-27 21:57:07 +08:00
Luis Pater	72c7ef7647	fix(translator): handle non-JSON output parsing for OpenAI function responses - Updated `antigravity_openai_request.go` to process non-JSON outputs gracefully by verifying and distinguishing between JSON and plain string formats. - Ensured proper assignment of parsed or raw response to `functionResponse`.	2025-11-27 16:18:49 +08:00
Luis Pater	d2e4639b2a	feat(registry): add context length and update max tokens for Claude model configurations - Added `ContextLength` field with a value of 200,000 to all applicable Claude model definitions. - Standardized `MaxCompletionTokens` values across models for consistency and alignment.	2025-11-27 16:13:25 +08:00
Luis Pater	08321223c4	Merge pull request #340 from nestharus/fix/339-thinking-openai-gemini-compat fix(thinking): resolve OpenAI/Gemini compatibility for thinking model…	2025-11-27 16:03:24 +08:00
Luis Pater	7e30157590	Fixed: #354 fix(translator): add support for "xhigh" reasoning effort in OpenAI responses - Updated handling in `openai_openai-responses_request.go` to include the new "xhigh" reasoning effort level.	2025-11-27 15:59:15 +08:00
nestharus	e73cdf5cff	fix(claude): ensure max_tokens exceeds thinking budget for thinking models Fixes an issue where Claude thinking models would return 400 errors when the thinking.budget_tokens was greater than or equal to max_tokens. Changes: - Add MaxCompletionTokens: 128000 to all Claude thinking model definitions - Add ensureMaxTokensForThinking() function in claude_executor.go that: - Checks if thinking is enabled with a budget_tokens value - Looks up the model's MaxCompletionTokens from the registry - Ensures max_tokens is set to at least the model's MaxCompletionTokens - Falls back to budget_tokens + 4000 buffer if registry lookup fails This ensures Anthropic API constraint (max_tokens > thinking.budget_tokens) is always satisfied when using extended thinking features. Fixes: #339 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>	2025-11-26 22:31:05 -08:00
Luis Pater	39621a0340	fix(translator): normalize function calls and outputs for consistent input processing - Implemented logic to pair consecutive function calls and their outputs, ensuring proper sequencing for processing. - Adjusted `gemini_openai-responses_request.go` to normalize message structures and maintain expected flow.	2025-11-27 10:25:45 +08:00
Luis Pater	346b663079	fix(translator): handle non-JSON output gracefully in function call outputs - Updated handling of `output` in `gemini_openai-responses_request.go` to use `.Str` instead of `.Raw` when parsing non-JSON string outputs. - Added checks to distinguish between JSON and non-JSON `output` types for accurate `functionResponse` construction.	2025-11-27 09:40:00 +08:00
Luis Pater	0bcae68c6c	fix(translator): preserve raw JSON encoding in function call outputs - Updated handling of `output` in `gemini_openai-responses_request.go` to use `.Raw` instead of `.String` for preserving original JSON encoding. - Ensured proper setting of raw JSON output when constructing `functionResponse`.	2025-11-27 08:26:53 +08:00
Luis Pater	c8cee547fd	fix(translator): ensure partial content is retained while skipping encrypted thoughtSignature - Updated handling of `thoughtSignature` across all translator modules to retain other content payloads if present. - Adjusted logic for `thought_signature` and `inline_data` keys for consistent processing.	2025-11-27 00:52:17 +08:00
Luis Pater	36755421fe	Merge pull request #343 from router-for-me/misc style(amp): tidy whitespace in proxy module and tests	2025-11-26 19:03:07 +08:00
hkfires	6c17dbc4da	style(amp): tidy whitespace in proxy module and tests	2025-11-26 18:57:26 +08:00
Luis Pater	ee6429cc75	feat(registry): add Gemini 3 Pro Image Preview model and remove Claude Sonnet 4.5 Thinking - Added new `Gemini 3 Pro Image Preview` model with detailed metadata and configuration. - Removed outdated `Claude Sonnet 4.5 Thinking` model definition for cleanup and relevance.	2025-11-26 18:22:40 +08:00
Luis Pater	a4a26d978e	Fixed: #339 feat(handlers, executor): add Gemini 3 Pro Preview support and refine Claude system instructions - Added support for the new "Gemini 3 Pro Preview" action in Gemini handlers, including detailed metadata and configuration. - Removed redundant `cache_control` field from Claude system instructions for cleaner payload structure.	2025-11-26 11:42:57 +08:00
Luis Pater	ed9f6e897e	Fixed: #337 fix(executor): replace redundant commented code with `checkSystemInstructions` helper - Replaced commented-out `sjson.SetRawBytes` lines with the new `checkSystemInstructions` function. - Centralized system instruction handling for better code clarity and reuse. - Ensured consistent logic for managing `system` field across Claude executor flows.	2025-11-26 08:27:48 +08:00
Luis Pater	9c1e3c0687	Merge pull request #334 from nestharus/feat/claude-thinking-and-beta-headers feat(claude): add thinking model variants and beta headers support	2025-11-26 02:17:02 +08:00
Luis Pater	2e5681ea32	Merge branch 'dev' into feat/claude-thinking-and-beta-headers	2025-11-26 02:16:40 +08:00
Luis Pater	52c17f03a5	fix(executor): comment out redundant code for setting Claude system instructions - Commented out multiple instances of `sjson.SetRawBytes` for setting `system` key to Claude instructions as they are redundant. - Code cleanup to improve clarity and maintainability without affecting functionality.	2025-11-26 02:06:16 +08:00
nestharus	d0e694d4ed	feat(claude): add thinking model variants and beta headers support - Add Claude thinking model definitions (sonnet-4-5-thinking, opus-4-5-thinking variants) - Add Thinking support for antigravity models with -thinking suffix - Add injectThinkingConfig() for automatic thinking budget based on model suffix - Add resolveUpstreamModel() mappings for thinking variants to actual Claude models - Add extractAndRemoveBetas() to convert betas array to anthropic-beta header - Update applyClaudeHeaders() to merge custom betas from request body Closes #324	2025-11-25 03:33:05 -08:00
Luis Pater	506f1117dd	fix(handlers): refactor API response capture to append data safely - Introduced `appendAPIResponse` helper to preserve and append data to existing API responses. - Ensured newline inclusion when appending, if necessary. - Improved `nil` and data type checks for response handling. - Updated middleware to skip request logging for `GET` requests.	2025-11-25 11:37:02 +08:00
Luis Pater	113db3c5bf	fix(executor): update antigravity executor to enhance model metadata handling - Added additional metadata fields (`Name`, `Description`, `DisplayName`, `Version`) to `ModelInfo` struct initialization for better model representation. - Removed unnecessary whitespace in the code.	2025-11-25 09:19:01 +08:00
Luis Pater	1aa0b6cd11	Merge pull request #322 from ben-vargas/feat-claude-opus-4-5 feat(registry): add Claude Opus 4.5 model definition	2025-11-25 08:38:06 +08:00
Ben Vargas	0895533400	fix(registry): correct Claude Opus 4.5 created timestamp Update epoch from 1730419200 (2024-11-01) to 1761955200 (2025-11-01).	2025-11-24 12:27:23 -07:00
Ben Vargas	43f007c234	feat(registry): add Claude Opus 4.5 model definition Add support for claude-opus-4-5-20251101 with 200K context window and 64K max output tokens.	2025-11-24 12:26:39 -07:00
Luis Pater	0ceee56d99	Merge pull request #318 from router-for-me/log feat(logs): add limit query param to cap returned logs	2025-11-24 20:35:28 +08:00
hkfires	943a8c74df	feat(logs): add limit query param to cap returned logs	2025-11-24 19:59:24 +08:00
Luis Pater	0a47b452e9	fix(translator): add conditional check for key renaming in Gemini tools - Ensured `functionDeclarations` key renaming only occurs if the key exists in Gemini tools processing. - Prevented unnecessary JSON reassignment when the target key is absent.	2025-11-24 17:15:43 +08:00
Luis Pater	261f08a82a	fix(translator): adjust key renaming logic in Gemini request processing - Fixed parameter key renaming to correctly handle `functionDeclarations` and `parametersJsonSchema` in Gemini tools. - Resolved potential overwriting issue by reassigning JSON strings after each key rename.	2025-11-24 17:12:04 +08:00
Luis Pater	d114d8d0bd	feat(config): add TLS support for HTTPS server configuration - Introduced `TLSConfig` to support HTTPS configurations, including enabling TLS, specifying certificate and key files. - Updated HTTP server logic to handle HTTPS mode when TLS is enabled. - Enhanced `config.example.yaml` with TLS settings example. - Adjusted internal URL generation to respect protocol based on TLS state.	2025-11-24 10:41:29 +08:00
Luis Pater	bb9955e461	fix(auth): resolve index reassignment issue during auth management - Fixed improper handling of `indexAssigned` and `Index` during auth reassignment. - Ensured `EnsureIndex` is invoked after validating existing auth entries.	2025-11-24 10:10:09 +08:00
Luis Pater	7063a176f4	#293 feat(retry): add configurable retry logic with cooldown support - Introduced `max-retry-interval` configuration for cooldown durations between retries. - Added `SetRetryConfig` in `Manager` to handle retry attempts and cooldown intervals. - Enhanced provider execution logic to include retry attempts, cooldown management, and dynamic wait periods. - Updated API endpoints and YAML configuration to support `max-retry-interval`.	2025-11-24 09:55:15 +08:00
Luis Pater	e3082887a6	feat(logging, middleware): add error-based logging support and error log management - Introduced `logOnErrorOnly` mode to enable logging only for error responses when request logging is disabled. - Added endpoints to list and download error logs (`/request-error-logs`). - Implemented error log file cleanup to retain only the newest 10 logs. - Refactored `ResponseWriterWrapper` to support forced logging for error responses. - Enhanced middleware to capture data for upstream error persistence. - Improved log file naming and error log filename generation.	2025-11-23 22:41:57 +08:00
Luis Pater	ddb0c0ec1c	fix(translator): reintroduce `thoughtSignature` bypass logic for model parts - Restored `thoughtSignature` validator bypass for model-specific parts in Gemini content processing. - Removed redundant logic from the `executor` for cleaner handling.	2025-11-23 20:52:23 +08:00
Luis Pater	d1736cb29c	Merge pull request #315 from router-for-me/aistudio fix(aistudio): strip Gemini generation config overrides	2025-11-23 20:25:59 +08:00
hkfires	62bfd62871	fix(aistudio): strip Gemini generation config overrides Remove generationConfig.maxOutputTokens, generationConfig.responseMimeType and generationConfig.responseJsonSchema from the Gemini payload in translateRequest so we no longer send unsupported or conflicting response configuration fields. This lets the backend or caller control response formatting and output limits and helps prevent potential API errors caused by these keys.	2025-11-23 19:44:03 +08:00
Luis Pater	257621c5ed	chore(executor): update default agent version and simplify const formatting - Updated `defaultAntigravityAgent` to version `1.11.5`. - Adjusted const value formatting for improved readability. feat(executor): introduce fallback mechanism for Antigravity base URLs - Added retry logic with fallback order for Antigravity base URLs to handle request errors and rate limits. - Refactored base URL handling with `antigravityBaseURLFallbackOrder` and related utilities. - Enhanced error handling in non-streaming and streaming requests with retry support and improved metadata reporting. - Updated `buildRequest` to support dynamic base URL assignment.	2025-11-23 17:53:07 +08:00
Luis Pater	ac064389ca	feat(executor, translator): enhance token handling and payload processing - Improved Antigravity executor to handle `thinkingConfig` adjustments and default `thinkingBudget` when `thinkingLevel` is removed. - Updated translator response handling to set default values for output token counts when specific token data is missing.	2025-11-23 11:32:37 +08:00
Luis Pater	8d23ffc873	feat(executor): add model alias mapping and improve Antigravity payload handling - Introduced `modelName2Alias` and `alias2ModelName` functions for mapping between model names and aliases. - Improved Antigravity payload transformation to include alias-to-model name conversion. - Enhanced processing for Claude Sonnet models to adjust template parameters based on schema presence.	2025-11-23 03:16:14 +08:00
Luis Pater	4307f08bbc	feat(watcher): optimize auth file handling with hash-based change detection - Added `authFileUnchanged` to skip reloads for unchanged files based on SHA256 hash comparisons. - Introduced `isKnownAuthFile` to verify known files before handling removal events. - Improved event processing in `handleEvent` to reduce unnecessary reloads and enhance performance.	2025-11-23 01:22:16 +08:00