Merge branch 'dev' into fix/cross-model-thinking-signature

2026-02-19 04:40:52 +08:00 · 2026-01-20 10:10:43 +08:00
parent 5977af96a0 5717c7f2f4
commit a7ffc77e3d
114 changed files with 9708 additions and 4851 deletions
--- a/README.md
+++ b/README.md
@@ -130,6 +130,10 @@ Windows-native CLIProxyAPI fork with TUI, system tray, and multi-provider OAuth
 VSCode extension for quick switching between Claude Code models, featuring integrated CLIProxyAPI as its backend with automatic background lifecycle management.
 ### [ZeroLimit](https://github.com/0xtbug/zero-limit)
 Windows desktop app built with Tauri + React for monitoring AI coding assistant quotas via CLIProxyAPI. Track usage across Gemini, Claude, OpenAI Codex, and Antigravity accounts with real-time dashboard, system tray integration, and one-click proxy control - no API keys needed.
 > [!NOTE]  
 > If you developed a project based on CLIProxyAPI, please open a PR to add it to this list.
--- a/README_CN.md
+++ b/README_CN.md
@@ -129,6 +129,10 @@ CLI 封装器，用于通过 CLIProxyAPI OAuth 即时切换多个 Claude 账户
 一款 VSCode 扩展，提供了在 VSCode 中快速切换 Claude Code 模型的功能，内置 CLIProxyAPI 作为其后端，支持后台自动启动和关闭。
 ### [ZeroLimit](https://github.com/0xtbug/zero-limit)
 Windows 桌面应用，基于 Tauri + React 构建，用于通过 CLIProxyAPI 监控 AI 编程助手配额。支持跨 Gemini、Claude、OpenAI Codex 和 Antigravity 账户的使用量追踪，提供实时仪表盘、系统托盘集成和一键代理控制，无需 API 密钥。
 > [!NOTE]  
 > 如果你开发了基于 CLIProxyAPI 的项目，请提交一个 PR（拉取请求）将其添加到此列表中。
--- a/cmd/server/main.go
+++ b/cmd/server/main.go
@@ -61,6 +61,7 @@ func main() {
 	var iflowLogin bool
 	var iflowCookie bool
 	var noBrowser bool
 	var oauthCallbackPort int
 	var antigravityLogin bool
 	var projectID string
 	var vertexImport string
@@ -75,6 +76,7 @@ func main() {
 	flag.BoolVar(&iflowLogin, "iflow-login", false, "Login to iFlow using OAuth")
 	flag.BoolVar(&iflowCookie, "iflow-cookie", false, "Login to iFlow using Cookie")
 	flag.BoolVar(&noBrowser, "no-browser", false, "Don't open browser automatically for OAuth")
 	flag.IntVar(&oauthCallbackPort, "oauth-callback-port", 0, "Override OAuth callback port (defaults to provider-specific port)")
 	flag.BoolVar(&antigravityLogin, "antigravity-login", false, "Login to Antigravity using OAuth")
 	flag.StringVar(&projectID, "project_id", "", "Project ID (Gemini only, not required)")
 	flag.StringVar(&configPath, "config", DefaultConfigPath, "Configure File Path")
@@ -425,7 +427,8 @@ func main() {
 	// Create login options to be used in authentication flows.
 	options := &cmd.LoginOptions{
-		NoBrowser: noBrowser,
+		NoBrowser:    noBrowser,
 		CallbackPort: oauthCallbackPort,
 	}
 	// Register the shared token store once so all components use the same persistence backend.
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -85,6 +85,10 @@ nonstream-keepalive-interval: 0
 #   keepalive-seconds: 15   # Default: 0 (disabled). <= 0 disables keep-alives.
 #   bootstrap-retries: 1    # Default: 0 (disabled). Retries before first byte is sent.
 # When true, enable official Codex instructions injection for Codex API requests.
 # When false (default), CodexInstructionsForModel returns immediately without modification.
 codex-instructions-enabled: false
 # Gemini API keys
 # gemini-api-key:
 #   - api-key: "AIzaSy...01"
@@ -201,12 +205,27 @@ nonstream-keepalive-interval: 0
 #     - from: "claude-haiku-4-5-20251001"
 #       to: "gemini-2.5-flash"
-# Global OAuth model name mappings (per channel)
+# Global OAuth model name aliases (per channel)
-# These mappings rename model IDs for both model listing and request routing.
+# These aliases rename model IDs for both model listing and request routing.
 # Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow.
-# NOTE: Mappings do not apply to gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, or ampcode.
+# NOTE: Aliases do not apply to gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, or ampcode.
 # You can repeat the same name with different aliases to expose multiple client model names.
-# oauth-model-mappings:
+oauth-model-alias:
  antigravity:
    - name: "rev19-uic3-1p"
      alias: "gemini-2.5-computer-use-preview-10-2025"
    - name: "gemini-3-pro-image"
      alias: "gemini-3-pro-image-preview"
    - name: "gemini-3-pro-high"
      alias: "gemini-3-pro-preview"
    - name: "gemini-3-flash"
      alias: "gemini-3-flash-preview"
    - name: "claude-sonnet-4-5"
      alias: "gemini-claude-sonnet-4-5"
    - name: "claude-sonnet-4-5-thinking"
      alias: "gemini-claude-sonnet-4-5-thinking"
    - name: "claude-opus-4-5-thinking"
      alias: "gemini-claude-opus-4-5-thinking"
 #   gemini-cli:
 #     - name: "gemini-2.5-pro"          # original model name under this channel
 #       alias: "g2.5p"                  # client-visible alias
@@ -217,9 +236,6 @@ nonstream-keepalive-interval: 0
 #   aistudio:
 #     - name: "gemini-2.5-pro"
 #       alias: "g2.5p"
 #   antigravity:
 #     - name: "gemini-3-pro-preview"
 #       alias: "g3p"
 #   claude:
 #     - name: "claude-sonnet-4-5-20250929"
 #       alias: "cs4.5"
@@ -263,9 +279,21 @@ nonstream-keepalive-interval: 0
 #           protocol: "gemini" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex
 #       params: # JSON path (gjson/sjson syntax) -> value
 #         "generationConfig.thinkingConfig.thinkingBudget": 32768
 #   default-raw: # Default raw rules set parameters using raw JSON when missing (must be valid JSON).
 #     - models:
 #         - name: "gemini-2.5-pro" # Supports wildcards (e.g., "gemini-*")
 #           protocol: "gemini" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex
 #       params: # JSON path (gjson/sjson syntax) -> raw JSON value (strings are used as-is, must be valid JSON)
 #         "generationConfig.responseJsonSchema": "{\"type\":\"object\",\"properties\":{\"answer\":{\"type\":\"string\"}}}"
 #   override: # Override rules always set parameters, overwriting any existing values.
 #     - models:
 #         - name: "gpt-*" # Supports wildcards (e.g., "gpt-*")
 #           protocol: "codex" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex
 #       params: # JSON path (gjson/sjson syntax) -> value
 #         "reasoning.effort": "high"
 #   override-raw: # Override raw rules always set parameters using raw JSON (must be valid JSON).
 #     - models:
 #         - name: "gpt-*" # Supports wildcards (e.g., "gpt-*")
 #           protocol: "codex" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex
 #       params: # JSON path (gjson/sjson syntax) -> raw JSON value (strings are used as-is, must be valid JSON)
 #         "response_format": "{\"type\":\"json_schema\",\"json_schema\":{\"name\":\"answer\",\"schema\":{\"type\":\"object\"}}}"
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -22,7 +22,7 @@ services:
      - "51121:51121"
      - "11451:11451"
    volumes:
-      - ./config.yaml:/CLIProxyAPI/config.yaml
+      - ${CLI_PROXY_CONFIG_PATH:-./config.yaml}:/CLIProxyAPI/config.yaml
-      - ./auths:/root/.cli-proxy-api
+      - ${CLI_PROXY_AUTH_PATH:-./auths}:/root/.cli-proxy-api
-      - ./logs:/CLIProxyAPI/logs
+      - ${CLI_PROXY_LOG_PATH:-./logs}:/CLIProxyAPI/logs
    restart: unless-stopped
--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
@@ -1703,7 +1703,7 @@ func (h *Handler) RequestQwenToken(c *gin.Context) {
 		// Create token storage
 		tokenStorage := qwenAuth.CreateTokenStorage(tokenData)
-		tokenStorage.Email = fmt.Sprintf("qwen-%d", time.Now().UnixMilli())
+		tokenStorage.Email = fmt.Sprintf("%d", time.Now().UnixMilli())
 		record := &coreauth.Auth{
 			ID:       fmt.Sprintf("qwen-%s.json", tokenStorage.Email),
 			Provider: "qwen",
@@ -1808,7 +1808,7 @@ func (h *Handler) RequestIFlowToken(c *gin.Context) {
 		tokenStorage := authSvc.CreateTokenStorage(tokenData)
 		identifier := strings.TrimSpace(tokenStorage.Email)
 		if identifier == "" {
-			identifier = fmt.Sprintf("iflow-%d", time.Now().UnixMilli())
+			identifier = fmt.Sprintf("%d", time.Now().UnixMilli())
 			tokenStorage.Email = identifier
 		}
 		record := &coreauth.Auth{
@@ -1893,15 +1893,17 @@ func (h *Handler) RequestIFlowCookieToken(c *gin.Context) {
 	fileName := iflowauth.SanitizeIFlowFileName(email)
 	if fileName == "" {
 		fileName = fmt.Sprintf("iflow-%d", time.Now().UnixMilli())
 	} else {
 		fileName = fmt.Sprintf("iflow-%s", fileName)
 	}
 	tokenStorage.Email = email
 	timestamp := time.Now().Unix()
 	record := &coreauth.Auth{
-		ID:       fmt.Sprintf("iflow-%s-%d.json", fileName, timestamp),
+		ID:       fmt.Sprintf("%s-%d.json", fileName, timestamp),
 		Provider: "iflow",
-		FileName: fmt.Sprintf("iflow-%s-%d.json", fileName, timestamp),
+		FileName: fmt.Sprintf("%s-%d.json", fileName, timestamp),
 		Storage:  tokenStorage,
 		Metadata: map[string]any{
 			"email":        email,
--- a/internal/api/handlers/management/config_lists.go
+++ b/internal/api/handlers/management/config_lists.go
@@ -703,21 +703,21 @@ func (h *Handler) DeleteOAuthExcludedModels(c *gin.Context) {
 	h.persist(c)
 }
-// oauth-model-mappings: map[string][]ModelNameMapping
+// oauth-model-alias: map[string][]OAuthModelAlias
-func (h *Handler) GetOAuthModelMappings(c *gin.Context) {
+func (h *Handler) GetOAuthModelAlias(c *gin.Context) {
-	c.JSON(200, gin.H{"oauth-model-mappings": sanitizedOAuthModelMappings(h.cfg.OAuthModelMappings)})
+	c.JSON(200, gin.H{"oauth-model-alias": sanitizedOAuthModelAlias(h.cfg.OAuthModelAlias)})
 }
-func (h *Handler) PutOAuthModelMappings(c *gin.Context) {
+func (h *Handler) PutOAuthModelAlias(c *gin.Context) {
 	data, err := c.GetRawData()
 	if err != nil {
 		c.JSON(400, gin.H{"error": "failed to read body"})
 		return
 	}
-	var entries map[string][]config.ModelNameMapping
+	var entries map[string][]config.OAuthModelAlias
 	if err = json.Unmarshal(data, &entries); err != nil {
 		var wrapper struct {
-			Items map[string][]config.ModelNameMapping `json:"items"`
+			Items map[string][]config.OAuthModelAlias `json:"items"`
 		}
 		if err2 := json.Unmarshal(data, &wrapper); err2 != nil {
 			c.JSON(400, gin.H{"error": "invalid body"})
@@ -725,15 +725,15 @@ func (h *Handler) PutOAuthModelMappings(c *gin.Context) {
 		}
 		entries = wrapper.Items
 	}
-	h.cfg.OAuthModelMappings = sanitizedOAuthModelMappings(entries)
+	h.cfg.OAuthModelAlias = sanitizedOAuthModelAlias(entries)
 	h.persist(c)
 }
-func (h *Handler) PatchOAuthModelMappings(c *gin.Context) {
+func (h *Handler) PatchOAuthModelAlias(c *gin.Context) {
 	var body struct {
-		Provider *string                   `json:"provider"`
+		Provider *string                  `json:"provider"`
-		Channel  *string                   `json:"channel"`
+		Channel  *string                  `json:"channel"`
-		Mappings []config.ModelNameMapping `json:"mappings"`
+		Aliases  []config.OAuthModelAlias `json:"aliases"`
 	}
 	if errBindJSON := c.ShouldBindJSON(&body); errBindJSON != nil {
 		c.JSON(400, gin.H{"error": "invalid body"})
@@ -751,32 +751,32 @@ func (h *Handler) PatchOAuthModelMappings(c *gin.Context) {
 		return
 	}
-	normalizedMap := sanitizedOAuthModelMappings(map[string][]config.ModelNameMapping{channel: body.Mappings})
+	normalizedMap := sanitizedOAuthModelAlias(map[string][]config.OAuthModelAlias{channel: body.Aliases})
 	normalized := normalizedMap[channel]
 	if len(normalized) == 0 {
-		if h.cfg.OAuthModelMappings == nil {
+		if h.cfg.OAuthModelAlias == nil {
 			c.JSON(404, gin.H{"error": "channel not found"})
 			return
 		}
-		if _, ok := h.cfg.OAuthModelMappings[channel]; !ok {
+		if _, ok := h.cfg.OAuthModelAlias[channel]; !ok {
 			c.JSON(404, gin.H{"error": "channel not found"})
 			return
 		}
-		delete(h.cfg.OAuthModelMappings, channel)
+		delete(h.cfg.OAuthModelAlias, channel)
-		if len(h.cfg.OAuthModelMappings) == 0 {
+		if len(h.cfg.OAuthModelAlias) == 0 {
-			h.cfg.OAuthModelMappings = nil
+			h.cfg.OAuthModelAlias = nil
 		}
 		h.persist(c)
 		return
 	}
-	if h.cfg.OAuthModelMappings == nil {
+	if h.cfg.OAuthModelAlias == nil {
-		h.cfg.OAuthModelMappings = make(map[string][]config.ModelNameMapping)
+		h.cfg.OAuthModelAlias = make(map[string][]config.OAuthModelAlias)
 	}
-	h.cfg.OAuthModelMappings[channel] = normalized
+	h.cfg.OAuthModelAlias[channel] = normalized
 	h.persist(c)
 }
-func (h *Handler) DeleteOAuthModelMappings(c *gin.Context) {
+func (h *Handler) DeleteOAuthModelAlias(c *gin.Context) {
 	channel := strings.ToLower(strings.TrimSpace(c.Query("channel")))
 	if channel == "" {
 		channel = strings.ToLower(strings.TrimSpace(c.Query("provider")))
@@ -785,17 +785,17 @@ func (h *Handler) DeleteOAuthModelMappings(c *gin.Context) {
 		c.JSON(400, gin.H{"error": "missing channel"})
 		return
 	}
-	if h.cfg.OAuthModelMappings == nil {
+	if h.cfg.OAuthModelAlias == nil {
 		c.JSON(404, gin.H{"error": "channel not found"})
 		return
 	}
-	if _, ok := h.cfg.OAuthModelMappings[channel]; !ok {
+	if _, ok := h.cfg.OAuthModelAlias[channel]; !ok {
 		c.JSON(404, gin.H{"error": "channel not found"})
 		return
 	}
-	delete(h.cfg.OAuthModelMappings, channel)
+	delete(h.cfg.OAuthModelAlias, channel)
-	if len(h.cfg.OAuthModelMappings) == 0 {
+	if len(h.cfg.OAuthModelAlias) == 0 {
-		h.cfg.OAuthModelMappings = nil
+		h.cfg.OAuthModelAlias = nil
 	}
 	h.persist(c)
 }
@@ -1042,26 +1042,26 @@ func normalizeVertexCompatKey(entry *config.VertexCompatKey) {
 	entry.Models = normalized
 }
-func sanitizedOAuthModelMappings(entries map[string][]config.ModelNameMapping) map[string][]config.ModelNameMapping {
+func sanitizedOAuthModelAlias(entries map[string][]config.OAuthModelAlias) map[string][]config.OAuthModelAlias {
 	if len(entries) == 0 {
 		return nil
 	}
-	copied := make(map[string][]config.ModelNameMapping, len(entries))
+	copied := make(map[string][]config.OAuthModelAlias, len(entries))
-	for channel, mappings := range entries {
+	for channel, aliases := range entries {
-		if len(mappings) == 0 {
+		if len(aliases) == 0 {
 			continue
 		}
-		copied[channel] = append([]config.ModelNameMapping(nil), mappings...)
+		copied[channel] = append([]config.OAuthModelAlias(nil), aliases...)
 	}
 	if len(copied) == 0 {
 		return nil
 	}
-	cfg := config.Config{OAuthModelMappings: copied}
+	cfg := config.Config{OAuthModelAlias: copied}
-	cfg.SanitizeOAuthModelMappings()
+	cfg.SanitizeOAuthModelAlias()
-	if len(cfg.OAuthModelMappings) == 0 {
+	if len(cfg.OAuthModelAlias) == 0 {
 		return nil
 	}
-	return cfg.OAuthModelMappings
+	return cfg.OAuthModelAlias
 }
 // GetAmpCode returns the complete ampcode configuration.
--- a/internal/api/handlers/management/logs.go
+++ b/internal/api/handlers/management/logs.go
@@ -13,7 +13,7 @@ import (
 	"time"
 	"github.com/gin-gonic/gin"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 )
 const (
@@ -360,16 +360,7 @@ func (h *Handler) logDirectory() string {
 	if h.logDir != "" {
 		return h.logDir
 	}
-	if base := util.WritablePath(); base != "" {
+	return logging.ResolveLogDirectory(h.cfg)
 		return filepath.Join(base, "logs")
 	}
 	if h.configFilePath != "" {
 		dir := filepath.Dir(h.configFilePath)
 		if dir != "" && dir != "." {
 			return filepath.Join(dir, "logs")
 		}
 	}
 	return "logs"
 }
 func (h *Handler) collectLogFiles(dir string) ([]string, error) {
--- a/internal/api/modules/amp/fallback_handlers.go
+++ b/internal/api/modules/amp/fallback_handlers.go
@@ -8,6 +8,7 @@ import (
 	"time"
 	"github.com/gin-gonic/gin"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
@@ -134,10 +135,11 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
 		}
 		// Normalize model (handles dynamic thinking suffixes)
-		normalizedModel, thinkingMetadata := util.NormalizeThinkingModel(modelName)
+		suffixResult := thinking.ParseSuffix(modelName)
 		normalizedModel := suffixResult.ModelName
 		thinkingSuffix := ""
-		if thinkingMetadata != nil && strings.HasPrefix(modelName, normalizedModel) {
+		if suffixResult.HasSuffix {
-			thinkingSuffix = modelName[len(normalizedModel):]
+			thinkingSuffix = "(" + suffixResult.RawSuffix + ")"
 		}
 		resolveMappedModel := func() (string, []string) {
@@ -157,13 +159,13 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
 			// Preserve dynamic thinking suffix (e.g. "(xhigh)") when mapping applies, unless the target
 			// already specifies its own thinking suffix.
 			if thinkingSuffix != "" {
-				_, mappedThinkingMetadata := util.NormalizeThinkingModel(mappedModel)
+				mappedSuffixResult := thinking.ParseSuffix(mappedModel)
-				if mappedThinkingMetadata == nil {
+				if !mappedSuffixResult.HasSuffix {
 					mappedModel += thinkingSuffix
 				}
 			}
-			mappedBaseModel, _ := util.NormalizeThinkingModel(mappedModel)
+			mappedBaseModel := thinking.ParseSuffix(mappedModel).ModelName
 			mappedProviders := util.GetProviderName(mappedBaseModel)
 			if len(mappedProviders) == 0 {
 				return "", nil
--- a/internal/api/modules/amp/model_mapping.go
+++ b/internal/api/modules/amp/model_mapping.go
@@ -8,6 +8,7 @@ import (
 	"sync"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	log "github.com/sirupsen/logrus"
 )
@@ -44,6 +45,11 @@ func NewModelMapper(mappings []config.AmpModelMapping) *DefaultModelMapper {
 // MapModel checks if a mapping exists for the requested model and if the
 // target model has available local providers. Returns the mapped model name
 // or empty string if no valid mapping exists.
 //
 // If the requested model contains a thinking suffix (e.g., "g25p(8192)"),
 // the suffix is preserved in the returned model name (e.g., "gemini-2.5-pro(8192)").
 // However, if the mapping target already contains a suffix, the config suffix
 // takes priority over the user's suffix.
 func (m *DefaultModelMapper) MapModel(requestedModel string) string {
 	if requestedModel == "" {
 		return ""
@@ -52,16 +58,20 @@ func (m *DefaultModelMapper) MapModel(requestedModel string) string {
 	m.mu.RLock()
 	defer m.mu.RUnlock()
-	// Normalize the requested model for lookup
+	// Extract thinking suffix from requested model using ParseSuffix
-	normalizedRequest := strings.ToLower(strings.TrimSpace(requestedModel))
+	requestResult := thinking.ParseSuffix(requestedModel)
 	baseModel := requestResult.ModelName
-	// Check for direct mapping
+	// Normalize the base model for lookup (case-insensitive)
-	targetModel, exists := m.mappings[normalizedRequest]
+	normalizedBase := strings.ToLower(strings.TrimSpace(baseModel))
 	// Check for direct mapping using base model name
 	targetModel, exists := m.mappings[normalizedBase]
 	if !exists {
-		// Try regex mappings in order
+		// Try regex mappings in order using base model only
-		base, _ := util.NormalizeThinkingModel(requestedModel)
+		// (suffix is handled separately via ParseSuffix)
 		for _, rm := range m.regexps {
-			if rm.re.MatchString(requestedModel) || (base != "" && rm.re.MatchString(base)) {
+			if rm.re.MatchString(baseModel) {
 				targetModel = rm.to
 				exists = true
 				break
@@ -72,14 +82,28 @@ func (m *DefaultModelMapper) MapModel(requestedModel string) string {
 		}
 	}
-	// Verify target model has available providers
+	// Check if target model already has a thinking suffix (config priority)
-	normalizedTarget, _ := util.NormalizeThinkingModel(targetModel)
+	targetResult := thinking.ParseSuffix(targetModel)
-	providers := util.GetProviderName(normalizedTarget)
+
 	// Verify target model has available providers (use base model for lookup)
 	providers := util.GetProviderName(targetResult.ModelName)
 	if len(providers) == 0 {
 		log.Debugf("amp model mapping: target model %s has no available providers, skipping mapping", targetModel)
 		return ""
 	}
 	// Suffix handling: config suffix takes priority, otherwise preserve user suffix
 	if targetResult.HasSuffix {
 		// Config's "to" already contains a suffix - use it as-is (config priority)
 		return targetModel
 	}
 	// Preserve user's thinking suffix on the mapped model
 	// (skip empty suffixes to avoid returning "model()")
 	if requestResult.HasSuffix && requestResult.RawSuffix != "" {
 		return targetModel + "(" + requestResult.RawSuffix + ")"
 	}
 	// Note: Detailed routing log is handled by logAmpRouting in fallback_handlers.go
 	return targetModel
 }
--- a/internal/api/modules/amp/model_mapping_test.go
+++ b/internal/api/modules/amp/model_mapping_test.go
@@ -217,10 +217,10 @@ func TestModelMapper_Regex_MatchBaseWithoutParens(t *testing.T) {
 	mapper := NewModelMapper(mappings)
-	// Incoming model has reasoning suffix but should match base via regex
+	// Incoming model has reasoning suffix, regex matches base, suffix is preserved
 	result := mapper.MapModel("gpt-5(high)")
-	if result != "gemini-2.5-pro" {
+	if result != "gemini-2.5-pro(high)" {
-		t.Errorf("Expected gemini-2.5-pro, got %s", result)
+		t.Errorf("Expected gemini-2.5-pro(high), got %s", result)
 	}
 }
@@ -281,3 +281,95 @@ func TestModelMapper_Regex_CaseInsensitive(t *testing.T) {
 		t.Errorf("Expected claude-sonnet-4, got %s", result)
 	}
 }
 // TestModelMapper_SuffixPreservation is a table-driven test of how MapModel
 // treats a parenthesized suffix on the requested model name. According to the
 // expectations below, the request's suffix is carried over to the mapped
 // target, except when the mapping target already has its own suffix, when the
 // suffix is empty ("()"), or when the parenthesis is never closed.
 func TestModelMapper_SuffixPreservation(t *testing.T) {
 	reg := registry.GetGlobalRegistry()
 	// Register test models
 	// NOTE(review): presumably needed so the mapping targets used below resolve
 	// to a provider; confirm against MapModel's provider lookup.
 	reg.RegisterClient("test-client-suffix", "gemini", []*registry.ModelInfo{
 		{ID: "gemini-2.5-pro", OwnedBy: "google", Type: "gemini"},
 	})
 	reg.RegisterClient("test-client-suffix-2", "claude", []*registry.ModelInfo{
 		{ID: "claude-sonnet-4", OwnedBy: "anthropic", Type: "claude"},
 	})
 	// Unregister both test clients again when the test returns.
 	defer reg.UnregisterClient("test-client-suffix")
 	defer reg.UnregisterClient("test-client-suffix-2")
 	// Each case: name is the subtest name, mappings is handed to NewModelMapper,
 	// input is the model name passed to MapModel, and want is the exact string
 	// MapModel must return.
 	tests := []struct {
 		name     string
 		mappings []config.AmpModelMapping
 		input    string
 		want     string
 	}{
 		{
 			name:     "numeric suffix preserved",
 			mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
 			input:    "g25p(8192)",
 			want:     "gemini-2.5-pro(8192)",
 		},
 		{
 			name:     "level suffix preserved",
 			mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
 			input:    "g25p(high)",
 			want:     "gemini-2.5-pro(high)",
 		},
 		{
 			name:     "no suffix unchanged",
 			mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
 			input:    "g25p",
 			want:     "gemini-2.5-pro",
 		},
 		{
 			// The mapping target already ends in "(medium)", so the request's
 			// "(high)" is expected to be dropped in favor of the configured one.
 			name:     "config suffix takes priority",
 			mappings: []config.AmpModelMapping{{From: "alias", To: "gemini-2.5-pro(medium)"}},
 			input:    "alias(high)",
 			want:     "gemini-2.5-pro(medium)",
 		},
 		{
 			// Regex mapping: the pattern is expected to match the model name and
 			// the request's suffix is still appended to the target.
 			name:     "regex with suffix preserved",
 			mappings: []config.AmpModelMapping{{From: "^g25.*", To: "gemini-2.5-pro", Regex: true}},
 			input:    "g25p(8192)",
 			want:     "gemini-2.5-pro(8192)",
 		},
 		{
 			name:     "auto suffix preserved",
 			mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
 			input:    "g25p(auto)",
 			want:     "gemini-2.5-pro(auto)",
 		},
 		{
 			name:     "none suffix preserved",
 			mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
 			input:    "g25p(none)",
 			want:     "gemini-2.5-pro(none)",
 		},
 		{
 			// The mapping key is upper-case while the request is lower-case.
 			name:     "case insensitive base lookup with suffix",
 			mappings: []config.AmpModelMapping{{From: "G25P", To: "gemini-2.5-pro"}},
 			input:    "g25p(high)",
 			want:     "gemini-2.5-pro(high)",
 		},
 		{
 			// "()" must not be propagated: the expected result is the bare target,
 			// not "gemini-2.5-pro()".
 			name:     "empty suffix filtered out",
 			mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
 			input:    "g25p()",
 			want:     "gemini-2.5-pro",
 		},
 		{
 			// The unclosed "(high" is part of the mapping key itself here, so the
 			// whole input string is what has to match; nothing is appended to the
 			// target.
 			name:     "incomplete suffix treated as no suffix",
 			mappings: []config.AmpModelMapping{{From: "g25p(high", To: "gemini-2.5-pro"}},
 			input:    "g25p(high",
 			want:     "gemini-2.5-pro",
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			// Every subtest builds a fresh mapper from its own mappings.
 			mapper := NewModelMapper(tt.mappings)
 			got := mapper.MapModel(tt.input)
 			if got != tt.want {
 				t.Errorf("MapModel(%q) = %q, want %q", tt.input, got, tt.want)
 			}
 		})
 	}
 }
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -26,6 +26,7 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/managementasset"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/usage"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	sdkaccess "github.com/router-for-me/CLIProxyAPI/v6/sdk/access"
@@ -254,15 +255,13 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk
 	}
 	managementasset.SetCurrentConfig(cfg)
 	auth.SetQuotaCooldownDisabled(cfg.DisableCooling)
 	misc.SetCodexInstructionsEnabled(cfg.CodexInstructionsEnabled)
 	// Initialize management handler
 	s.mgmt = managementHandlers.NewHandler(cfg, configFilePath, authManager)
 	if optionState.localPassword != "" {
 		s.mgmt.SetLocalPassword(optionState.localPassword)
 	}
-	logDir := filepath.Join(s.currentPath, "logs")
+	logDir := logging.ResolveLogDirectory(cfg)
 	if base := util.WritablePath(); base != "" {
 		logDir = filepath.Join(base, "logs")
 	}
 	s.mgmt.SetLogDirectory(logDir)
 	s.localPassword = optionState.localPassword
@@ -601,10 +600,10 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.PATCH("/oauth-excluded-models", s.mgmt.PatchOAuthExcludedModels)
 		mgmt.DELETE("/oauth-excluded-models", s.mgmt.DeleteOAuthExcludedModels)
-		mgmt.GET("/oauth-model-mappings", s.mgmt.GetOAuthModelMappings)
+		mgmt.GET("/oauth-model-alias", s.mgmt.GetOAuthModelAlias)
-		mgmt.PUT("/oauth-model-mappings", s.mgmt.PutOAuthModelMappings)
+		mgmt.PUT("/oauth-model-alias", s.mgmt.PutOAuthModelAlias)
-		mgmt.PATCH("/oauth-model-mappings", s.mgmt.PatchOAuthModelMappings)
+		mgmt.PATCH("/oauth-model-alias", s.mgmt.PatchOAuthModelAlias)
-		mgmt.DELETE("/oauth-model-mappings", s.mgmt.DeleteOAuthModelMappings)
+		mgmt.DELETE("/oauth-model-alias", s.mgmt.DeleteOAuthModelAlias)
 		mgmt.GET("/auth-files", s.mgmt.ListAuthFiles)
 		mgmt.GET("/auth-files/models", s.mgmt.GetAuthFileModels)
@@ -912,6 +911,16 @@ func (s *Server) UpdateClients(cfg *config.Config) {
 			log.Debugf("disable_cooling toggled to %t", cfg.DisableCooling)
 		}
 	}
 	if oldCfg == nil || oldCfg.CodexInstructionsEnabled != cfg.CodexInstructionsEnabled {
 		misc.SetCodexInstructionsEnabled(cfg.CodexInstructionsEnabled)
 		if oldCfg != nil {
 			log.Debugf("codex_instructions_enabled updated from %t to %t", oldCfg.CodexInstructionsEnabled, cfg.CodexInstructionsEnabled)
 		} else {
 			log.Debugf("codex_instructions_enabled toggled to %t", cfg.CodexInstructionsEnabled)
 		}
 	}
 	if s.handlers != nil && s.handlers.AuthManager != nil {
 		s.handlers.AuthManager.SetRetryConfig(cfg.RequestRetry, time.Duration(cfg.MaxRetryInterval)*time.Second)
 	}
--- a/internal/auth/gemini/gemini_auth.go
+++ b/internal/auth/gemini/gemini_auth.go
@@ -29,8 +29,9 @@ import (
 )
 const (
-	geminiOauthClientID     = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
+	geminiOauthClientID       = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
-	geminiOauthClientSecret = "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"
+	geminiOauthClientSecret   = "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"
 	geminiDefaultCallbackPort = 8085
 )
 var (
@@ -49,8 +50,9 @@ type GeminiAuth struct {
 // WebLoginOptions customizes the interactive OAuth flow.
 type WebLoginOptions struct {
-	NoBrowser bool
+	NoBrowser    bool
-	Prompt    func(string) (string, error)
+	CallbackPort int
 	Prompt       func(string) (string, error)
 }
 // NewGeminiAuth creates a new instance of GeminiAuth.
@@ -72,6 +74,12 @@ func NewGeminiAuth() *GeminiAuth {
 //   - *http.Client: An HTTP client configured with authentication
 //   - error: An error if the client configuration fails, nil otherwise
 func (g *GeminiAuth) GetAuthenticatedClient(ctx context.Context, ts *GeminiTokenStorage, cfg *config.Config, opts *WebLoginOptions) (*http.Client, error) {
 	callbackPort := geminiDefaultCallbackPort
 	if opts != nil && opts.CallbackPort > 0 {
 		callbackPort = opts.CallbackPort
 	}
 	callbackURL := fmt.Sprintf("http://localhost:%d/oauth2callback", callbackPort)
 	// Configure proxy settings for the HTTP client if a proxy URL is provided.
 	proxyURL, err := url.Parse(cfg.ProxyURL)
 	if err == nil {
@@ -106,7 +114,7 @@ func (g *GeminiAuth) GetAuthenticatedClient(ctx context.Context, ts *GeminiToken
 	conf := &oauth2.Config{
 		ClientID:     geminiOauthClientID,
 		ClientSecret: geminiOauthClientSecret,
-		RedirectURL:  "http://localhost:8085/oauth2callback", // This will be used by the local server.
+		RedirectURL:  callbackURL, // This will be used by the local server.
 		Scopes:       geminiOauthScopes,
 		Endpoint:     google.Endpoint,
 	}
@@ -218,14 +226,20 @@ func (g *GeminiAuth) createTokenStorage(ctx context.Context, config *oauth2.Conf
 //   - *oauth2.Token: The OAuth2 token obtained from the authorization flow
 //   - error: An error if the token acquisition fails, nil otherwise
 func (g *GeminiAuth) getTokenFromWeb(ctx context.Context, config *oauth2.Config, opts *WebLoginOptions) (*oauth2.Token, error) {
 	callbackPort := geminiDefaultCallbackPort
 	if opts != nil && opts.CallbackPort > 0 {
 		callbackPort = opts.CallbackPort
 	}
 	callbackURL := fmt.Sprintf("http://localhost:%d/oauth2callback", callbackPort)
 	// Use a channel to pass the authorization code from the HTTP handler to the main function.
 	codeChan := make(chan string, 1)
 	errChan := make(chan error, 1)
 	// Create a new HTTP server with its own multiplexer.
 	mux := http.NewServeMux()
-	server := &http.Server{Addr: ":8085", Handler: mux}
+	server := &http.Server{Addr: fmt.Sprintf(":%d", callbackPort), Handler: mux}
-	config.RedirectURL = "http://localhost:8085/oauth2callback"
+	config.RedirectURL = callbackURL
 	mux.HandleFunc("/oauth2callback", func(w http.ResponseWriter, r *http.Request) {
 		if err := r.URL.Query().Get("error"); err != "" {
@@ -277,13 +291,13 @@ func (g *GeminiAuth) getTokenFromWeb(ctx context.Context, config *oauth2.Config,
 		// Check if browser is available
 		if !browser.IsAvailable() {
 			log.Warn("No browser available on this system")
-			util.PrintSSHTunnelInstructions(8085)
+			util.PrintSSHTunnelInstructions(callbackPort)
 			fmt.Printf("Please manually open this URL in your browser:\n\n%s\n", authURL)
 		} else {
 			if err := browser.OpenURL(authURL); err != nil {
 				authErr := codex.NewAuthenticationError(codex.ErrBrowserOpenFailed, err)
 				log.Warn(codex.GetUserFriendlyMessage(authErr))
-				util.PrintSSHTunnelInstructions(8085)
+				util.PrintSSHTunnelInstructions(callbackPort)
 				fmt.Printf("Please manually open this URL in your browser:\n\n%s\n", authURL)
 				// Log platform info for debugging
@@ -294,7 +308,7 @@ func (g *GeminiAuth) getTokenFromWeb(ctx context.Context, config *oauth2.Config,
 			}
 		}
 	} else {
-		util.PrintSSHTunnelInstructions(8085)
+		util.PrintSSHTunnelInstructions(callbackPort)
 		fmt.Printf("Please open this URL in your browser:\n\n%s\n", authURL)
 	}
--- a/internal/cmd/anthropic_login.go
+++ b/internal/cmd/anthropic_login.go
@@ -32,9 +32,10 @@ func DoClaudeLogin(cfg *config.Config, options *LoginOptions) {
 	manager := newAuthManager()
 	authOpts := &sdkAuth.LoginOptions{
-		NoBrowser: options.NoBrowser,
+		NoBrowser:    options.NoBrowser,
-		Metadata:  map[string]string{},
+		CallbackPort: options.CallbackPort,
-		Prompt:    promptFn,
+		Metadata:     map[string]string{},
 		Prompt:       promptFn,
 	}
 	_, savedPath, err := manager.Login(context.Background(), "claude", cfg, authOpts)
--- a/internal/cmd/antigravity_login.go
+++ b/internal/cmd/antigravity_login.go
@@ -22,9 +22,10 @@ func DoAntigravityLogin(cfg *config.Config, options *LoginOptions) {
 	manager := newAuthManager()
 	authOpts := &sdkAuth.LoginOptions{
-		NoBrowser: options.NoBrowser,
+		NoBrowser:    options.NoBrowser,
-		Metadata:  map[string]string{},
+		CallbackPort: options.CallbackPort,
-		Prompt:    promptFn,
+		Metadata:     map[string]string{},
 		Prompt:       promptFn,
 	}
 	record, savedPath, err := manager.Login(context.Background(), "antigravity", cfg, authOpts)
--- a/internal/cmd/iflow_login.go
+++ b/internal/cmd/iflow_login.go
@@ -24,9 +24,10 @@ func DoIFlowLogin(cfg *config.Config, options *LoginOptions) {
 	}
 	authOpts := &sdkAuth.LoginOptions{
-		NoBrowser: options.NoBrowser,
+		NoBrowser:    options.NoBrowser,
-		Metadata:  map[string]string{},
+		CallbackPort: options.CallbackPort,
-		Prompt:    promptFn,
+		Metadata:     map[string]string{},
 		Prompt:       promptFn,
 	}
 	_, savedPath, err := manager.Login(context.Background(), "iflow", cfg, authOpts)
--- a/internal/cmd/login.go
+++ b/internal/cmd/login.go
@@ -67,10 +67,11 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
 	}
 	loginOpts := &sdkAuth.LoginOptions{
-		NoBrowser: options.NoBrowser,
+		NoBrowser:    options.NoBrowser,
-		ProjectID: trimmedProjectID,
+		ProjectID:    trimmedProjectID,
-		Metadata:  map[string]string{},
+		CallbackPort: options.CallbackPort,
-		Prompt:    callbackPrompt,
+		Metadata:     map[string]string{},
 		Prompt:       callbackPrompt,
 	}
 	authenticator := sdkAuth.NewGeminiAuthenticator()
@@ -88,8 +89,9 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
 	geminiAuth := gemini.NewGeminiAuth()
 	httpClient, errClient := geminiAuth.GetAuthenticatedClient(ctx, storage, cfg, &gemini.WebLoginOptions{
-		NoBrowser: options.NoBrowser,
+		NoBrowser:    options.NoBrowser,
-		Prompt:    callbackPrompt,
+		CallbackPort: options.CallbackPort,
 		Prompt:       callbackPrompt,
 	})
 	if errClient != nil {
 		log.Errorf("Gemini authentication failed: %v", errClient)
--- a/internal/cmd/openai_login.go
+++ b/internal/cmd/openai_login.go
@@ -19,6 +19,9 @@ type LoginOptions struct {
 	// NoBrowser indicates whether to skip opening the browser automatically.
 	NoBrowser bool
 	// CallbackPort overrides the local OAuth callback port when set (>0).
 	CallbackPort int
 	// Prompt allows the caller to provide interactive input when needed.
 	Prompt func(prompt string) (string, error)
 }
@@ -43,9 +46,10 @@ func DoCodexLogin(cfg *config.Config, options *LoginOptions) {
 	manager := newAuthManager()
 	authOpts := &sdkAuth.LoginOptions{
-		NoBrowser: options.NoBrowser,
+		NoBrowser:    options.NoBrowser,
-		Metadata:  map[string]string{},
+		CallbackPort: options.CallbackPort,
-		Prompt:    promptFn,
+		Metadata:     map[string]string{},
 		Prompt:       promptFn,
 	}
 	_, savedPath, err := manager.Login(context.Background(), "codex", cfg, authOpts)
--- a/internal/cmd/qwen_login.go
+++ b/internal/cmd/qwen_login.go
@@ -36,9 +36,10 @@ func DoQwenLogin(cfg *config.Config, options *LoginOptions) {
 	}
 	authOpts := &sdkAuth.LoginOptions{
-		NoBrowser: options.NoBrowser,
+		NoBrowser:    options.NoBrowser,
-		Metadata:  map[string]string{},
+		CallbackPort: options.CallbackPort,
-		Prompt:    promptFn,
+		Metadata:     map[string]string{},
 		Prompt:       promptFn,
 	}
 	_, savedPath, err := manager.Login(context.Background(), "qwen", cfg, authOpts)
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -6,12 +6,14 @@ package config
 import (
 	"bytes"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"os"
 	"strings"
 	"syscall"
 	log "github.com/sirupsen/logrus"
 	"golang.org/x/crypto/bcrypt"
 	"gopkg.in/yaml.v3"
 )
@@ -69,6 +71,11 @@ type Config struct {
 	// WebsocketAuth enables or disables authentication for the WebSocket API.
 	WebsocketAuth bool `yaml:"ws-auth" json:"ws-auth"`
 	// CodexInstructionsEnabled controls whether official Codex instructions are injected.
 	// When false (default), CodexInstructionsForModel returns immediately without modification.
 	// When true, the original instruction injection logic is used.
 	CodexInstructionsEnabled bool `yaml:"codex-instructions-enabled" json:"codex-instructions-enabled"`
 	// GeminiKey defines Gemini API key configurations with optional routing overrides.
 	GeminiKey []GeminiKey `yaml:"gemini-api-key" json:"gemini-api-key"`
@@ -91,13 +98,13 @@ type Config struct {
 	// OAuthExcludedModels defines per-provider global model exclusions applied to OAuth/file-backed auth entries.
 	OAuthExcludedModels map[string][]string `yaml:"oauth-excluded-models,omitempty" json:"oauth-excluded-models,omitempty"`
-	// OAuthModelMappings defines global model name mappings for OAuth/file-backed auth channels.
+	// OAuthModelAlias defines global model name aliases for OAuth/file-backed auth channels.
-	// These mappings affect both model listing and model routing for supported channels:
+	// These aliases affect both model listing and model routing for supported channels:
 	// gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow.
 	//
 	// NOTE: This does not apply to existing per-credential model alias features under:
 	// gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, and ampcode.
-	OAuthModelMappings map[string][]ModelNameMapping `yaml:"oauth-model-mappings,omitempty" json:"oauth-model-mappings,omitempty"`
+	OAuthModelAlias map[string][]OAuthModelAlias `yaml:"oauth-model-alias,omitempty" json:"oauth-model-alias,omitempty"`
 	// Payload defines default and override rules for provider payload parameters.
 	Payload PayloadConfig `yaml:"payload" json:"payload"`
@@ -145,11 +152,11 @@ type RoutingConfig struct {
 	Strategy string `yaml:"strategy,omitempty" json:"strategy,omitempty"`
 }
-// ModelNameMapping defines a model ID mapping for a specific channel.
+// OAuthModelAlias defines a model ID alias for a specific channel.
 // It maps the upstream model name (Name) to the client-visible alias (Alias).
 // When Fork is true, the alias is added as an additional model in listings while
 // keeping the original model ID available.
-type ModelNameMapping struct {
+type OAuthModelAlias struct {
 	Name  string `yaml:"name" json:"name"`
 	Alias string `yaml:"alias" json:"alias"`
 	Fork  bool   `yaml:"fork,omitempty" json:"fork,omitempty"`
@@ -216,8 +223,12 @@ type AmpUpstreamAPIKeyEntry struct {
 type PayloadConfig struct {
 	// Default defines rules that only set parameters when they are missing in the payload.
 	Default []PayloadRule `yaml:"default" json:"default"`
 	// DefaultRaw defines rules that set raw JSON values only when they are missing.
 	DefaultRaw []PayloadRule `yaml:"default-raw" json:"default-raw"`
 	// Override defines rules that always set parameters, overwriting any existing values.
 	Override []PayloadRule `yaml:"override" json:"override"`
 	// OverrideRaw defines rules that always set raw JSON values, overwriting any existing values.
 	OverrideRaw []PayloadRule `yaml:"override-raw" json:"override-raw"`
 }
 // PayloadRule describes a single rule targeting a list of models with parameter updates.
@@ -225,6 +236,7 @@ type PayloadRule struct {
 	// Models lists model entries with name pattern and protocol constraint.
 	Models []PayloadModelRule `yaml:"models" json:"models"`
 	// Params maps JSON paths (gjson/sjson syntax) to values written into the payload.
 	// For *-raw rules, values are treated as raw JSON fragments (strings are used as-is).
 	Params map[string]any `yaml:"params" json:"params"`
 }
@@ -242,6 +254,10 @@ type ClaudeKey struct {
 	// APIKey is the authentication key for accessing Claude API services.
 	APIKey string `yaml:"api-key" json:"api-key"`
 	// Priority controls selection preference when multiple credentials match.
 	// Higher values are preferred; defaults to 0.
 	Priority int `yaml:"priority,omitempty" json:"priority,omitempty"`
 	// Prefix optionally namespaces models for this credential (e.g., "teamA/claude-sonnet-4").
 	Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"`
@@ -262,6 +278,9 @@ type ClaudeKey struct {
 	ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"`
 }
 // GetAPIKey and GetBaseURL return the APIKey and BaseURL fields of the Claude credential.
 func (k ClaudeKey) GetAPIKey() string  { return k.APIKey }
 func (k ClaudeKey) GetBaseURL() string { return k.BaseURL }
 // ClaudeModel describes a mapping between an alias and the actual upstream model name.
 type ClaudeModel struct {
 	// Name is the upstream model identifier used when issuing requests.
@@ -280,6 +299,10 @@ type CodexKey struct {
 	// APIKey is the authentication key for accessing Codex API services.
 	APIKey string `yaml:"api-key" json:"api-key"`
 	// Priority controls selection preference when multiple credentials match.
 	// Higher values are preferred; defaults to 0.
 	Priority int `yaml:"priority,omitempty" json:"priority,omitempty"`
 	// Prefix optionally namespaces models for this credential (e.g., "teamA/gpt-5-codex").
 	Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"`
@@ -300,6 +323,9 @@ type CodexKey struct {
 	ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"`
 }
 // GetAPIKey and GetBaseURL return the APIKey and BaseURL fields of the Codex credential.
 func (k CodexKey) GetAPIKey() string  { return k.APIKey }
 func (k CodexKey) GetBaseURL() string { return k.BaseURL }
 // CodexModel describes a mapping between an alias and the actual upstream model name.
 type CodexModel struct {
 	// Name is the upstream model identifier used when issuing requests.
@@ -318,6 +344,10 @@ type GeminiKey struct {
 	// APIKey is the authentication key for accessing Gemini API services.
 	APIKey string `yaml:"api-key" json:"api-key"`
 	// Priority controls selection preference when multiple credentials match.
 	// Higher values are preferred; defaults to 0.
 	Priority int `yaml:"priority,omitempty" json:"priority,omitempty"`
 	// Prefix optionally namespaces models for this credential (e.g., "teamA/gemini-3-pro-preview").
 	Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"`
@@ -337,6 +367,9 @@ type GeminiKey struct {
 	ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"`
 }
 // GetAPIKey and GetBaseURL return the APIKey and BaseURL fields of the Gemini credential.
 func (k GeminiKey) GetAPIKey() string  { return k.APIKey }
 func (k GeminiKey) GetBaseURL() string { return k.BaseURL }
 // GeminiModel describes a mapping between an alias and the actual upstream model name.
 type GeminiModel struct {
 	// Name is the upstream model identifier used when issuing requests.
@@ -355,6 +388,10 @@ type OpenAICompatibility struct {
 	// Name is the identifier for this OpenAI compatibility configuration.
 	Name string `yaml:"name" json:"name"`
 	// Priority controls selection preference when multiple providers or credentials match.
 	// Higher values are preferred; defaults to 0.
 	Priority int `yaml:"priority,omitempty" json:"priority,omitempty"`
 	// Prefix optionally namespaces model aliases for this provider (e.g., "teamA/kimi-k2").
 	Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"`
@@ -390,6 +427,9 @@ type OpenAICompatibilityModel struct {
 	Alias string `yaml:"alias" json:"alias"`
 }
 // GetName and GetAlias return the Name and Alias fields of the model entry.
 func (m OpenAICompatibilityModel) GetName() string  { return m.Name }
 func (m OpenAICompatibilityModel) GetAlias() string { return m.Alias }
 // LoadConfig reads a YAML configuration file from the given path,
 // unmarshals it into a Config struct, applies environment variable overrides,
 // and returns it.
@@ -408,6 +448,15 @@ func LoadConfig(configFile string) (*Config, error) {
 // If optional is true and the file is missing, it returns an empty Config.
 // If optional is true and the file is empty or invalid, it returns an empty Config.
 func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 	// Perform oauth-model-alias migration before loading config.
 	// This migrates oauth-model-mappings to oauth-model-alias if needed.
 	if migrated, err := MigrateOAuthModelAlias(configFile); err != nil {
 		// Log warning but don't fail - config loading should still work
 		fmt.Printf("Warning: oauth-model-alias migration failed: %v\n", err)
 	} else if migrated {
 		fmt.Println("Migrated oauth-model-mappings to oauth-model-alias")
 	}
 	// Read the entire configuration file into memory.
 	data, err := os.ReadFile(configFile)
 	if err != nil {
@@ -500,8 +549,11 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 	// Normalize OAuth provider model exclusion map.
 	cfg.OAuthExcludedModels = NormalizeOAuthExcludedModels(cfg.OAuthExcludedModels)
-	// Normalize global OAuth model name mappings.
+	// Normalize global OAuth model name aliases.
-	cfg.SanitizeOAuthModelMappings()
+	cfg.SanitizeOAuthModelAlias()
 	// Validate raw payload rules and drop invalid entries.
 	cfg.SanitizePayloadRules()
 	if cfg.legacyMigrationPending {
 		fmt.Println("Detected legacy configuration keys, attempting to persist the normalized config...")
@@ -519,24 +571,79 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 	return &cfg, nil
 }
-// SanitizeOAuthModelMappings normalizes and deduplicates global OAuth model name mappings.
+// SanitizePayloadRules validates raw JSON payload rule params and drops invalid rules.
-// It trims whitespace, normalizes channel keys to lower-case, drops empty entries,
+func (cfg *Config) SanitizePayloadRules() {
-// allows multiple aliases per upstream name, and ensures aliases are unique within each channel.
+	if cfg == nil {
 func (cfg *Config) SanitizeOAuthModelMappings() {
 	if cfg == nil || len(cfg.OAuthModelMappings) == 0 {
 		return
 	}
-	out := make(map[string][]ModelNameMapping, len(cfg.OAuthModelMappings))
+	cfg.Payload.DefaultRaw = sanitizePayloadRawRules(cfg.Payload.DefaultRaw, "default-raw")
-	for rawChannel, mappings := range cfg.OAuthModelMappings {
+	cfg.Payload.OverrideRaw = sanitizePayloadRawRules(cfg.Payload.OverrideRaw, "override-raw")
-		channel := strings.ToLower(strings.TrimSpace(rawChannel))
+}
-		if channel == "" || len(mappings) == 0 {
+
 func sanitizePayloadRawRules(rules []PayloadRule, section string) []PayloadRule {
 	if len(rules) == 0 {
 		return rules
 	}
 	out := make([]PayloadRule, 0, len(rules))
 	for i := range rules {
 		rule := rules[i]
 		if len(rule.Params) == 0 {
 			continue
 		}
-		seenAlias := make(map[string]struct{}, len(mappings))
+		invalid := false
-		clean := make([]ModelNameMapping, 0, len(mappings))
+		for path, value := range rule.Params {
-		for _, mapping := range mappings {
+			raw, ok := payloadRawString(value)
-			name := strings.TrimSpace(mapping.Name)
+			if !ok {
-			alias := strings.TrimSpace(mapping.Alias)
+				continue
 			}
 			trimmed := bytes.TrimSpace(raw)
 			if len(trimmed) == 0 || !json.Valid(trimmed) {
 				log.WithFields(log.Fields{
 					"section":    section,
 					"rule_index": i + 1,
 					"param":      path,
 				}).Warn("payload rule dropped: invalid raw JSON")
 				invalid = true
 				break
 			}
 		}
 		if invalid {
 			continue
 		}
 		out = append(out, rule)
 	}
 	return out
 }
 // payloadRawString extracts the raw bytes carried by a payload rule value.
 // It reports true for string and []byte values; every other dynamic type
 // yields (nil, false).
 func payloadRawString(value any) ([]byte, bool) {
 	if text, isString := value.(string); isString {
 		return []byte(text), true
 	}
 	if raw, isBytes := value.([]byte); isBytes {
 		return raw, true
 	}
 	return nil, false
 }
 // SanitizeOAuthModelAlias normalizes and deduplicates global OAuth model name aliases.
 // It trims whitespace, normalizes channel keys to lower-case, drops empty entries,
 // allows multiple aliases per upstream name, and ensures aliases are unique within each channel.
 func (cfg *Config) SanitizeOAuthModelAlias() {
 	if cfg == nil || len(cfg.OAuthModelAlias) == 0 {
 		return
 	}
 	out := make(map[string][]OAuthModelAlias, len(cfg.OAuthModelAlias))
 	for rawChannel, aliases := range cfg.OAuthModelAlias {
 		channel := strings.ToLower(strings.TrimSpace(rawChannel))
 		if channel == "" || len(aliases) == 0 {
 			continue
 		}
 		seenAlias := make(map[string]struct{}, len(aliases))
 		clean := make([]OAuthModelAlias, 0, len(aliases))
 		for _, entry := range aliases {
 			name := strings.TrimSpace(entry.Name)
 			alias := strings.TrimSpace(entry.Alias)
 			if name == "" || alias == "" {
 				continue
 			}
@@ -548,13 +655,13 @@ func (cfg *Config) SanitizeOAuthModelMappings() {
 				continue
 			}
 			seenAlias[aliasKey] = struct{}{}
-			clean = append(clean, ModelNameMapping{Name: name, Alias: alias, Fork: mapping.Fork})
+			clean = append(clean, OAuthModelAlias{Name: name, Alias: alias, Fork: entry.Fork})
 		}
 		if len(clean) > 0 {
 			out[channel] = clean
 		}
 	}
-	cfg.OAuthModelMappings = out
+	cfg.OAuthModelAlias = out
 }
 // SanitizeOpenAICompatibility removes OpenAI-compatibility provider entries that are
--- a/internal/config/oauth_model_alias_migration.go
+++ b/internal/config/oauth_model_alias_migration.go
@@ -0,0 +1,275 @@
 package config
 import (
 	"os"
 	"sort"
 	"strings"
 	"gopkg.in/yaml.v3"
 )
 // antigravityModelConversionTable maps the old built-in alias names that
 // legacy oauth-model-mappings entries used in their "name" field to the actual
 // model names for the antigravity channel. migrateFromOldField looks entries
 // up by exact key; names that are not listed here are kept unchanged.
 var antigravityModelConversionTable = map[string]string{
 	"gemini-2.5-computer-use-preview-10-2025": "rev19-uic3-1p",
 	"gemini-3-pro-image-preview":              "gemini-3-pro-image",
 	"gemini-3-pro-preview":                    "gemini-3-pro-high",
 	"gemini-3-flash-preview":                  "gemini-3-flash",
 	"gemini-claude-sonnet-4-5":                "claude-sonnet-4-5",
 	"gemini-claude-sonnet-4-5-thinking":       "claude-sonnet-4-5-thinking",
 	"gemini-claude-opus-4-5-thinking":         "claude-opus-4-5-thinking",
 }
 // defaultAntigravityAliases builds the default oauth-model-alias entries for
 // the antigravity channel. A new slice is returned on every call; each entry
 // pairs an upstream model name with its client-visible alias and leaves Fork
 // unset.
 func defaultAntigravityAliases() []OAuthModelAlias {
 	// Each pair is {upstream name, alias}.
 	pairs := [...][2]string{
 		{"rev19-uic3-1p", "gemini-2.5-computer-use-preview-10-2025"},
 		{"gemini-3-pro-image", "gemini-3-pro-image-preview"},
 		{"gemini-3-pro-high", "gemini-3-pro-preview"},
 		{"gemini-3-flash", "gemini-3-flash-preview"},
 		{"claude-sonnet-4-5", "gemini-claude-sonnet-4-5"},
 		{"claude-sonnet-4-5-thinking", "gemini-claude-sonnet-4-5-thinking"},
 		{"claude-opus-4-5-thinking", "gemini-claude-opus-4-5-thinking"},
 	}
 	defaults := make([]OAuthModelAlias, 0, len(pairs))
 	for _, pair := range pairs {
 		defaults = append(defaults, OAuthModelAlias{Name: pair[0], Alias: pair[1]})
 	}
 	return defaults
 }
 // MigrateOAuthModelAlias upgrades a config file from the legacy
 // oauth-model-mappings key to oauth-model-alias. It reports true when the
 // file was rewritten.
 //
 // Decision order:
 //  1. oauth-model-alias already present -> nothing to do.
 //  2. oauth-model-mappings present -> convert it (antigravity entries have
 //     their old built-in alias names replaced by actual model names).
 //  3. Neither key present -> append the default antigravity aliases.
 //
 // A missing file, an empty file, unparsable YAML, or a document whose root is
 // not a mapping all yield (false, nil); other read errors and any write error
 // are returned to the caller.
 func MigrateOAuthModelAlias(configFile string) (bool, error) {
 	raw, readErr := os.ReadFile(configFile)
 	switch {
 	case readErr != nil && os.IsNotExist(readErr):
 		return false, nil
 	case readErr != nil:
 		return false, readErr
 	case len(raw) == 0:
 		return false, nil
 	}
 	// Work on the node tree rather than a typed struct so the rest of the
 	// document is carried through the rewrite.
 	var doc yaml.Node
 	if parseErr := yaml.Unmarshal(raw, &doc); parseErr != nil {
 		return false, nil
 	}
 	if doc.Kind != yaml.DocumentNode || len(doc.Content) == 0 {
 		return false, nil
 	}
 	top := doc.Content[0]
 	if top == nil || top.Kind != yaml.MappingNode {
 		return false, nil
 	}
 	// The new key wins: never touch a file that already defines it.
 	if findMapKeyIndex(top, "oauth-model-alias") >= 0 {
 		return false, nil
 	}
 	if legacyIdx := findMapKeyIndex(top, "oauth-model-mappings"); legacyIdx >= 0 {
 		return migrateFromOldField(configFile, &doc, top, legacyIdx)
 	}
 	// Neither key exists, so seed the file with the antigravity defaults.
 	return addDefaultAntigravityConfig(configFile, &doc, top)
 }
 // migrateFromOldField rewrites the oauth-model-mappings pair located at oldIdx
 // in rootMap as oauth-model-alias and persists the document to configFile.
 // When the legacy value holds no usable entries the key is simply removed.
 // It returns (false, nil) without writing if the legacy value is missing or is
 // not a mapping.
 func migrateFromOldField(configFile string, root *yaml.Node, rootMap *yaml.Node, oldIdx int) (bool, error) {
 	valueIdx := oldIdx + 1
 	if valueIdx >= len(rootMap.Content) {
 		return false, nil
 	}
 	legacy := rootMap.Content[valueIdx]
 	if legacy == nil || legacy.Kind != yaml.MappingNode {
 		return false, nil
 	}
 	parsed := parseOldAliasNode(legacy)
 	if len(parsed) == 0 {
 		// Nothing worth keeping: drop the legacy key and save.
 		removeMapKeyByIndex(rootMap, oldIdx)
 		return writeYAMLNode(configFile, root)
 	}
 	migrated := make(map[string][]OAuthModelAlias, len(parsed))
 	for channel, entries := range parsed {
 		// Only the antigravity channel had built-in alias names to translate.
 		isAntigravity := strings.EqualFold(channel, "antigravity")
 		rewritten := make([]OAuthModelAlias, len(entries))
 		for i, old := range entries {
 			item := OAuthModelAlias{Name: old.Name, Alias: old.Alias, Fork: old.Fork}
 			if isAntigravity {
 				if upstream, known := antigravityModelConversionTable[old.Name]; known {
 					item.Name = upstream
 				}
 			}
 			rewritten[i] = item
 		}
 		migrated[channel] = rewritten
 	}
 	// Top up the antigravity channel with defaults for every upstream model
 	// name that is not already configured.
 	if current, ok := migrated["antigravity"]; ok {
 		present := make(map[string]bool, len(current))
 		for _, item := range current {
 			present[item.Name] = true
 		}
 		for _, def := range defaultAntigravityAliases() {
 			if present[def.Name] {
 				continue
 			}
 			current = append(current, def)
 		}
 		migrated["antigravity"] = current
 	}
 	// Reuse the legacy key's slot so the new key keeps its place in the file.
 	replacement := buildOAuthModelAliasNode(migrated)
 	rootMap.Content[oldIdx].Value = "oauth-model-alias"
 	rootMap.Content[valueIdx] = replacement
 	return writeYAMLNode(configFile, root)
 }
 // addDefaultAntigravityConfig appends an oauth-model-alias key holding the
 // default antigravity aliases to the end of rootMap and writes the document
 // back to configFile.
 func addDefaultAntigravityConfig(configFile string, root *yaml.Node, rootMap *yaml.Node) (bool, error) {
 	valueNode := buildOAuthModelAliasNode(map[string][]OAuthModelAlias{
 		"antigravity": defaultAntigravityAliases(),
 	})
 	nameNode := &yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "oauth-model-alias"}
 	rootMap.Content = append(rootMap.Content, nameNode, valueNode)
 	return writeYAMLNode(configFile, root)
 }
 // parseOldAliasNode reads the legacy oauth-model-mappings mapping into a map
 // keyed by lower-cased, trimmed channel name. Channels whose value is not a
 // sequence, entries that are not mappings, and entries missing a name or an
 // alias are skipped; channels left with no entries are omitted.
 // It returns nil when node is nil or not a mapping.
 func parseOldAliasNode(node *yaml.Node) map[string][]OAuthModelAlias {
 	if node == nil || node.Kind != yaml.MappingNode {
 		return nil
 	}
 	parsed := make(map[string][]OAuthModelAlias)
 	pairs := node.Content
 	// Mapping content is a flat key, value, key, value... list.
 	for i := 0; i+1 < len(pairs); i += 2 {
 		keyNode, listNode := pairs[i], pairs[i+1]
 		if keyNode == nil || listNode == nil {
 			continue
 		}
 		name := strings.ToLower(strings.TrimSpace(keyNode.Value))
 		if name == "" || listNode.Kind != yaml.SequenceNode {
 			continue
 		}
 		kept := make([]OAuthModelAlias, 0, len(listNode.Content))
 		for _, item := range listNode.Content {
 			if item == nil || item.Kind != yaml.MappingNode {
 				continue
 			}
 			if alias := parseAliasEntry(item); alias.Name != "" && alias.Alias != "" {
 				kept = append(kept, alias)
 			}
 		}
 		if len(kept) == 0 {
 			continue
 		}
 		parsed[name] = kept
 	}
 	return parsed
 }
 // parseAliasEntry reads one alias entry mapping (name / alias / fork) into an
 // OAuthModelAlias. Keys are matched case-insensitively and string values are
 // trimmed; unknown keys are ignored. A nil node yields the zero value.
 //
 // The fork flag is decoded as a YAML boolean so that spellings the typed
 // config loader accepts (for example "yes" or "on") are not silently lost
 // during migration. If the value cannot be decoded as a bool, it falls back to
 // a case-insensitive comparison with "true".
 func parseAliasEntry(node *yaml.Node) OAuthModelAlias {
 	var entry OAuthModelAlias
 	if node == nil {
 		return entry
 	}
 	for i := 0; i+1 < len(node.Content); i += 2 {
 		keyNode := node.Content[i]
 		valNode := node.Content[i+1]
 		if keyNode == nil || valNode == nil {
 			continue
 		}
 		value := strings.TrimSpace(valNode.Value)
 		switch strings.ToLower(strings.TrimSpace(keyNode.Value)) {
 		case "name":
 			entry.Name = value
 		case "alias":
 			entry.Alias = value
 		case "fork":
 			var fork bool
 			if err := valNode.Decode(&fork); err != nil {
 				// Not a YAML bool (e.g. a quoted "true"); keep the lenient match.
 				fork = strings.ToLower(value) == "true"
 			}
 			entry.Fork = fork
 		}
 	}
 	return entry
 }
 // buildOAuthModelAliasNode creates the YAML mapping node for oauth-model-alias:
 // one key per channel whose value is a sequence of {name, alias[, fork]}
 // mappings. The fork key is emitted only when it is true.
 //
 // Channels are written in sorted order. Ranging over the map directly would
 // make the layout of the rewritten config file differ from run to run.
 func buildOAuthModelAliasNode(aliases map[string][]OAuthModelAlias) *yaml.Node {
 	channels := make([]string, 0, len(aliases))
 	for channel := range aliases {
 		channels = append(channels, channel)
 	}
 	sort.Strings(channels)
 	node := &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"}
 	for _, channel := range channels {
 		channelNode := &yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: channel}
 		entriesNode := &yaml.Node{Kind: yaml.SequenceNode, Tag: "!!seq"}
 		for _, entry := range aliases[channel] {
 			entryNode := &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"}
 			entryNode.Content = append(entryNode.Content,
 				&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "name"},
 				&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: entry.Name},
 				&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "alias"},
 				&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: entry.Alias},
 			)
 			if entry.Fork {
 				entryNode.Content = append(entryNode.Content,
 					&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "fork"},
 					&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!bool", Value: "true"},
 				)
 			}
 			entriesNode.Content = append(entriesNode.Content, entryNode)
 		}
 		node.Content = append(node.Content, channelNode, entriesNode)
 	}
 	return node
 }
 // removeMapKeyByIndex deletes the key at keyIdx together with the value that
 // follows it from a mapping node. It does nothing when mapNode is nil or not a
 // mapping, or when the pair does not lie fully inside the content slice.
 func removeMapKeyByIndex(mapNode *yaml.Node, keyIdx int) {
 	if mapNode == nil || mapNode.Kind != yaml.MappingNode {
 		return
 	}
 	valueIdx := keyIdx + 1
 	if keyIdx >= 0 && valueIdx < len(mapNode.Content) {
 		mapNode.Content = append(mapNode.Content[:keyIdx], mapNode.Content[valueIdx+1:]...)
 	}
 }
 // writeYAMLNode serializes the YAML node tree with two-space indentation and
 // writes it to configFile, returning (true, nil) on success.
 //
 // The document is encoded in memory first and the file is opened only once
 // encoding has succeeded, so an encoding failure can no longer leave the
 // user's config truncated. Errors from writing and closing the file are
 // reported instead of being dropped.
 func writeYAMLNode(configFile string, root *yaml.Node) (bool, error) {
 	var encoded strings.Builder
 	enc := yaml.NewEncoder(&encoded)
 	enc.SetIndent(2)
 	if err := enc.Encode(root); err != nil {
 		return false, err
 	}
 	if err := enc.Close(); err != nil {
 		return false, err
 	}
 	// The mode only matters if the file has to be re-created; an existing file
 	// keeps its current permissions.
 	if err := os.WriteFile(configFile, []byte(encoded.String()), 0o600); err != nil {
 		return false, err
 	}
 	return true, nil
 }
--- a/internal/config/oauth_model_alias_migration_test.go
+++ b/internal/config/oauth_model_alias_migration_test.go
@@ -0,0 +1,242 @@
 package config
 import (
 	"os"
 	"path/filepath"
 	"strings"
 	"testing"
 	"gopkg.in/yaml.v3"
 )
 // TestMigrateOAuthModelAlias_SkipsIfNewFieldExists verifies that a config which
 // already defines oauth-model-alias is reported as not migrated and is left
 // byte-for-byte untouched on disk.
 func TestMigrateOAuthModelAlias_SkipsIfNewFieldExists(t *testing.T) {
 	t.Parallel()
 	dir := t.TempDir()
 	configFile := filepath.Join(dir, "config.yaml")
 	content := `oauth-model-alias:
  gemini-cli:
    - name: "gemini-2.5-pro"
      alias: "g2.5p"
 `
 	if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
 		t.Fatal(err)
 	}
 	migrated, err := MigrateOAuthModelAlias(configFile)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
 	if migrated {
 		t.Fatal("expected no migration when oauth-model-alias already exists")
 	}
 	// Verify file unchanged: compare the full contents, not just a substring.
 	data, err := os.ReadFile(configFile)
 	if err != nil {
 		t.Fatalf("failed to read config after migration check: %v", err)
 	}
 	if string(data) != content {
 		t.Fatalf("file should be unchanged, got:\n%s", data)
 	}
 }
 // TestMigrateOAuthModelAlias_MigratesOldField verifies that a legacy
 // oauth-model-mappings section is renamed to oauth-model-alias and that the
 // entry's name, alias and fork flag survive the rewrite.
 func TestMigrateOAuthModelAlias_MigratesOldField(t *testing.T) {
 	t.Parallel()
 	dir := t.TempDir()
 	configFile := filepath.Join(dir, "config.yaml")
 	content := `oauth-model-mappings:
  gemini-cli:
    - name: "gemini-2.5-pro"
      alias: "g2.5p"
      fork: true
 `
 	if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
 		t.Fatal(err)
 	}
 	migrated, err := MigrateOAuthModelAlias(configFile)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
 	if !migrated {
 		t.Fatal("expected migration to occur")
 	}
 	// Verify new field exists and old field removed
 	data, err := os.ReadFile(configFile)
 	if err != nil {
 		t.Fatalf("failed to read migrated config: %v", err)
 	}
 	if strings.Contains(string(data), "oauth-model-mappings:") {
 		t.Fatal("old field should be removed")
 	}
 	if !strings.Contains(string(data), "oauth-model-alias:") {
 		t.Fatal("new field should exist")
 	}
 	// Parse and verify structure
 	var parsed struct {
 		Aliases map[string][]OAuthModelAlias `yaml:"oauth-model-alias"`
 	}
 	if err := yaml.Unmarshal(data, &parsed); err != nil {
 		t.Fatal(err)
 	}
 	entries := parsed.Aliases["gemini-cli"]
 	if len(entries) != 1 {
 		t.Fatalf("expected 1 gemini-cli alias, got %d", len(entries))
 	}
 	if got := entries[0]; got.Name != "gemini-2.5-pro" || got.Alias != "g2.5p" || !got.Fork {
 		t.Fatalf("unexpected migrated entry: name=%q alias=%q fork=%v", got.Name, got.Alias, got.Fork)
 	}
 }
 // TestMigrateOAuthModelAlias_ConvertsAntigravityModels verifies that legacy
 // antigravity entries have their old built-in names replaced by the actual
 // model names, keep their user-chosen aliases, and are topped up with every
 // default alias that was not already configured.
 //
 // The migrated file is decoded and compared entry by entry. Substring checks
 // are too weak here: "claude-sonnet-4-5" would also match
 // "claude-sonnet-4-5-thinking".
 func TestMigrateOAuthModelAlias_ConvertsAntigravityModels(t *testing.T) {
 	t.Parallel()
 	dir := t.TempDir()
 	configFile := filepath.Join(dir, "config.yaml")
 	// Use old model names that should be converted
 	content := `oauth-model-mappings:
  antigravity:
    - name: "gemini-2.5-computer-use-preview-10-2025"
      alias: "computer-use"
    - name: "gemini-3-pro-preview"
      alias: "g3p"
 `
 	if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
 		t.Fatal(err)
 	}
 	migrated, err := MigrateOAuthModelAlias(configFile)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
 	if !migrated {
 		t.Fatal("expected migration to occur")
 	}
 	data, err := os.ReadFile(configFile)
 	if err != nil {
 		t.Fatalf("failed to read migrated config: %v", err)
 	}
 	var parsed struct {
 		Aliases map[string][]OAuthModelAlias `yaml:"oauth-model-alias"`
 	}
 	if err := yaml.Unmarshal(data, &parsed); err != nil {
 		t.Fatalf("failed to parse migrated config: %v", err)
 	}
 	entries := parsed.Aliases["antigravity"]
 	// Upstream name -> expected alias: two converted user entries followed by
 	// the five defaults that were missing from the legacy config.
 	want := map[string]string{
 		"rev19-uic3-1p":              "computer-use",
 		"gemini-3-pro-high":          "g3p",
 		"gemini-3-pro-image":         "gemini-3-pro-image-preview",
 		"gemini-3-flash":             "gemini-3-flash-preview",
 		"claude-sonnet-4-5":          "gemini-claude-sonnet-4-5",
 		"claude-sonnet-4-5-thinking": "gemini-claude-sonnet-4-5-thinking",
 		"claude-opus-4-5-thinking":   "gemini-claude-opus-4-5-thinking",
 	}
 	if len(entries) != len(want) {
 		t.Fatalf("expected %d antigravity aliases, got %d", len(want), len(entries))
 	}
 	got := make(map[string]string, len(entries))
 	for _, entry := range entries {
 		got[entry.Name] = entry.Alias
 	}
 	for name, alias := range want {
 		if actual, ok := got[name]; !ok || actual != alias {
 			t.Fatalf("expected model %q to be aliased as %q, got %q (present=%v)", name, alias, actual, ok)
 		}
 	}
 }
 // TestMigrateOAuthModelAlias_AddsDefaultIfNeitherExists verifies that a config
 // containing neither the legacy nor the new key gains an oauth-model-alias
 // section with the default antigravity aliases.
 func TestMigrateOAuthModelAlias_AddsDefaultIfNeitherExists(t *testing.T) {
 	t.Parallel()
 	dir := t.TempDir()
 	configFile := filepath.Join(dir, "config.yaml")
 	content := `debug: true
 port: 8080
 `
 	if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
 		t.Fatal(err)
 	}
 	migrated, err := MigrateOAuthModelAlias(configFile)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
 	if !migrated {
 		t.Fatal("expected migration to add default config")
 	}
 	// Verify default antigravity config was added
 	data, err := os.ReadFile(configFile)
 	if err != nil {
 		t.Fatalf("failed to read migrated config: %v", err)
 	}
 	content = string(data)
 	checks := []struct {
 		needle  string
 		failure string
 	}{
 		{"oauth-model-alias:", "expected oauth-model-alias to be added"},
 		{"antigravity:", "expected antigravity channel to be added"},
 		{"rev19-uic3-1p", "expected default antigravity aliases to include rev19-uic3-1p"},
 	}
 	for _, check := range checks {
 		if !strings.Contains(content, check.needle) {
 			t.Fatal(check.failure)
 		}
 	}
 }
 // TestMigrateOAuthModelAlias_PreservesOtherConfig verifies that keys unrelated
 // to the migration (debug, port, api-keys) are still present after the legacy
 // section has been rewritten.
 func TestMigrateOAuthModelAlias_PreservesOtherConfig(t *testing.T) {
 	t.Parallel()
 	dir := t.TempDir()
 	configFile := filepath.Join(dir, "config.yaml")
 	content := `debug: true
 port: 8080
 oauth-model-mappings:
  gemini-cli:
    - name: "test"
      alias: "t"
 api-keys:
  - "key1"
  - "key2"
 `
 	if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
 		t.Fatal(err)
 	}
 	migrated, err := MigrateOAuthModelAlias(configFile)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
 	if !migrated {
 		t.Fatal("expected migration to occur")
 	}
 	// Verify other config preserved
 	data, err := os.ReadFile(configFile)
 	if err != nil {
 		t.Fatalf("failed to read migrated config: %v", err)
 	}
 	content = string(data)
 	checks := []struct {
 		needle  string
 		failure string
 	}{
 		{"debug: true", "expected debug field to be preserved"},
 		{"port: 8080", "expected port field to be preserved"},
 		{"api-keys:", "expected api-keys field to be preserved"},
 	}
 	for _, check := range checks {
 		if !strings.Contains(content, check.needle) {
 			t.Fatal(check.failure)
 		}
 	}
 }
 // TestMigrateOAuthModelAlias_NonexistentFile verifies that a missing config
 // file is treated as "nothing to migrate" rather than as an error.
 func TestMigrateOAuthModelAlias_NonexistentFile(t *testing.T) {
 	t.Parallel()
 	// A path inside a fresh temp dir is guaranteed not to exist, which a
 	// hard-coded absolute path cannot promise on every machine.
 	missing := filepath.Join(t.TempDir(), "config.yaml")
 	migrated, err := MigrateOAuthModelAlias(missing)
 	if err != nil {
 		t.Fatalf("unexpected error for nonexistent file: %v", err)
 	}
 	if migrated {
 		t.Fatal("expected no migration for nonexistent file")
 	}
 }
 // TestMigrateOAuthModelAlias_EmptyFile verifies that a zero-length config file
 // produces neither an error nor a migration.
 func TestMigrateOAuthModelAlias_EmptyFile(t *testing.T) {
 	t.Parallel()
 	configFile := filepath.Join(t.TempDir(), "config.yaml")
 	if writeErr := os.WriteFile(configFile, []byte{}, 0644); writeErr != nil {
 		t.Fatal(writeErr)
 	}
 	switch migrated, err := MigrateOAuthModelAlias(configFile); {
 	case err != nil:
 		t.Fatalf("unexpected error: %v", err)
 	case migrated:
 		t.Fatal("expected no migration for empty file")
 	}
 }
--- a/internal/config/oauth_model_alias_test.go
+++ b/internal/config/oauth_model_alias_test.go
@@ -0,0 +1,56 @@
 package config
 import "testing"
 func TestSanitizeOAuthModelAlias_PreservesForkFlag(t *testing.T) {
 	cfg := &Config{
 		OAuthModelAlias: map[string][]OAuthModelAlias{
 			" CoDeX ": {
 				{Name: " gpt-5 ", Alias: " g5 ", Fork: true},
 				{Name: "gpt-6", Alias: "g6"},
 			},
 		},
 	}
 	cfg.SanitizeOAuthModelAlias()
 	aliases := cfg.OAuthModelAlias["codex"]
 	if len(aliases) != 2 {
 		t.Fatalf("expected 2 sanitized aliases, got %d", len(aliases))
 	}
 	if aliases[0].Name != "gpt-5" || aliases[0].Alias != "g5" || !aliases[0].Fork {
 		t.Fatalf("expected first alias to be gpt-5->g5 fork=true, got name=%q alias=%q fork=%v", aliases[0].Name, aliases[0].Alias, aliases[0].Fork)
 	}
 	if aliases[1].Name != "gpt-6" || aliases[1].Alias != "g6" || aliases[1].Fork {
 		t.Fatalf("expected second alias to be gpt-6->g6 fork=false, got name=%q alias=%q fork=%v", aliases[1].Name, aliases[1].Alias, aliases[1].Fork)
 	}
 }
 func TestSanitizeOAuthModelAlias_AllowsMultipleAliasesForSameName(t *testing.T) {
 	cfg := &Config{
 		OAuthModelAlias: map[string][]OAuthModelAlias{
 			"antigravity": {
 				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true},
 				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true},
 				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true},
 			},
 		},
 	}
 	cfg.SanitizeOAuthModelAlias()
 	aliases := cfg.OAuthModelAlias["antigravity"]
 	expected := []OAuthModelAlias{
 		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true},
 		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true},
 		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true},
 	}
 	if len(aliases) != len(expected) {
 		t.Fatalf("expected %d sanitized aliases, got %d", len(expected), len(aliases))
 	}
 	for i, exp := range expected {
 		if aliases[i].Name != exp.Name || aliases[i].Alias != exp.Alias || aliases[i].Fork != exp.Fork {
 			t.Fatalf("expected alias %d to be name=%q alias=%q fork=%v, got name=%q alias=%q fork=%v", i, exp.Name, exp.Alias, exp.Fork, aliases[i].Name, aliases[i].Alias, aliases[i].Fork)
 		}
 	}
 }
--- a/internal/config/oauth_model_mappings_test.go
+++ b/internal/config/oauth_model_mappings_test.go
@@ -1,56 +0,0 @@
 package config
 import "testing"
 // TestSanitizeOAuthModelMappings_PreservesForkFlag runs
 // SanitizeOAuthModelMappings over a padded, mixed-case channel key and
 // entries, then checks that the "codex" channel holds the trimmed
 // names/aliases with each Fork value unchanged.
 func TestSanitizeOAuthModelMappings_PreservesForkFlag(t *testing.T) {
 	cfg := &Config{OAuthModelMappings: map[string][]ModelNameMapping{}}
 	cfg.OAuthModelMappings[" CoDeX "] = []ModelNameMapping{
 		{Name: " gpt-5 ", Alias: " g5 ", Fork: true},
 		{Name: "gpt-6", Alias: "g6"},
 	}
 	cfg.SanitizeOAuthModelMappings()
 	got := cfg.OAuthModelMappings["codex"]
 	if len(got) != 2 {
 		t.Fatalf("expected 2 sanitized mappings, got %d", len(got))
 	}
 	// check compares one sanitized entry against its expected values and
 	// fails the test with the entry's ordinal label on mismatch.
 	check := func(idx int, label, name, alias string, fork bool) {
 		m := got[idx]
 		if m.Name != name || m.Alias != alias || m.Fork != fork {
 			t.Fatalf("expected %s mapping to be %s->%s fork=%v, got name=%q alias=%q fork=%v", label, name, alias, fork, m.Name, m.Alias, m.Fork)
 		}
 	}
 	check(0, "first", "gpt-5", "g5", true)
 	check(1, "second", "gpt-6", "g6", false)
 }
 func TestSanitizeOAuthModelMappings_AllowsMultipleAliasesForSameName(t *testing.T) {
 	cfg := &Config{
 		OAuthModelMappings: map[string][]ModelNameMapping{
 			"antigravity": {
 				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true},
 				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true},
 				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true},
 			},
 		},
 	}
 	cfg.SanitizeOAuthModelMappings()
 	mappings := cfg.OAuthModelMappings["antigravity"]
 	expected := []ModelNameMapping{
 		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true},
 		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true},
 		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true},
 	}
 	if len(mappings) != len(expected) {
 		t.Fatalf("expected %d sanitized mappings, got %d", len(expected), len(mappings))
 	}
 	for i, exp := range expected {
 		if mappings[i].Name != exp.Name || mappings[i].Alias != exp.Alias || mappings[i].Fork != exp.Fork {
 			t.Fatalf("expected mapping %d to be name=%q alias=%q fork=%v, got name=%q alias=%q fork=%v", i, exp.Name, exp.Alias, exp.Fork, mappings[i].Name, mappings[i].Alias, mappings[i].Fork)
 		}
 	}
 }
--- a/internal/config/vertex_compat.go
+++ b/internal/config/vertex_compat.go
@@ -13,6 +13,10 @@ type VertexCompatKey struct {
 	// Maps to the x-goog-api-key header.
 	APIKey string `yaml:"api-key" json:"api-key"`
 	// Priority controls selection preference when multiple credentials match.
 	// Higher values are preferred; defaults to 0.
 	Priority int `yaml:"priority,omitempty" json:"priority,omitempty"`
 	// Prefix optionally namespaces model aliases for this credential (e.g., "teamA/vertex-pro").
 	Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"`
@@ -32,6 +36,9 @@ type VertexCompatKey struct {
 	Models []VertexCompatModel `yaml:"models,omitempty" json:"models,omitempty"`
 }
 // GetAPIKey returns the credential's APIKey field.
 func (k VertexCompatKey) GetAPIKey() string {
 	return k.APIKey
 }
 // GetBaseURL returns the credential's BaseURL field.
 func (k VertexCompatKey) GetBaseURL() string {
 	return k.BaseURL
 }
 // VertexCompatModel represents a model configuration for Vertex compatibility,
 // including the actual model name and its alias for API routing.
 type VertexCompatModel struct {
--- a/internal/logging/global_logger.go
+++ b/internal/logging/global_logger.go
@@ -29,6 +29,9 @@ var (
 // Format: [2025-12-23 20:14:04] [debug] [manager.go:524] | a1b2c3d4 | Use API key sk-9...0RHO for model gpt-5.2
 type LogFormatter struct{}
 // logFieldOrder lists the structured log field keys that Format appends after
 // the message, in the order they are printed. Fields whose key is not listed
 // here are not included in the formatted line.
 var logFieldOrder = []string{"provider", "model", "mode", "budget", "level", "original_mode", "original_value", "min", "max", "clamped_to", "error"}
 // Format renders a single log entry with custom formatting.
 func (m *LogFormatter) Format(entry *log.Entry) ([]byte, error) {
 	var buffer *bytes.Buffer
@@ -52,11 +55,25 @@ func (m *LogFormatter) Format(entry *log.Entry) ([]byte, error) {
 	}
 	levelStr := fmt.Sprintf("%-5s", level)
 	// Build fields string (only print fields in logFieldOrder)
 	var fieldsStr string
 	if len(entry.Data) > 0 {
 		var fields []string
 		for _, k := range logFieldOrder {
 			if v, ok := entry.Data[k]; ok {
 				fields = append(fields, fmt.Sprintf("%s=%v", k, v))
 			}
 		}
 		if len(fields) > 0 {
 			fieldsStr = " " + strings.Join(fields, " ")
 		}
 	}
 	var formatted string
 	if entry.Caller != nil {
-		formatted = fmt.Sprintf("[%s] [%s] [%s] [%s:%d] %s\n", timestamp, reqID, levelStr, filepath.Base(entry.Caller.File), entry.Caller.Line, message)
+		formatted = fmt.Sprintf("[%s] [%s] [%s] [%s:%d] %s%s\n", timestamp, reqID, levelStr, filepath.Base(entry.Caller.File), entry.Caller.Line, message, fieldsStr)
 	} else {
-		formatted = fmt.Sprintf("[%s] [%s] [%s] %s\n", timestamp, reqID, levelStr, message)
+		formatted = fmt.Sprintf("[%s] [%s] [%s] %s%s\n", timestamp, reqID, levelStr, message, fieldsStr)
 	}
 	buffer.WriteString(formatted)
@@ -104,6 +121,24 @@ func isDirWritable(dir string) bool {
 	return true
 }
 // ResolveLogDirectory picks the directory application logs are written to.
 //
 // Precedence:
 //  1. "<base>/logs" when util.WritablePath returns a non-empty base.
 //  2. The relative "logs" directory when cfg is nil or "logs" is writable.
 //  3. "<cfg.AuthDir>/logs" when "logs" is not writable and the trimmed
 //     AuthDir is non-empty.
 //  4. "logs" otherwise.
 func ResolveLogDirectory(cfg *config.Config) string {
 	const defaultDir = "logs"
 	base := util.WritablePath()
 	switch {
 	case base != "":
 		return filepath.Join(base, defaultDir)
 	case cfg == nil:
 		return defaultDir
 	case isDirWritable(defaultDir):
 		return defaultDir
 	}
 	// The default location is unusable; fall back to the auth directory when
 	// one is configured.
 	if authDir := strings.TrimSpace(cfg.AuthDir); authDir != "" {
 		return filepath.Join(authDir, defaultDir)
 	}
 	return defaultDir
 }
 // ConfigureLogOutput switches the global log destination between rotating files and stdout.
 // When logsMaxTotalSizeMB > 0, a background cleaner removes the oldest log files in the logs directory
 // until the total size is within the limit.
@@ -113,12 +148,7 @@ func ConfigureLogOutput(cfg *config.Config) error {
 	writerMu.Lock()
 	defer writerMu.Unlock()
-	logDir := "logs"
+	logDir := ResolveLogDirectory(cfg)
 	if base := util.WritablePath(); base != "" {
 		logDir = filepath.Join(base, "logs")
 	} else if !isDirWritable(logDir) {
 		logDir = filepath.Join(cfg.AuthDir, "logs")
 	}
 	protectedPath := ""
 	if cfg.LoggingToFile {
--- a/internal/misc/codex_instructions.go
+++ b/internal/misc/codex_instructions.go
@@ -7,11 +7,27 @@ import (
 	"embed"
 	_ "embed"
 	"strings"
 	"sync/atomic"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
 // codexInstructionsEnabled is the switch consulted by
 // CodexInstructionsForModel before it returns official instructions.
 // Its zero value is false, in which case CodexInstructionsForModel returns
 // (true, "") immediately. Change it with SetCodexInstructionsEnabled
 // (intended to be driven from config).
 var codexInstructionsEnabled atomic.Bool

 // SetCodexInstructionsEnabled atomically stores whether codex instructions
 // processing is enabled.
 func SetCodexInstructionsEnabled(enabled bool) {
 	codexInstructionsEnabled.Store(enabled)
 }

 // GetCodexInstructionsEnabled atomically loads and reports whether codex
 // instructions processing is enabled.
 func GetCodexInstructionsEnabled() bool {
 	enabled := codexInstructionsEnabled.Load()
 	return enabled
 }
 // codexInstructionsDir is the embedded file system populated from the
 // codex_instructions directory by the go:embed directive below.
 //
 //go:embed codex_instructions
 var codexInstructionsDir embed.FS
@@ -124,6 +140,9 @@ func codexInstructionsForCodex(modelName, systemInstructions string) (bool, stri
 }
 func CodexInstructionsForModel(modelName, systemInstructions, userAgent string) (bool, string) {
 	if !GetCodexInstructionsEnabled() {
 		return true, ""
 	}
 	if IsOpenCodeUserAgent(userAgent) {
 		return codexInstructionsForOpenCode(systemInstructions)
 	}
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -27,7 +27,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 4.5 Sonnet",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
 		},
 		{
 			ID:                  "claude-opus-4-5-20251101",
@@ -39,7 +39,7 @@ func GetClaudeModels() []*ModelInfo {
 			Description:         "Premium model combining maximum intelligence with practical performance",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
 		},
 		{
 			ID:                  "claude-opus-4-1-20250805",
@@ -50,7 +50,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 4.1 Opus",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
 		},
 		{
 			ID:                  "claude-opus-4-20250514",
@@ -61,7 +61,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 4 Opus",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
 		},
 		{
 			ID:                  "claude-sonnet-4-20250514",
@@ -72,7 +72,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 4 Sonnet",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
 		},
 		{
 			ID:                  "claude-3-7-sonnet-20250219",
@@ -83,7 +83,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 3.7 Sonnet",
 			ContextLength:       128000,
 			MaxCompletionTokens: 8192,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
 		},
 		{
 			ID:                  "claude-3-5-haiku-20241022",
@@ -287,6 +287,67 @@ func GetGeminiVertexModels() []*ModelInfo {
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
 		},
 		// Imagen image generation models - use :predict action
 		{
 			ID:                         "imagen-4.0-generate-001",
 			Object:                     "model",
 			Created:                    1750000000,
 			OwnedBy:                    "google",
 			Type:                       "gemini",
 			Name:                       "models/imagen-4.0-generate-001",
 			Version:                    "4.0",
 			DisplayName:                "Imagen 4.0 Generate",
 			Description:                "Imagen 4.0 image generation model",
 			SupportedGenerationMethods: []string{"predict"},
 		},
 		{
 			ID:                         "imagen-4.0-ultra-generate-001",
 			Object:                     "model",
 			Created:                    1750000000,
 			OwnedBy:                    "google",
 			Type:                       "gemini",
 			Name:                       "models/imagen-4.0-ultra-generate-001",
 			Version:                    "4.0",
 			DisplayName:                "Imagen 4.0 Ultra Generate",
 			Description:                "Imagen 4.0 Ultra high-quality image generation model",
 			SupportedGenerationMethods: []string{"predict"},
 		},
 		{
 			ID:                         "imagen-3.0-generate-002",
 			Object:                     "model",
 			Created:                    1740000000,
 			OwnedBy:                    "google",
 			Type:                       "gemini",
 			Name:                       "models/imagen-3.0-generate-002",
 			Version:                    "3.0",
 			DisplayName:                "Imagen 3.0 Generate",
 			Description:                "Imagen 3.0 image generation model",
 			SupportedGenerationMethods: []string{"predict"},
 		},
 		{
 			ID:                         "imagen-3.0-fast-generate-001",
 			Object:                     "model",
 			Created:                    1740000000,
 			OwnedBy:                    "google",
 			Type:                       "gemini",
 			Name:                       "models/imagen-3.0-fast-generate-001",
 			Version:                    "3.0",
 			DisplayName:                "Imagen 3.0 Fast Generate",
 			Description:                "Imagen 3.0 fast image generation model",
 			SupportedGenerationMethods: []string{"predict"},
 		},
 		{
 			ID:                         "imagen-4.0-fast-generate-001",
 			Object:                     "model",
 			Created:                    1750000000,
 			OwnedBy:                    "google",
 			Type:                       "gemini",
 			Name:                       "models/imagen-4.0-fast-generate-001",
 			Version:                    "4.0",
 			DisplayName:                "Imagen 4.0 Fast Generate",
 			Description:                "Imagen 4.0 fast image generation model",
 			SupportedGenerationMethods: []string{"predict"},
 		},
 	}
 }
@@ -432,7 +493,7 @@ func GetAIStudioModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
 			ID:                         "gemini-3-flash-preview",
@@ -447,7 +508,7 @@ func GetAIStudioModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
 			ID:                         "gemini-pro-latest",
@@ -742,6 +803,7 @@ func GetIFlowModels() []*ModelInfo {
 		{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
 		{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
 		{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
 		{ID: "iflow-rome-30ba3b", DisplayName: "iFlow-ROME", Description: "iFlow Rome 30BA3B model", Created: 1736899200},
 	}
 	models := make([]*ModelInfo, 0, len(entries))
 	for _, entry := range entries {
@@ -764,21 +826,23 @@ func GetIFlowModels() []*ModelInfo {
 type AntigravityModelConfig struct {
 	// Thinking describes the model's thinking budget support; it is nil for
 	// entries that declare none.
 	Thinking            *ThinkingSupport
 	// MaxCompletionTokens is the model's completion token limit; it stays zero
 	// for entries that do not set it.
 	MaxCompletionTokens int
 	// Name is an optional model name string.
 	// NOTE(review): the static table returned by GetAntigravityModelConfig does
 	// not appear to populate this field — confirm whether it is still used.
 	Name                string
 }
 // GetAntigravityModelConfig returns static configuration for antigravity models.
-// Keys use the ALIASED model names (after modelName2Alias conversion) for direct lookup.
+// Keys use upstream model names returned by the Antigravity models endpoint.
 func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
 	return map[string]*AntigravityModelConfig{
-		"gemini-2.5-flash":                        {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"},
+		"gemini-2.5-flash":           {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
-		"gemini-2.5-flash-lite":                   {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"},
+		"gemini-2.5-flash-lite":      {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
-		"gemini-2.5-computer-use-preview-10-2025": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-2.5-computer-use-preview-10-2025"},
+		"rev19-uic3-1p":              {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
-		"gemini-3-pro-preview":                    {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-preview"},
+		"gemini-3-pro-high":          {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
-		"gemini-3-pro-image-preview":              {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image-preview"},
+		"gemini-3-pro-image":         {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
-		"gemini-3-flash-preview":                  {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash-preview"},
+		"gemini-3-flash":             {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
-		"gemini-claude-sonnet-4-5-thinking":       {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		"claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
-		"gemini-claude-opus-4-5-thinking":         {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		"claude-opus-4-5-thinking":   {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
 		"claude-sonnet-4-5":          {MaxCompletionTokens: 64000},
 		"gpt-oss-120b-medium":        {},
 		"tab_flash_lite_preview":     {},
 	}
 }
@@ -788,6 +852,7 @@ func LookupStaticModelInfo(modelID string) *ModelInfo {
 	if modelID == "" {
 		return nil
 	}
 	allModels := [][]*ModelInfo{
 		GetClaudeModels(),
 		GetGeminiModels(),
@@ -805,5 +870,15 @@ func LookupStaticModelInfo(modelID string) *ModelInfo {
 			}
 		}
 	}
 	// Check Antigravity static config
 	if cfg := GetAntigravityModelConfig()[modelID]; cfg != nil {
 		return &ModelInfo{
 			ID:                  modelID,
 			Thinking:            cfg.Thinking,
 			MaxCompletionTokens: cfg.MaxCompletionTokens,
 		}
 	}
 	return nil
 }
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -51,6 +51,11 @@ type ModelInfo struct {
 	// Thinking holds provider-specific reasoning/thinking budget capabilities.
 	// This is optional and currently used for Gemini thinking budget normalization.
 	Thinking *ThinkingSupport `json:"thinking,omitempty"`
 	// UserDefined indicates this model was defined through config file's models[]
 	// array (e.g., openai-compatibility.*.models[], *-api-key.models[]).
 	// UserDefined models have thinking configuration passed through without validation.
 	UserDefined bool `json:"-"`
 }
 // ThinkingSupport describes a model family's supported internal reasoning budget range.
@@ -127,6 +132,21 @@ func GetGlobalRegistry() *ModelRegistry {
 	return globalRegistry
 }
 // LookupModelInfo resolves model metadata for modelID, preferring the dynamic
 // global registry and falling back to the static model definitions.
 //
 // Some code paths only have a model ID and still need Thinking and max
 // completion token metadata even when the dynamic registry has not been
 // populated; the static fallback covers them. The ID is trimmed of
 // surrounding whitespace first, and a blank ID yields nil.
 func LookupModelInfo(modelID string) *ModelInfo {
 	id := strings.TrimSpace(modelID)
 	if id == "" {
 		return nil
 	}
 	dynamic := GetGlobalRegistry().GetModelInfo(id)
 	if dynamic == nil {
 		return LookupStaticModelInfo(id)
 	}
 	return dynamic
 }
 // SetHook sets an optional hook for observing model registration changes.
 func (r *ModelRegistry) SetHook(hook ModelRegistryHook) {
 	if r == nil {
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -14,7 +14,7 @@ import (
 	"strings"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/wsrelay"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -111,7 +111,8 @@ func (e *AIStudioExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.A
 // Execute performs a non-streaming request to the AI Studio API.
 func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 	translatedReq, body, err := e.translateRequest(req, opts, false)
@@ -119,7 +120,7 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
 		return resp, err
 	}
-	endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt)
+	endpoint := e.buildEndpoint(baseModel, body.action, opts.Alt)
 	wsReq := &wsrelay.HTTPRequest{
 		Method:  http.MethodPost,
 		URL:     endpoint,
@@ -166,7 +167,8 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
 // ExecuteStream performs a streaming request to the AI Studio API.
 func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 	translatedReq, body, err := e.translateRequest(req, opts, true)
@@ -174,7 +176,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 		return nil, err
 	}
-	endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt)
+	endpoint := e.buildEndpoint(baseModel, body.action, opts.Alt)
 	wsReq := &wsrelay.HTTPRequest{
 		Method:  http.MethodPost,
 		URL:     endpoint,
@@ -315,6 +317,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 // CountTokens counts tokens for the given request using the AI Studio API.
 func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	_, body, err := e.translateRequest(req, opts, false)
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
@@ -324,7 +327,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
 	body.payload, _ = sjson.DeleteBytes(body.payload, "tools")
 	body.payload, _ = sjson.DeleteBytes(body.payload, "safetySettings")
-	endpoint := e.buildEndpoint(req.Model, "countTokens", "")
+	endpoint := e.buildEndpoint(baseModel, "countTokens", "")
 	wsReq := &wsrelay.HTTPRequest{
 		Method:  http.MethodPost,
 		URL:     endpoint,
@@ -380,22 +383,22 @@ type translatedPayload struct {
 }
 func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool) ([]byte, translatedPayload, error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, stream)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream)
-	payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
+	payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
-	payload = ApplyThinkingMetadata(payload, req.Metadata, req.Model)
+	payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String())
-	payload = util.ApplyGemini3ThinkingLevelFromMetadata(req.Model, req.Metadata, payload)
+	if err != nil {
-	payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload)
+		return nil, translatedPayload{}, err
-	payload = util.ConvertThinkingLevelToBudget(payload, req.Model, true)
+	}
-	payload = util.NormalizeGeminiThinkingBudget(req.Model, payload, true)
+	payload = fixGeminiImageAspectRatio(baseModel, payload)
-	payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
+	payload = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", payload, originalTranslated)
 	payload = fixGeminiImageAspectRatio(req.Model, payload)
 	payload = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", payload, originalTranslated)
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens")
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseMimeType")
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseJsonSchema")
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -24,7 +24,9 @@ import (
 	"github.com/google/uuid"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	sdkAuth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
@@ -107,8 +109,10 @@ func (e *AntigravityExecutor) HttpRequest(ctx context.Context, auth *cliproxyaut
 // Execute performs a non-streaming request to the Antigravity API.
 func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
-	isClaude := strings.Contains(strings.ToLower(req.Model), "claude")
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
-	if isClaude || strings.Contains(req.Model, "gemini-3-pro") {
+	isClaude := strings.Contains(strings.ToLower(baseModel), "claude")
 	if isClaude || strings.Contains(baseModel, "gemini-3-pro") {
 		return e.executeClaudeNonStream(ctx, auth, req, opts)
 	}
@@ -120,23 +124,25 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 		auth = updatedAuth
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
-	translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String())
-	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
+	if err != nil {
-	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
+		return resp, err
-	translated = normalizeAntigravityThinking(req.Model, translated, isClaude)
+	}
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated)
+
 	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated)
 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -146,7 +152,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 	var lastErr error
 	for idx, baseURL := range baseURLs {
-		httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, false, opts.Alt, baseURL)
+		httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, false, opts.Alt, baseURL)
 		if errReq != nil {
 			err = errReq
 			return resp, err
@@ -227,6 +233,8 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 // executeClaudeNonStream performs a claude non-streaming request to the Antigravity API.
 func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
 	if errToken != nil {
 		return resp, errToken
@@ -235,23 +243,25 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
 		auth = updatedAuth
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
-	translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String())
-	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
+	if err != nil {
-	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
+		return resp, err
-	translated = normalizeAntigravityThinking(req.Model, translated, true)
+	}
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated)
+
 	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated)
 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -261,7 +271,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
 	var lastErr error
 	for idx, baseURL := range baseURLs {
-		httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, true, opts.Alt, baseURL)
+		httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL)
 		if errReq != nil {
 			err = errReq
 			return resp, err
@@ -507,8 +517,8 @@ func (e *AntigravityExecutor) convertStreamToNonStream(stream []byte) []byte {
 		}
 		if usageResult := responseNode.Get("usageMetadata"); usageResult.Exists() {
 			usageRaw = usageResult.Raw
-		} else if usageResult := root.Get("usageMetadata"); usageResult.Exists() {
+		} else if usageMetadataResult := root.Get("usageMetadata"); usageMetadataResult.Exists() {
-			usageRaw = usageResult.Raw
+			usageRaw = usageMetadataResult.Raw
 		}
 		if partsResult := responseNode.Get("candidates.0.content.parts"); partsResult.IsArray() {
@@ -587,6 +597,8 @@ func (e *AntigravityExecutor) convertStreamToNonStream(stream []byte) []byte {
 // ExecuteStream performs a streaming request to the Antigravity API.
 func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	ctx = context.WithValue(ctx, "alt", "")
 	token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
@@ -597,25 +609,25 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 		auth = updatedAuth
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 	isClaude := strings.Contains(strings.ToLower(req.Model), "claude")
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
-	translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String())
-	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
+	if err != nil {
-	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
+		return nil, err
-	translated = normalizeAntigravityThinking(req.Model, translated, isClaude)
+	}
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated)
+
 	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated)
 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -625,12 +637,11 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 	var lastErr error
 	for idx, baseURL := range baseURLs {
-		httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, true, opts.Alt, baseURL)
+		httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL)
 		if errReq != nil {
 			err = errReq
 			return nil, err
 		}
 		httpResp, errDo := httpClient.Do(httpReq)
 		if errDo != nil {
 			recordAPIResponseError(ctx, e.cfg, errDo)
@@ -771,6 +782,8 @@ func (e *AntigravityExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Au
 // CountTokens counts tokens for the given request using the Antigravity API.
 func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
 	if errToken != nil {
 		return cliproxyexecutor.Response{}, errToken
@@ -786,7 +799,17 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
 	to := sdktranslator.FromString("antigravity")
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)
-	isClaude := strings.Contains(strings.ToLower(req.Model), "claude")
+	// Prepare payload once (doesn't depend on baseURL)
 	payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 	payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String())
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
 	}
 	payload = deleteJSONField(payload, "project")
 	payload = deleteJSONField(payload, "model")
 	payload = deleteJSONField(payload, "request.safetySettings")
 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -803,14 +826,6 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
 	var lastErr error
 	for idx, baseURL := range baseURLs {
 		payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 		payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model)
 		payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, payload)
 		payload = normalizeAntigravityThinking(req.Model, payload, isClaude)
 		payload = deleteJSONField(payload, "project")
 		payload = deleteJSONField(payload, "model")
 		payload = deleteJSONField(payload, "request.safetySettings")
 		base := strings.TrimSuffix(baseURL, "/")
 		if base == "" {
 			base = buildBaseURL(auth)
@@ -980,35 +995,37 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 		modelConfig := registry.GetAntigravityModelConfig()
 		models := make([]*registry.ModelInfo, 0, len(result.Map()))
 		for originalName := range result.Map() {
-			aliasName := modelName2Alias(originalName)
+			modelID := strings.TrimSpace(originalName)
-			if aliasName != "" {
+			if modelID == "" {
-				cfg := modelConfig[aliasName]
+				continue
 				modelName := aliasName
 				if cfg != nil && cfg.Name != "" {
 					modelName = cfg.Name
 				}
 				modelInfo := &registry.ModelInfo{
 					ID:          aliasName,
 					Name:        modelName,
 					Description: aliasName,
 					DisplayName: aliasName,
 					Version:     aliasName,
 					Object:      "model",
 					Created:     now,
 					OwnedBy:     antigravityAuthType,
 					Type:        antigravityAuthType,
 				}
 				// Look up Thinking support from static config using alias name
 				if cfg != nil {
 					if cfg.Thinking != nil {
 						modelInfo.Thinking = cfg.Thinking
 					}
 					if cfg.MaxCompletionTokens > 0 {
 						modelInfo.MaxCompletionTokens = cfg.MaxCompletionTokens
 					}
 				}
 				models = append(models, modelInfo)
 			}
 			switch modelID {
 			case "chat_20706", "chat_23310", "gemini-2.5-flash-thinking", "gemini-3-pro-low", "gemini-2.5-pro":
 				continue
 			}
 			modelCfg := modelConfig[modelID]
 			modelName := modelID
 			modelInfo := &registry.ModelInfo{
 				ID:          modelID,
 				Name:        modelName,
 				Description: modelID,
 				DisplayName: modelID,
 				Version:     modelID,
 				Object:      "model",
 				Created:     now,
 				OwnedBy:     antigravityAuthType,
 				Type:        antigravityAuthType,
 			}
 			// Look up Thinking support from static config using upstream model name.
 			if modelCfg != nil {
 				if modelCfg.Thinking != nil {
 					modelInfo.Thinking = modelCfg.Thinking
 				}
 				if modelCfg.MaxCompletionTokens > 0 {
 					modelInfo.MaxCompletionTokens = modelCfg.MaxCompletionTokens
 				}
 			}
 			models = append(models, modelInfo)
 		}
 		return models
 	}
@@ -1104,12 +1121,49 @@ func (e *AntigravityExecutor) refreshToken(ctx context.Context, auth *cliproxyau
 		auth.Metadata["refresh_token"] = tokenResp.RefreshToken
 	}
 	auth.Metadata["expires_in"] = tokenResp.ExpiresIn
-	auth.Metadata["timestamp"] = time.Now().UnixMilli()
+	now := time.Now()
-	auth.Metadata["expired"] = time.Now().Add(time.Duration(tokenResp.ExpiresIn) * time.Second).Format(time.RFC3339)
+	auth.Metadata["timestamp"] = now.UnixMilli()
 	auth.Metadata["expired"] = now.Add(time.Duration(tokenResp.ExpiresIn) * time.Second).Format(time.RFC3339)
 	auth.Metadata["type"] = antigravityAuthType
 	if errProject := e.ensureAntigravityProjectID(ctx, auth, tokenResp.AccessToken); errProject != nil {
 		log.Warnf("antigravity executor: ensure project id failed: %v", errProject)
 	}
 	return auth, nil
 }
 // ensureAntigravityProjectID stores a "project_id" entry in auth.Metadata when
 // one is not already present.
 //
 // The project ID is fetched with sdkAuth.FetchAntigravityProjectID using the
 // trimmed accessToken, or the "access_token" metadata value when accessToken is
 // blank. The call is a no-op (nil error) when auth is nil, a project ID is
 // already recorded, no token is available, or the fetched ID is blank. Only a
 // fetch failure is reported as an error.
 func (e *AntigravityExecutor) ensureAntigravityProjectID(ctx context.Context, auth *cliproxyauth.Auth, accessToken string) error {
 	// Indexing a nil Metadata map is safe and yields nil, so both guards can share one test.
 	if auth == nil || auth.Metadata["project_id"] != nil {
 		return nil
 	}
 	bearer := strings.TrimSpace(accessToken)
 	if bearer == "" {
 		bearer = metaStringValue(auth.Metadata, "access_token")
 		if bearer == "" {
 			return nil
 		}
 	}
 	client := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
 	fetched, errLookup := sdkAuth.FetchAntigravityProjectID(ctx, bearer, client)
 	if errLookup != nil {
 		return errLookup
 	}
 	trimmedID := strings.TrimSpace(fetched)
 	if trimmedID == "" {
 		return nil
 	}
 	// Metadata may still be nil here; writing to a nil map would panic.
 	if auth.Metadata == nil {
 		auth.Metadata = make(map[string]any)
 	}
 	auth.Metadata["project_id"] = trimmedID
 	return nil
 }
 func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyauth.Auth, token, modelName string, payload []byte, stream bool, alt, baseURL string) (*http.Request, error) {
 	if token == "" {
 		return nil, statusErr{code: http.StatusUnauthorized, msg: "missing access token"}
@@ -1146,7 +1200,7 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 		}
 	}
 	payload = geminiToAntigravity(modelName, payload, projectID)
-	payload, _ = sjson.SetBytes(payload, "model", alias2ModelName(modelName))
+	payload, _ = sjson.SetBytes(payload, "model", modelName)
 	if strings.Contains(modelName, "claude") {
 		strJSON := string(payload)
@@ -1163,7 +1217,7 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 		payload = []byte(strJSON)
 	}
-	if strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro-preview") {
+	if strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro-high") {
 		systemInstructionPartsResult := gjson.GetBytes(payload, "request.systemInstruction.parts")
 		payload, _ = sjson.SetBytes(payload, "request.systemInstruction.role", "user")
 		payload, _ = sjson.SetBytes(payload, "request.systemInstruction.parts.0.text", systemInstruction)
@@ -1353,13 +1407,6 @@ func geminiToAntigravity(modelName string, payload []byte, projectID string) []b
 	template, _ = sjson.Delete(template, "request.safetySettings")
 	template, _ = sjson.Set(template, "request.toolConfig.functionCallingConfig.mode", "VALIDATED")
 	if !strings.HasPrefix(modelName, "gemini-3-") {
 		if thinkingLevel := gjson.Get(template, "request.generationConfig.thinkingConfig.thinkingLevel"); thinkingLevel.Exists() {
 			template, _ = sjson.Delete(template, "request.generationConfig.thinkingConfig.thinkingLevel")
 			template, _ = sjson.Set(template, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
 		}
 	}
 	if strings.Contains(modelName, "claude") {
 		gjson.Get(template, "request.tools").ForEach(func(key, tool gjson.Result) bool {
 			tool.Get("functionDeclarations").ForEach(func(funKey, funcDecl gjson.Result) bool {
@@ -1417,108 +1464,3 @@ func generateProjectID() string {
 	randomPart := strings.ToLower(uuid.NewString())[:5]
 	return adj + "-" + noun + "-" + randomPart
 }
 // antigravityAliasByModelName lists every model name that modelName2Alias
 // rewrites. An empty value means the name has no alias at all.
 var antigravityAliasByModelName = map[string]string{
 	"rev19-uic3-1p":              "gemini-2.5-computer-use-preview-10-2025",
 	"gemini-3-pro-image":         "gemini-3-pro-image-preview",
 	"gemini-3-pro-high":          "gemini-3-pro-preview",
 	"gemini-3-flash":             "gemini-3-flash-preview",
 	"claude-sonnet-4-5":          "gemini-claude-sonnet-4-5",
 	"claude-sonnet-4-5-thinking": "gemini-claude-sonnet-4-5-thinking",
 	"claude-opus-4-5-thinking":   "gemini-claude-opus-4-5-thinking",
 	"chat_20706":                 "",
 	"chat_23310":                 "",
 	"gemini-2.5-flash-thinking":  "",
 	"gemini-3-pro-low":           "",
 	"gemini-2.5-pro":             "",
 }
 // modelName2Alias returns the alias for modelName.
 //
 // Names listed in antigravityAliasByModelName are rewritten to their table
 // value, which is the empty string for names without an alias. Any other name
 // is returned unchanged.
 func modelName2Alias(modelName string) string {
 	if alias, listed := antigravityAliasByModelName[modelName]; listed {
 		return alias
 	}
 	return modelName
 }
 // antigravityModelNameByAlias lists every alias that alias2ModelName rewrites,
 // keyed by alias with the corresponding model name as the value.
 var antigravityModelNameByAlias = map[string]string{
 	"gemini-2.5-computer-use-preview-10-2025": "rev19-uic3-1p",
 	"gemini-3-pro-image-preview":              "gemini-3-pro-image",
 	"gemini-3-pro-preview":                    "gemini-3-pro-high",
 	"gemini-3-flash-preview":                  "gemini-3-flash",
 	"gemini-claude-sonnet-4-5":                "claude-sonnet-4-5",
 	"gemini-claude-sonnet-4-5-thinking":       "claude-sonnet-4-5-thinking",
 	"gemini-claude-opus-4-5-thinking":         "claude-opus-4-5-thinking",
 }
 // alias2ModelName returns the model name for the given alias.
 //
 // Aliases listed in antigravityModelNameByAlias are rewritten to their table
 // value; any other input is returned unchanged.
 func alias2ModelName(modelName string) string {
 	if resolved, listed := antigravityModelNameByAlias[modelName]; listed {
 		return resolved
 	}
 	return modelName
 }
 // normalizeAntigravityThinking clamps or removes thinking config based on model support.
 // For Claude models, it additionally ensures thinking budget < max_tokens.
 //
 // Parameters:
 //   - model: model name passed to the util helpers and the registry lookups.
 //   - payload: request JSON; the thinking budget is read from and written to
 //     "request.generationConfig.thinkingConfig.thinkingBudget".
 //   - isClaude: enables the extra max-token cap and minimum-budget handling.
 //
 // Returns the (possibly rewritten) payload. sjson write failures are never
 // surfaced to the caller; the payload as it stood before the failed write is
 // returned instead.
 func normalizeAntigravityThinking(model string, payload []byte, isClaude bool) []byte {
 	// Let the util helper strip thinking config first; if the model has no
 	// thinking support there is nothing further to normalize.
 	payload = util.StripThinkingConfigIfUnsupported(model, payload)
 	if !util.ModelSupportsThinking(model) {
 		return payload
 	}
 	// Without an explicit budget in the payload there is nothing to clamp.
 	budget := gjson.GetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget")
 	if !budget.Exists() {
 		return payload
 	}
 	raw := int(budget.Int())
 	normalized := util.NormalizeThinkingBudget(model, raw)
 	if isClaude {
 		// Keep the budget strictly below the effective max output tokens.
 		effectiveMax, setDefaultMax := antigravityEffectiveMaxTokens(model, payload)
 		if effectiveMax > 0 && normalized >= effectiveMax {
 			normalized = effectiveMax - 1
 		}
 		// Negative budgets are exempt from the minimum check (normalized >= 0).
 		minBudget := antigravityMinThinkingBudget(model)
 		if minBudget > 0 && normalized >= 0 && normalized < minBudget {
 			// Budget is below minimum, remove thinking config entirely
 			payload, _ = sjson.DeleteBytes(payload, "request.generationConfig.thinkingConfig")
 			return payload
 		}
 		// The max came from the model default rather than the request, so write
 		// it into the payload; a failed write leaves the payload untouched.
 		if setDefaultMax {
 			if res, errSet := sjson.SetBytes(payload, "request.generationConfig.maxOutputTokens", effectiveMax); errSet == nil {
 				payload = res
 			}
 		}
 	}
 	// Persist the normalized budget; on failure fall back to the payload as it
 	// stands (which may already include the maxOutputTokens write above).
 	updated, err := sjson.SetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
 	if err != nil {
 		return payload
 	}
 	return updated
 }
 // antigravityEffectiveMaxTokens picks the token ceiling used to cap a thinking budget.
 //
 // A positive "request.generationConfig.maxOutputTokens" in payload wins and is
 // returned with fromModel=false. Otherwise the model's MaxCompletionTokens from
 // the global registry is returned with fromModel=true, signalling that the
 // value did not come from the request. When neither source yields a positive
 // value the result is (0, false).
 func antigravityEffectiveMaxTokens(model string, payload []byte) (max int, fromModel bool) {
 	requested := gjson.GetBytes(payload, "request.generationConfig.maxOutputTokens")
 	if requested.Exists() && requested.Int() > 0 {
 		return int(requested.Int()), false
 	}
 	info := registry.GetGlobalRegistry().GetModelInfo(model)
 	if info == nil || info.MaxCompletionTokens <= 0 {
 		return 0, false
 	}
 	return info.MaxCompletionTokens, true
 }
 // antigravityMinThinkingBudget reports the Thinking.Min value registered for model.
 // It yields -1 when the global registry has no entry for the model or the
 // entry carries no Thinking section.
 func antigravityMinThinkingBudget(model string) int {
 	info := registry.GetGlobalRegistry().GetModelInfo(model)
 	if info == nil || info.Thinking == nil {
 		return -1
 	}
 	return info.Thinking.Min
 }
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -17,7 +17,7 @@ import (
 	claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -84,17 +84,15 @@ func (e *ClaudeExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut
 }
 func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
-	apiKey, baseURL := claudeCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	apiKey, baseURL := claudeCreds(auth)
 	if baseURL == "" {
 		baseURL = "https://api.anthropic.com"
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+
 	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 	model := req.Model
 	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
 		model = override
 	}
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("claude")
 	// Use streaming translation to preserve function calling, except for claude.
@@ -103,23 +101,23 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, stream)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream)
-	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
-	body, _ = sjson.SetBytes(body, "model", model)
+	body, _ = sjson.SetBytes(body, "model", baseModel)
 	// Inject thinking config based on model metadata for thinking variants
 	body = e.injectThinkingConfig(model, req.Metadata, body)
-	if !strings.HasPrefix(model, "claude-3-5-haiku") {
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
 	if err != nil {
 		return resp, err
 	}
 	if !strings.HasPrefix(baseModel, "claude-3-5-haiku") {
 		body = checkSystemInstructions(body)
 	}
-	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
 	body = disableThinkingIfToolChoiceForced(body)
 	// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
 	body = ensureMaxTokensForThinking(model, body)
 	// Extract betas from body and convert to header
 	var extraBetas []string
 	extraBetas, body = extractAndRemoveBetas(body)
@@ -218,37 +216,36 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 }
 func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
-	apiKey, baseURL := claudeCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	apiKey, baseURL := claudeCreds(auth)
 	if baseURL == "" {
 		baseURL = "https://api.anthropic.com"
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+
 	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("claude")
 	model := req.Model
 	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
 		model = override
 	}
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
-	body, _ = sjson.SetBytes(body, "model", model)
+	body, _ = sjson.SetBytes(body, "model", baseModel)
-	// Inject thinking config based on model metadata for thinking variants
+
-	body = e.injectThinkingConfig(model, req.Metadata, body)
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
 	if err != nil {
 		return nil, err
 	}
 	body = checkSystemInstructions(body)
-	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
 	body = disableThinkingIfToolChoiceForced(body)
 	// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
 	body = ensureMaxTokensForThinking(model, body)
 	// Extract betas from body and convert to header
 	var extraBetas []string
 	extraBetas, body = extractAndRemoveBetas(body)
@@ -381,8 +378,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 }
 func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
-	apiKey, baseURL := claudeCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	apiKey, baseURL := claudeCreds(auth)
 	if baseURL == "" {
 		baseURL = "https://api.anthropic.com"
 	}
@@ -391,14 +389,10 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	to := sdktranslator.FromString("claude")
 	// Use streaming translation to preserve function calling, except for claude.
 	stream := from != to
-	model := req.Model
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
+	body, _ = sjson.SetBytes(body, "model", baseModel)
 		model = override
 	}
 	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream)
 	body, _ = sjson.SetBytes(body, "model", model)
-	if !strings.HasPrefix(model, "claude-3-5-haiku") {
+	if !strings.HasPrefix(baseModel, "claude-3-5-haiku") {
 		body = checkSystemInstructions(body)
 	}
@@ -527,17 +521,6 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) {
 	return betas, body
 }
 // injectThinkingConfig applies a thinking budget to body when one can be
 // resolved for modelName from the request metadata.
 //
 // Resolution is delegated to util.ResolveClaudeThinkingConfig; when it reports
 // success the budget is written with util.ApplyClaudeThinkingConfig, otherwise
 // body is returned untouched.
 func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte {
 	if budget, resolved := util.ResolveClaudeThinkingConfig(modelName, metadata); resolved {
 		return util.ApplyClaudeThinkingConfig(body, budget)
 	}
 	return body
 }
 // disableThinkingIfToolChoiceForced checks if tool_choice forces tool use and disables thinking.
 // Anthropic API does not allow thinking when tool_choice is set to "any" or a specific tool.
 // See: https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations
@@ -551,126 +534,6 @@ func disableThinkingIfToolChoiceForced(body []byte) []byte {
 	return body
 }
 // ensureMaxTokensForThinking raises "max_tokens" in body when thinking is
 // enabled, with the aim of keeping it above "thinking.budget_tokens".
 // Call it after all thinking configuration has been finalized.
 //
 // Nothing changes unless "thinking.type" is "enabled" and the budget is
 // positive. The target value is the model's MaxCompletionTokens from the global
 // registry when that is positive, otherwise budget + 4000. "max_tokens" is only
 // ever increased to the target, never lowered.
 //
 // NOTE(review): when the registry value is used and it is not larger than the
 // budget, the resulting max_tokens does not exceed the budget — confirm whether
 // callers rely on an earlier clamp of the budget.
 func ensureMaxTokensForThinking(modelName string, body []byte) []byte {
 	if gjson.GetBytes(body, "thinking.type").String() != "enabled" {
 		return body
 	}
 	budget := gjson.GetBytes(body, "thinking.budget_tokens").Int()
 	if budget <= 0 {
 		return body
 	}
 	// Default target when the registry has no usable cap for this model.
 	const fallbackBuffer = 4000
 	required := budget + fallbackBuffer
 	if info := registry.GetGlobalRegistry().GetModelInfo(modelName); info != nil && info.MaxCompletionTokens > 0 {
 		required = int64(info.MaxCompletionTokens)
 	}
 	if current := gjson.GetBytes(body, "max_tokens").Int(); current < required {
 		body, _ = sjson.SetBytes(body, "max_tokens", required)
 	}
 	return body
 }
 // resolveUpstreamModel maps a requested model alias to the model name
 // configured for the Claude key entry that matches auth.
 //
 // It returns "" when alias is blank, when no config entry matches auth, or when
 // none of the entry's models match. Matching is case-insensitive. Configured
 // models are visited in order, and for each one the candidate names are tried
 // in turn: the normalized model name, the trimmed request (if different), and
 // the original model resolved from the thinking metadata (if different). An
 // alias match returns the configured name, or the candidate itself when the
 // configured name is blank; a name match returns the configured name.
 func (e *ClaudeExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
 	requested := strings.TrimSpace(alias)
 	if requested == "" {
 		return ""
 	}
 	keyCfg := e.resolveClaudeConfig(auth)
 	if keyCfg == nil {
 		return ""
 	}
 	base, meta := util.NormalizeThinkingModel(requested)
 	// Candidate names to compare against each configured alias/name.
 	lookups := make([]string, 0, 3)
 	lookups = append(lookups, strings.TrimSpace(base))
 	if !strings.EqualFold(base, requested) {
 		lookups = append(lookups, requested)
 	}
 	if orig := util.ResolveOriginalModel(base, meta); orig != "" && !strings.EqualFold(orig, base) {
 		lookups = append(lookups, orig)
 	}
 	for _, configured := range keyCfg.Models {
 		upstream := strings.TrimSpace(configured.Name)
 		configuredAlias := strings.TrimSpace(configured.Alias)
 		for _, want := range lookups {
 			switch {
 			case want == "":
 				continue
 			case configuredAlias != "" && strings.EqualFold(configuredAlias, want):
 				if upstream == "" {
 					return want
 				}
 				return upstream
 			case upstream != "" && strings.EqualFold(upstream, want):
 				return upstream
 			}
 		}
 	}
 	return ""
 }
 // resolveClaudeConfig finds the e.cfg.ClaudeKey entry that corresponds to auth,
 // using the "api_key" and "base_url" values in auth.Attributes.
 //
 // All comparisons are whitespace-trimmed and case-insensitive. The first pass
 // applies these rules to each entry in order:
 //   - both attributes set: key and base URL must both match;
 //   - only the key set: the key must match and the entry must have no base URL;
 //   - only the base URL set: the base URL must match.
 //
 // If the first pass finds nothing and a key is present, a second pass returns
 // the first entry whose key matches regardless of base URL. It returns nil when
 // auth or e.cfg is nil or no entry matches. The returned pointer refers to the
 // element inside e.cfg.ClaudeKey, not a copy.
 func (e *ClaudeExecutor) resolveClaudeConfig(auth *cliproxyauth.Auth) *config.ClaudeKey {
 	if auth == nil || e.cfg == nil {
 		return nil
 	}
 	var attrKey, attrBase string
 	if auth.Attributes != nil {
 		attrKey = strings.TrimSpace(auth.Attributes["api_key"])
 		attrBase = strings.TrimSpace(auth.Attributes["base_url"])
 	}
 	for i := range e.cfg.ClaudeKey {
 		entry := &e.cfg.ClaudeKey[i]
 		cfgKey := strings.TrimSpace(entry.APIKey)
 		cfgBase := strings.TrimSpace(entry.BaseURL)
 		// Both attributes known: require an exact pair match and do not fall
 		// through to the looser rules for this entry.
 		if attrKey != "" && attrBase != "" {
 			if strings.EqualFold(cfgKey, attrKey) && strings.EqualFold(cfgBase, attrBase) {
 				return entry
 			}
 			continue
 		}
 		// attrBase is empty from here on, so the base-URL test below only
 		// succeeds for entries configured without a base URL.
 		if attrKey != "" && strings.EqualFold(cfgKey, attrKey) {
 			if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) {
 				return entry
 			}
 		}
 		if attrKey == "" && attrBase != "" && strings.EqualFold(cfgBase, attrBase) {
 			return entry
 		}
 	}
 	// Fallback: accept a key-only match even when the base URLs differ.
 	if attrKey != "" {
 		for i := range e.cfg.ClaudeKey {
 			entry := &e.cfg.ClaudeKey[i]
 			if strings.EqualFold(strings.TrimSpace(entry.APIKey), attrKey) {
 				return entry
 			}
 		}
 	}
 	return nil
 }
 type compositeReadCloser struct {
 	io.Reader
 	closers []func() error
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -13,6 +13,7 @@ import (
 	codexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/codex"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -72,18 +73,15 @@ func (e *CodexExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth
 }
 func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
-	apiKey, baseURL := codexCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	apiKey, baseURL := codexCreds(auth)
 	if baseURL == "" {
 		baseURL = "https://chatgpt.com/backend-api/codex"
 	}
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)
-	model := req.Model
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
+	defer reporter.trackFailure(ctx, &err)
 		model = override
 	}
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
@@ -93,20 +91,25 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalPayload = misc.InjectCodexUserAgent(originalPayload, userAgent)
-	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
 	body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent)
-	body = sdktranslator.TranslateRequest(from, to, model, body, false)
+	body = sdktranslator.TranslateRequest(from, to, baseModel, body, false)
 	body = misc.StripCodexUserAgent(body)
-	body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
+
-	body = NormalizeThinkingConfig(body, model, false)
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
-	if errValidate := ValidateThinkingConfig(body, model); errValidate != nil {
+	if err != nil {
-		return resp, errValidate
+		return resp, err
 	}
-	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
+
-	body, _ = sjson.SetBytes(body, "model", model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 	body, _ = sjson.SetBytes(body, "stream", true)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
 	body, _ = sjson.DeleteBytes(body, "safety_identifier")
 	if !gjson.GetBytes(body, "instructions").Exists() {
 		body, _ = sjson.SetBytes(body, "instructions", "")
 	}
 	url := strings.TrimSuffix(baseURL, "/") + "/responses"
 	httpReq, err := e.cacheHelper(ctx, from, url, req, body)
@@ -182,18 +185,15 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 }
 func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
-	apiKey, baseURL := codexCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	apiKey, baseURL := codexCreds(auth)
 	if baseURL == "" {
 		baseURL = "https://chatgpt.com/backend-api/codex"
 	}
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)
-	model := req.Model
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
+	defer reporter.trackFailure(ctx, &err)
 		model = override
 	}
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
@@ -203,20 +203,24 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalPayload = misc.InjectCodexUserAgent(originalPayload, userAgent)
-	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 	body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent)
-	body = sdktranslator.TranslateRequest(from, to, model, body, true)
+	body = sdktranslator.TranslateRequest(from, to, baseModel, body, true)
 	body = misc.StripCodexUserAgent(body)
-	body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
-	body = NormalizeThinkingConfig(body, model, false)
+	if err != nil {
-	if errValidate := ValidateThinkingConfig(body, model); errValidate != nil {
+		return nil, err
 		return nil, errValidate
 	}
-	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
+
 	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
-	body, _ = sjson.SetBytes(body, "model", model)
+	body, _ = sjson.DeleteBytes(body, "safety_identifier")
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 	if !gjson.GetBytes(body, "instructions").Exists() {
 		body, _ = sjson.SetBytes(body, "instructions", "")
 	}
 	url := strings.TrimSuffix(baseURL, "/") + "/responses"
 	httpReq, err := e.cacheHelper(ctx, from, url, req, body)
@@ -303,25 +307,30 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 }
 func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
-	model := req.Model
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
 		model = override
 	}
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
 	userAgent := codexUserAgent(ctx)
 	body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent)
-	body = sdktranslator.TranslateRequest(from, to, model, body, false)
+	body = sdktranslator.TranslateRequest(from, to, baseModel, body, false)
 	body = misc.StripCodexUserAgent(body)
-	body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
+	body, err := thinking.ApplyThinking(body, req.Model, from.String(), to.String())
-	body, _ = sjson.SetBytes(body, "model", model)
+	if err != nil {
 		return cliproxyexecutor.Response{}, err
 	}
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
 	body, _ = sjson.DeleteBytes(body, "safety_identifier")
 	body, _ = sjson.SetBytes(body, "stream", false)
 	if !gjson.GetBytes(body, "instructions").Exists() {
 		body, _ = sjson.SetBytes(body, "instructions", "")
 	}
-	enc, err := tokenizerForCodexModel(model)
+	enc, err := tokenizerForCodexModel(baseModel)
 	if err != nil {
 		return cliproxyexecutor.Response{}, fmt.Errorf("codex executor: tokenizer init failed: %w", err)
 	}
@@ -593,51 +602,6 @@ func codexCreds(a *cliproxyauth.Auth) (apiKey, baseURL string) {
 	return
 }
 // resolveUpstreamModel maps a requested model alias to the upstream model name
 // configured for the Codex credential behind auth.
 //
 // The lookup is case-insensitive and tries, in order: the normalized name
 // returned by util.NormalizeThinkingModel, the trimmed alias itself (when it
 // differs from the normalized name), and the original name reported by
 // util.ResolveOriginalModel (when non-empty and different from the normalized
 // name). For each configured model, a match on its alias wins over a match on
 // its name.
 //
 // It returns "" when alias is blank, when no Codex config entry resolves for
 // auth, or when nothing matches. When an alias matches an entry that has no
 // name, the matching candidate itself is returned.
 func (e *CodexExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
 	requested := strings.TrimSpace(alias)
 	if requested == "" {
 		return ""
 	}
 	cfgEntry := e.resolveCodexConfig(auth)
 	if cfgEntry == nil {
 		return ""
 	}
 	baseName, meta := util.NormalizeThinkingModel(requested)
 	// Names to compare against the configured aliases/names, in priority order.
 	lookups := make([]string, 0, 3)
 	lookups = append(lookups, strings.TrimSpace(baseName))
 	if !strings.EqualFold(baseName, requested) {
 		lookups = append(lookups, requested)
 	}
 	original := util.ResolveOriginalModel(baseName, meta)
 	if original != "" && !strings.EqualFold(original, baseName) {
 		lookups = append(lookups, original)
 	}
 	for idx := range cfgEntry.Models {
 		configured := cfgEntry.Models[idx]
 		upstream := strings.TrimSpace(configured.Name)
 		aliasName := strings.TrimSpace(configured.Alias)
 		for _, want := range lookups {
 			switch {
 			case want == "":
 				// Blank candidates can never match anything meaningful.
 				continue
 			case aliasName != "" && strings.EqualFold(aliasName, want):
 				// Alias hit: prefer the configured upstream name when present.
 				if upstream == "" {
 					return want
 				}
 				return upstream
 			case upstream != "" && strings.EqualFold(upstream, want):
 				return upstream
 			}
 		}
 	}
 	return ""
 }
 func (e *CodexExecutor) resolveCodexConfig(auth *cliproxyauth.Auth) *config.CodexKey {
 	if auth == nil || e.cfg == nil {
 		return nil
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -20,6 +20,7 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -102,28 +103,33 @@ func (e *GeminiCLIExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.
 // Execute performs a non-streaming request to the Gemini CLI API.
 func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
 	if err != nil {
 		return resp, err
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+
 	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
-	basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
+
-	basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
+	basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String())
-	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload)
+	if err != nil {
-	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
+		return resp, err
-	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
+	}
-	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
+
-	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated)
+	basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload)
 	basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated)
 	action := "generateContent"
 	if req.Metadata != nil {
@@ -133,9 +139,9 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	}
 	projectID := resolveGeminiProjectID(auth)
-	models := cliPreviewFallbackOrder(req.Model)
+	models := cliPreviewFallbackOrder(baseModel)
-	if len(models) == 0 || models[0] != req.Model {
+	if len(models) == 0 || models[0] != baseModel {
-		models = append([]string{req.Model}, models...)
+		models = append([]string{baseModel}, models...)
 	}
 	httpClient := newHTTPClient(ctx, e.cfg, auth, 0)
@@ -246,34 +252,39 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 // ExecuteStream performs a streaming request to the Gemini CLI API.
 func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
 	if err != nil {
 		return nil, err
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+
 	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
+	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
-	basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
+
-	basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
+	basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String())
-	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload)
+	if err != nil {
-	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
+		return nil, err
-	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
+	}
-	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
+
-	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated)
+	basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload)
 	basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated)
 	projectID := resolveGeminiProjectID(auth)
-	models := cliPreviewFallbackOrder(req.Model)
+	models := cliPreviewFallbackOrder(baseModel)
-	if len(models) == 0 || models[0] != req.Model {
+	if len(models) == 0 || models[0] != baseModel {
-		models = append([]string{req.Model}, models...)
+		models = append([]string{baseModel}, models...)
 	}
 	httpClient := newHTTPClient(ctx, e.cfg, auth, 0)
@@ -435,6 +446,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 // CountTokens counts tokens for the given request using the Gemini CLI API.
 func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
@@ -443,9 +456,9 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")
-	models := cliPreviewFallbackOrder(req.Model)
+	models := cliPreviewFallbackOrder(baseModel)
-	if len(models) == 0 || models[0] != req.Model {
+	if len(models) == 0 || models[0] != baseModel {
-		models = append([]string{req.Model}, models...)
+		models = append([]string{baseModel}, models...)
 	}
 	httpClient := newHTTPClient(ctx, e.cfg, auth, 0)
@@ -463,15 +476,18 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
 	// The loop variable attemptModel is only used as the concrete model id sent to the upstream
 	// Gemini CLI endpoint when iterating fallback variants.
-	for _, attemptModel := range models {
+	for range models {
-		payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false)
+		payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
-		payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model)
+
-		payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, payload)
+		payload, err = thinking.ApplyThinking(payload, req.Model, from.String(), to.String())
 		if err != nil {
 			return cliproxyexecutor.Response{}, err
 		}
 		payload = deleteJSONField(payload, "project")
 		payload = deleteJSONField(payload, "model")
 		payload = deleteJSONField(payload, "request.safetySettings")
-		payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
+		payload = fixGeminiCLIImageAspectRatio(baseModel, payload)
 		payload = fixGeminiCLIImageAspectRatio(req.Model, payload)
 		tok, errTok := tokenSource.Token()
 		if errTok != nil {
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -13,6 +13,7 @@ import (
 	"strings"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -102,16 +103,13 @@ func (e *GeminiExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut
 //   - cliproxyexecutor.Response: The response from the API
 //   - error: An error if the request fails
 func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	apiKey, bearer := geminiCreds(auth)
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 	model := req.Model
 	if override := e.resolveUpstreamModel(model, auth); override != "" {
 		model = override
 	}
 	// Official Gemini API via API key or OAuth bearer
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
@@ -119,15 +117,17 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
-	body = ApplyThinkingMetadata(body, req.Metadata, model)
+
-	body = util.ApplyDefaultThinkingIfNeeded(model, body)
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
-	body = util.NormalizeGeminiThinkingBudget(model, body)
+	if err != nil {
-	body = util.StripThinkingConfigIfUnsupported(model, body)
+		return resp, err
-	body = fixGeminiImageAspectRatio(model, body)
+	}
-	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
+
-	body, _ = sjson.SetBytes(body, "model", model)
+	body = fixGeminiImageAspectRatio(baseModel, body)
 	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 	action := "generateContent"
 	if req.Metadata != nil {
@@ -136,7 +136,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 		}
 	}
 	baseURL := resolveGeminiBaseURL(auth)
-	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, action)
+	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, action)
 	if opts.Alt != "" && action != "countTokens" {
 		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
 	}
@@ -206,34 +206,33 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 // ExecuteStream performs a streaming request to the Gemini API.
 func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	apiKey, bearer := geminiCreds(auth)
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 	model := req.Model
 	if override := e.resolveUpstreamModel(model, auth); override != "" {
 		model = override
 	}
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
-	body = ApplyThinkingMetadata(body, req.Metadata, model)
+
-	body = util.ApplyDefaultThinkingIfNeeded(model, body)
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
-	body = util.NormalizeGeminiThinkingBudget(model, body)
+	if err != nil {
-	body = util.StripThinkingConfigIfUnsupported(model, body)
+		return nil, err
-	body = fixGeminiImageAspectRatio(model, body)
+	}
-	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
+
-	body, _ = sjson.SetBytes(body, "model", model)
+	body = fixGeminiImageAspectRatio(baseModel, body)
 	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 	baseURL := resolveGeminiBaseURL(auth)
-	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, "streamGenerateContent")
+	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, "streamGenerateContent")
 	if opts.Alt == "" {
 		url = url + "?alt=sse"
 	} else {
@@ -331,27 +330,28 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 // CountTokens counts tokens for the given request using the Gemini API.
 func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
-	apiKey, bearer := geminiCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
-	model := req.Model
+	apiKey, bearer := geminiCreds(auth)
 	if override := e.resolveUpstreamModel(model, auth); override != "" {
 		model = override
 	}
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
-	translatedReq := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
+	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
-	translatedReq = ApplyThinkingMetadata(translatedReq, req.Metadata, model)
+
-	translatedReq = util.StripThinkingConfigIfUnsupported(model, translatedReq)
+	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String())
-	translatedReq = fixGeminiImageAspectRatio(model, translatedReq)
+	if err != nil {
 		return cliproxyexecutor.Response{}, err
 	}
 	translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq)
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
-	translatedReq, _ = sjson.SetBytes(translatedReq, "model", model)
+	translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel)
 	baseURL := resolveGeminiBaseURL(auth)
-	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, "countTokens")
+	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, "countTokens")
 	requestBody := bytes.NewReader(translatedReq)
@@ -450,51 +450,6 @@ func resolveGeminiBaseURL(auth *cliproxyauth.Auth) string {
 	return base
 }
 // resolveUpstreamModel translates a requested model alias into the upstream
 // model name configured for the Gemini credential behind auth.
 //
 // Candidates are compared case-insensitively against every configured model:
 // first the normalized name from util.NormalizeThinkingModel, then the trimmed
 // alias (if it differs from the normalized name), then the original name from
 // util.ResolveOriginalModel (if non-empty and different from the normalized
 // name). Within one configured model an alias match is checked before a name
 // match.
 //
 // The result is "" when alias is blank, when no Gemini config entry resolves
 // for auth, or when no configured model matches. An alias match on an entry
 // without a name yields the matching candidate.
 func (e *GeminiExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
 	wanted := strings.TrimSpace(alias)
 	if wanted == "" {
 		return ""
 	}
 	geminiCfg := e.resolveGeminiConfig(auth)
 	if geminiCfg == nil {
 		return ""
 	}
 	normalized, thinkingMeta := util.NormalizeThinkingModel(wanted)
 	// Ordered list of names to try against configured aliases/names.
 	tryNames := []string{strings.TrimSpace(normalized)}
 	if differs := !strings.EqualFold(normalized, wanted); differs {
 		tryNames = append(tryNames, wanted)
 	}
 	originalName := util.ResolveOriginalModel(normalized, thinkingMeta)
 	if originalName != "" && !strings.EqualFold(originalName, normalized) {
 		tryNames = append(tryNames, originalName)
 	}
 	for pos := range geminiCfg.Models {
 		cfgModel := geminiCfg.Models[pos]
 		cfgName := strings.TrimSpace(cfgModel.Name)
 		cfgAlias := strings.TrimSpace(cfgModel.Alias)
 		for _, tryName := range tryNames {
 			if tryName == "" {
 				continue
 			}
 			aliasHit := cfgAlias != "" && strings.EqualFold(cfgAlias, tryName)
 			if aliasHit && cfgName != "" {
 				return cfgName
 			}
 			if aliasHit {
 				// Alias configured without an upstream name: echo the candidate.
 				return tryName
 			}
 			if cfgName != "" && strings.EqualFold(cfgName, tryName) {
 				return cfgName
 			}
 		}
 	}
 	return ""
 }
 func (e *GeminiExecutor) resolveGeminiConfig(auth *cliproxyauth.Auth) *config.GeminiKey {
 	if auth == nil || e.cfg == nil {
 		return nil
--- a/internal/runtime/executor/gemini_vertex_executor.go
+++ b/internal/runtime/executor/gemini_vertex_executor.go
@@ -12,10 +12,11 @@ import (
 	"io"
 	"net/http"
 	"strings"
 	"time"
 	vertexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/vertex"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
@@ -31,6 +32,143 @@ const (
 	vertexAPIVersion = "v1"
 )
 // isImagenModel reports whether model names an Imagen image generation model,
 // i.e. whether the name contains "imagen" in any letter case.
 // Imagen models use the :predict action instead of :generateContent.
 func isImagenModel(model string) bool {
 	return strings.Contains(strings.ToLower(model), "imagen")
 }
 // getVertexAction picks the Vertex endpoint action for model.
 // Imagen models always use "predict" (regardless of isStream); every other
 // model uses "streamGenerateContent" when isStream is true and
 // "generateContent" otherwise.
 func getVertexAction(model string, isStream bool) string {
 	switch {
 	case isImagenModel(model):
 		return "predict"
 	case isStream:
 		return "streamGenerateContent"
 	default:
 		return "generateContent"
 	}
 }
 // convertImagenToGeminiResponse rewrites an Imagen API response into the
 // Gemini response shape so it can flow through the standard translation
 // pipeline, matching the format returned by gemini-3-pro-image-preview.
 //
 // Each entry of the top-level "predictions" array that carries a non-empty
 // "bytesBase64Encoded" value becomes one inlineData part; a missing or empty
 // "mimeType" defaults to "image/png". model is echoed as "modelVersion".
 //
 // data is returned unchanged when "predictions" is absent or not an array, or
 // when the rebuilt response cannot be marshalled.
 func convertImagenToGeminiResponse(data []byte, model string) []byte {
 	preds := gjson.GetBytes(data, "predictions")
 	if !(preds.Exists() && preds.IsArray()) {
 		return data
 	}
 	// Collect one Gemini-compatible inlineData part per usable prediction.
 	// Start from an empty (non-nil) slice so it serializes as [] when no image is found.
 	imageParts := []map[string]any{}
 	for _, item := range preds.Array() {
 		encoded := item.Get("bytesBase64Encoded").String()
 		if encoded == "" {
 			continue
 		}
 		mime := item.Get("mimeType").String()
 		if mime == "" {
 			mime = "image/png"
 		}
 		inline := map[string]any{
 			"mimeType": mime,
 			"data":     encoded,
 		}
 		imageParts = append(imageParts, map[string]any{"inlineData": inline})
 	}
 	candidate := map[string]any{
 		"content": map[string]any{
 			"parts": imageParts,
 			"role":  "model",
 		},
 		"finishReason": "STOP",
 	}
 	// Imagen API doesn't return token counts, set to 0 for tracking purposes.
 	usage := map[string]any{
 		"promptTokenCount":     0,
 		"candidatesTokenCount": 0,
 		"totalTokenCount":      0,
 	}
 	encodedResp, errMarshal := json.Marshal(map[string]any{
 		"candidates": []map[string]any{candidate},
 		// The response ID is derived from the current time in nanoseconds.
 		"responseId":    fmt.Sprintf("imagen-%d", time.Now().UnixNano()),
 		"modelVersion":  model,
 		"usageMetadata": usage,
 	})
 	if errMarshal != nil {
 		return data
 	}
 	return encodedResp
 }
 // convertToImagenRequest converts a Gemini-style request into the Imagen API
 // format, which uses instances[].prompt instead of contents[].
 //
 // The prompt is taken from the first non-empty source among:
 //  1. contents[0].parts[0].text (Gemini style),
 //  2. the first non-empty messages[].content (OpenAI-compatible style),
 //  3. the top-level "prompt" field.
 //
 // Optional top-level "aspectRatio" and "sampleCount" are copied into
 // "parameters" (sampleCount defaults to 1), and "negativePrompt" is copied
 // onto the single instance.
 //
 // It returns an error when no prompt can be found, or whatever error
 // json.Marshal reports for the assembled request.
 func convertToImagenRequest(payload []byte) ([]byte, error) {
 	var prompt string
 	// Gemini-style: contents[0].parts[0].text.
 	if text := gjson.GetBytes(payload, "contents.0.parts.0.text"); text.Exists() {
 		prompt = text.String()
 	}
 	// OpenAI-compatible: first message whose content is non-empty.
 	if prompt == "" {
 		if contents := gjson.GetBytes(payload, "messages.#.content"); contents.Exists() && contents.IsArray() {
 			for _, content := range contents.Array() {
 				if candidate := content.String(); candidate != "" {
 					prompt = candidate
 					break
 				}
 			}
 		}
 	}
 	// Last resort: a direct top-level prompt field.
 	if prompt == "" {
 		if direct := gjson.GetBytes(payload, "prompt"); direct.Exists() {
 			prompt = direct.String()
 		}
 	}
 	if prompt == "" {
 		return nil, fmt.Errorf("imagen: no prompt found in request")
 	}
 	// Assemble the two sub-objects first, then wrap them in the request.
 	instance := map[string]any{"prompt": prompt}
 	parameters := map[string]any{"sampleCount": 1}
 	if ratio := gjson.GetBytes(payload, "aspectRatio"); ratio.Exists() {
 		parameters["aspectRatio"] = ratio.String()
 	}
 	if count := gjson.GetBytes(payload, "sampleCount"); count.Exists() {
 		parameters["sampleCount"] = int(count.Int())
 	}
 	if negative := gjson.GetBytes(payload, "negativePrompt"); negative.Exists() {
 		instance["negativePrompt"] = negative.String()
 	}
 	return json.Marshal(map[string]any{
 		"instances":  []map[string]any{instance},
 		"parameters": parameters,
 	})
 }
 // GeminiVertexExecutor sends requests to Vertex AI Gemini endpoints using service account credentials.
 type GeminiVertexExecutor struct {
 	cfg *config.Config
@@ -155,39 +293,50 @@ func (e *GeminiVertexExecutor) Refresh(_ context.Context, auth *cliproxyauth.Aut
 // executeWithServiceAccount handles authentication using service account credentials.
 // This method contains the original service account authentication logic.
 func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (resp cliproxyexecutor.Response, err error) {
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
-	from := opts.SourceFormat
+	var body []byte
 	to := sdktranslator.FromString("gemini")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
 	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
 	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", req.Model)
-	action := "generateContent"
+	// Handle Imagen models with special request format
 	if isImagenModel(baseModel) {
 		imagenBody, errImagen := convertToImagenRequest(req.Payload)
 		if errImagen != nil {
 			return resp, errImagen
 		}
 		body = imagenBody
 	} else {
 		// Standard Gemini translation flow
 		from := opts.SourceFormat
 		to := sdktranslator.FromString("gemini")
 		originalPayload := bytes.Clone(req.Payload)
 		if len(opts.OriginalRequest) > 0 {
 			originalPayload = bytes.Clone(opts.OriginalRequest)
 		}
 		originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
 		body = sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 		body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
 		if err != nil {
 			return resp, err
 		}
 		body = fixGeminiImageAspectRatio(baseModel, body)
 		body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 		body, _ = sjson.SetBytes(body, "model", baseModel)
 	}
 	action := getVertexAction(baseModel, false)
 	if req.Metadata != nil {
 		if a, _ := req.Metadata["action"].(string); a == "countTokens" {
 			action = "countTokens"
 		}
 	}
 	baseURL := vertexBaseURL(location)
-	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, action)
+	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, action)
 	if opts.Alt != "" && action != "countTokens" {
 		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
 	}
@@ -250,6 +399,16 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	}
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	reporter.publish(ctx, parseGeminiUsage(data))
 	// For Imagen models, convert response to Gemini format before translation
 	// This ensures Imagen responses use the same format as gemini-3-pro-image-preview
 	if isImagenModel(baseModel) {
 		data = convertImagenToGeminiResponse(data, baseModel)
 	}
 	// Standard Gemini translation (works for both Gemini and converted Imagen responses)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	var param any
 	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
@@ -258,37 +417,31 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 // executeWithAPIKey handles authentication using API key credentials.
 func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (resp cliproxyexecutor.Response, err error) {
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	defer reporter.trackFailure(ctx, &err)
-	model := req.Model
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
+	defer reporter.trackFailure(ctx, &err)
 		model = override
 	}
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(model, *budgetOverride)
 			budgetOverride = &norm
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
 	body = util.ApplyDefaultThinkingIfNeeded(model, body)
 	body = util.NormalizeGeminiThinkingBudget(model, body)
 	body = util.StripThinkingConfigIfUnsupported(model, body)
 	body = fixGeminiImageAspectRatio(model, body)
 	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", model)
-	action := "generateContent"
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
 	if err != nil {
 		return resp, err
 	}
 	body = fixGeminiImageAspectRatio(baseModel, body)
 	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 	action := getVertexAction(baseModel, false)
 	if req.Metadata != nil {
 		if a, _ := req.Metadata["action"].(string); a == "countTokens" {
 			action = "countTokens"
@@ -299,7 +452,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	if baseURL == "" {
 		baseURL = "https://generativelanguage.googleapis.com"
 	}
-	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, action)
+	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, action)
 	if opts.Alt != "" && action != "countTokens" {
 		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
 	}
@@ -367,37 +520,40 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 // executeStreamWithServiceAccount handles streaming authentication using service account credentials.
 func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
 	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
 	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", req.Model)
 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
 	if err != nil {
 		return nil, err
 	}
 	body = fixGeminiImageAspectRatio(baseModel, body)
 	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 	action := getVertexAction(baseModel, true)
 	baseURL := vertexBaseURL(location)
-	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "streamGenerateContent")
+	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, action)
-	if opts.Alt == "" {
+	// Imagen models don't support streaming, skip SSE params
-		url = url + "?alt=sse"
+	if !isImagenModel(baseModel) {
-	} else {
+		if opts.Alt == "" {
-		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
+			url = url + "?alt=sse"
 		} else {
 			url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
 		}
 	}
 	body, _ = sjson.DeleteBytes(body, "session_id")
@@ -487,45 +643,43 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 // executeStreamWithAPIKey handles streaming authentication using API key credentials.
 func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	defer reporter.trackFailure(ctx, &err)
-	model := req.Model
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
+	defer reporter.trackFailure(ctx, &err)
 		model = override
 	}
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(model, *budgetOverride)
 			budgetOverride = &norm
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
 	body = util.ApplyDefaultThinkingIfNeeded(model, body)
 	body = util.NormalizeGeminiThinkingBudget(model, body)
 	body = util.StripThinkingConfigIfUnsupported(model, body)
 	body = fixGeminiImageAspectRatio(model, body)
 	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", model)
 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
 	if err != nil {
 		return nil, err
 	}
 	body = fixGeminiImageAspectRatio(baseModel, body)
 	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 	action := getVertexAction(baseModel, true)
 	// For API key auth, use simpler URL format without project/location
 	if baseURL == "" {
 		baseURL = "https://generativelanguage.googleapis.com"
 	}
-	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, "streamGenerateContent")
+	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, action)
-	if opts.Alt == "" {
+	// Imagen models don't support streaming, skip SSE params
-		url = url + "?alt=sse"
+	if !isImagenModel(baseModel) {
-	} else {
+		if opts.Alt == "" {
-		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
+			url = url + "?alt=sse"
 		} else {
 			url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
 		}
 	}
 	body, _ = sjson.DeleteBytes(body, "session_id")
@@ -612,26 +766,27 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 // countTokensWithServiceAccount counts tokens using service account credentials.
 func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
-	translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+
-	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
-		if budgetOverride != nil {
+
-			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String())
-			budgetOverride = &norm
+	if err != nil {
-		}
+		return cliproxyexecutor.Response{}, err
 		translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
 	}
-	translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
+
-	translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
+	translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq)
-	translatedReq, _ = sjson.SetBytes(translatedReq, "model", req.Model)
+	translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel)
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
 	baseURL := vertexBaseURL(location)
-	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "countTokens")
+	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, "countTokens")
 	httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
 	if errNewReq != nil {
@@ -688,10 +843,6 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
 		return cliproxyexecutor.Response{}, errRead
 	}
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
 		return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)}
 	}
 	count := gjson.GetBytes(data, "totalTokens").Int()
 	out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
 	return cliproxyexecutor.Response{Payload: []byte(out)}, nil
@@ -699,24 +850,20 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
 // countTokensWithAPIKey handles token counting using API key credentials.
 func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) {
-	model := req.Model
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
 		model = override
 	}
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
-	translatedReq := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
+
-	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
+	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
-		if budgetOverride != nil {
+
-			norm := util.NormalizeThinkingBudget(model, *budgetOverride)
+	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String())
-			budgetOverride = &norm
+	if err != nil {
-		}
+		return cliproxyexecutor.Response{}, err
 		translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
 	}
-	translatedReq = util.StripThinkingConfigIfUnsupported(model, translatedReq)
+
-	translatedReq = fixGeminiImageAspectRatio(model, translatedReq)
+	translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq)
-	translatedReq, _ = sjson.SetBytes(translatedReq, "model", model)
+	translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel)
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
@@ -726,7 +873,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
 	if baseURL == "" {
 		baseURL = "https://generativelanguage.googleapis.com"
 	}
-	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, "countTokens")
+	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, "countTokens")
 	httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
 	if errNewReq != nil {
@@ -780,10 +927,6 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
 		return cliproxyexecutor.Response{}, errRead
 	}
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
 		return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)}
 	}
 	count := gjson.GetBytes(data, "totalTokens").Int()
 	out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
 	return cliproxyexecutor.Response{Payload: []byte(out)}, nil
@@ -870,53 +1013,6 @@ func vertexAccessToken(ctx context.Context, cfg *config.Config, auth *cliproxyau
 	return tok.AccessToken, nil
 }
 // resolveUpstreamModel looks up the upstream model name for a requested alias in the
 // vertex-api-key configuration entry matching auth.
 //
 // The alias is trimmed and then expanded into candidate names: the normalized model
 // name, the trimmed alias itself when it differs from the normalized name, and the
 // original model resolved from the normalization metadata when that differs too.
 // Configured models are scanned in order; for each one the candidates are compared
 // case-insensitively against its alias first and its name second. It returns "" when
 // the alias is blank, no configuration entry is found, or nothing matches.
 func (e *GeminiVertexExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
 	requested := strings.TrimSpace(alias)
 	if requested == "" {
 		return ""
 	}
 	cfgEntry := e.resolveVertexConfig(auth)
 	if cfgEntry == nil {
 		return ""
 	}
 	normalized, meta := util.NormalizeThinkingModel(requested)
 	// Build the list of names to try, most specific normalization first.
 	names := make([]string, 0, 3)
 	names = append(names, strings.TrimSpace(normalized))
 	if !strings.EqualFold(normalized, requested) {
 		names = append(names, requested)
 	}
 	if orig := util.ResolveOriginalModel(normalized, meta); orig != "" && !strings.EqualFold(orig, normalized) {
 		names = append(names, orig)
 	}
 	for idx := range cfgEntry.Models {
 		upstream := strings.TrimSpace(cfgEntry.Models[idx].Name)
 		configuredAlias := strings.TrimSpace(cfgEntry.Models[idx].Alias)
 		for _, name := range names {
 			switch {
 			case name == "":
 				continue
 			case configuredAlias != "" && strings.EqualFold(configuredAlias, name):
 				// An alias match prefers the configured name and falls back to the candidate.
 				if upstream != "" {
 					return upstream
 				}
 				return name
 			case upstream != "" && strings.EqualFold(upstream, name):
 				return upstream
 			}
 		}
 	}
 	return ""
 }
 // resolveVertexConfig finds the matching vertex-api-key configuration entry for the given auth.
 func (e *GeminiVertexExecutor) resolveVertexConfig(auth *cliproxyauth.Auth) *config.VertexCompatKey {
 	if auth == nil || e.cfg == nil {
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -12,6 +12,7 @@ import (
 	iflowauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/iflow"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -67,6 +68,8 @@ func (e *IFlowExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth
 // Execute performs a non-streaming chat completion request.
 func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	apiKey, baseURL := iflowCreds(auth)
 	if strings.TrimSpace(apiKey) == "" {
 		err = fmt.Errorf("iflow executor: missing api key")
@@ -76,7 +79,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 		baseURL = iflowauth.DefaultAPIBaseURL
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 	from := opts.SourceFormat
@@ -85,17 +88,17 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
-	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
+	body, _ = sjson.SetBytes(body, "model", baseModel)
-	body, _ = sjson.SetBytes(body, "model", req.Model)
+
-	body = NormalizeThinkingConfig(body, req.Model, false)
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow")
-	if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
+	if err != nil {
-		return resp, errValidate
+		return resp, err
 	}
-	body = applyIFlowThinkingConfig(body)
+
 	body = preserveReasoningContentInMessages(body)
-	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 	endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
@@ -154,6 +157,8 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	reporter.ensurePublished(ctx)
 	var param any
 	// Note: TranslateNonStream uses req.Model (original with suffix) to preserve
 	// the original model name in the response for client compatibility.
 	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
@@ -161,6 +166,8 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 // ExecuteStream performs a streaming chat completion request.
 func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	apiKey, baseURL := iflowCreds(auth)
 	if strings.TrimSpace(apiKey) == "" {
 		err = fmt.Errorf("iflow executor: missing api key")
@@ -170,7 +177,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 		baseURL = iflowauth.DefaultAPIBaseURL
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 	from := opts.SourceFormat
@@ -179,23 +186,22 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
-	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow")
-	body, _ = sjson.SetBytes(body, "model", req.Model)
+	if err != nil {
-	body = NormalizeThinkingConfig(body, req.Model, false)
+		return nil, err
 	if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
 		return nil, errValidate
 	}
-	body = applyIFlowThinkingConfig(body)
+
 	body = preserveReasoningContentInMessages(body)
 	// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
 	toolsResult := gjson.GetBytes(body, "tools")
 	if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
 		body = ensureToolsArray(body)
 	}
-	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 	endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
@@ -278,11 +284,13 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 }
 func (e *IFlowExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
-	enc, err := tokenizerForModel(req.Model)
+	enc, err := tokenizerForModel(baseModel)
 	if err != nil {
 		return cliproxyexecutor.Response{}, fmt.Errorf("iflow executor: tokenizer init failed: %w", err)
 	}
@@ -520,41 +528,3 @@ func preserveReasoningContentInMessages(body []byte) []byte {
 	return body
 }
 // applyIFlowThinkingConfig replaces a top-level reasoning_effort value with the
 // thinking switch used by the model named in the payload.
 //
 // When reasoning_effort is absent the body is returned untouched. Otherwise both
 // reasoning_effort and thinking are removed, and thinking counts as enabled unless
 // the trimmed, lower-cased effort is "none" or empty. Model-specific output:
 //   - model starting with "glm-4": chat_template_kwargs.enable_thinking is set to the
 //     switch, plus chat_template_kwargs.clear_thinking=false when thinking is enabled
 //   - model starting with "minimax-m2": reasoning_split is set to the switch
 //
 // Any other model only has the two generic fields removed.
 func applyIFlowThinkingConfig(body []byte) []byte {
 	effort := gjson.GetBytes(body, "reasoning_effort")
 	if !effort.Exists() {
 		return body
 	}
 	modelName := strings.ToLower(gjson.GetBytes(body, "model").String())
 	level := strings.ToLower(strings.TrimSpace(effort.String()))
 	thinkingOn := !(level == "none" || level == "")
 	// Drop the generic fields; they are re-expressed per model below.
 	for _, key := range []string{"reasoning_effort", "thinking"} {
 		body, _ = sjson.DeleteBytes(body, key)
 	}
 	switch {
 	case strings.HasPrefix(modelName, "glm-4"):
 		body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", thinkingOn)
 		if thinkingOn {
 			body, _ = sjson.SetBytes(body, "chat_template_kwargs.clear_thinking", false)
 		}
 	case strings.HasPrefix(modelName, "minimax-m2"):
 		body, _ = sjson.SetBytes(body, "reasoning_split", thinkingOn)
 	}
 	return body
 }
--- a/internal/runtime/executor/iflow_executor_test.go
+++ b/internal/runtime/executor/iflow_executor_test.go
@@ -0,0 +1,67 @@
 package executor
 import (
 	"testing"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 )
 // TestIFlowExecutorParseSuffix checks that thinking.ParseSuffix returns the expected
 // base model name for iFlow model identifiers with and without a "(level)" suffix.
 func TestIFlowExecutorParseSuffix(t *testing.T) {
 	type suffixCase struct {
 		name      string
 		model     string
 		wantBase  string
 		wantLevel string
 	}
 	// NOTE(review): wantLevel is recorded for each case but never asserted below;
 	// only the base model name is verified. Confirm whether the level should be checked.
 	cases := []suffixCase{
 		{name: "no suffix", model: "glm-4", wantBase: "glm-4", wantLevel: ""},
 		{name: "glm with suffix", model: "glm-4.1-flash(high)", wantBase: "glm-4.1-flash", wantLevel: "high"},
 		{name: "minimax no suffix", model: "minimax-m2", wantBase: "minimax-m2", wantLevel: ""},
 		{name: "minimax with suffix", model: "minimax-m2.1(medium)", wantBase: "minimax-m2.1", wantLevel: "medium"},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			if got := thinking.ParseSuffix(tc.model).ModelName; got != tc.wantBase {
 				t.Errorf("ParseSuffix(%q).ModelName = %q, want %q", tc.model, got, tc.wantBase)
 			}
 		})
 	}
 }
 // TestPreserveReasoningContentInMessages feeds payloads through
 // preserveReasoningContentInMessages and compares the output with the expected bytes.
 // A nil want means the payload is expected to come back unchanged.
 func TestPreserveReasoningContentInMessages(t *testing.T) {
 	cases := []struct {
 		name  string
 		input []byte
 		want  []byte // nil means output should equal input
 	}{
 		{
 			name:  "non-glm model passthrough",
 			input: []byte(`{"model":"gpt-4","messages":[]}`),
 		},
 		{
 			name:  "glm model with empty messages",
 			input: []byte(`{"model":"glm-4","messages":[]}`),
 		},
 		{
 			name:  "glm model preserves existing reasoning_content",
 			input: []byte(`{"model":"glm-4","messages":[{"role":"assistant","content":"hi","reasoning_content":"thinking..."}]}`),
 		},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			expected := tc.want
 			if expected == nil {
 				expected = tc.input
 			}
 			if got := preserveReasoningContentInMessages(tc.input); string(got) != string(expected) {
 				t.Errorf("preserveReasoningContentInMessages() = %s, want %s", got, expected)
 			}
 		})
 	}
 }
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -11,6 +11,7 @@ import (
 	"time"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -69,7 +70,9 @@ func (e *OpenAICompatExecutor) HttpRequest(ctx context.Context, auth *cliproxyau
 }
 func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 	baseURL, apiKey := e.resolveCredentials(auth)
@@ -85,18 +88,13 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, opts.Stream)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, opts.Stream)
-	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), opts.Stream)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), opts.Stream)
-	modelOverride := e.resolveUpstreamModel(req.Model, auth)
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated)
-	if modelOverride != "" {
+
-		translated = e.overrideModel(translated, modelOverride)
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String())
-	}
+	if err != nil {
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated)
+		return resp, err
 	allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
 	translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
 	translated = NormalizeThinkingConfig(translated, req.Model, allowCompat)
 	if errValidate := ValidateThinkingConfig(translated, req.Model); errValidate != nil {
 		return resp, errValidate
 	}
 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
@@ -168,7 +166,9 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 }
 func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 	baseURL, apiKey := e.resolveCredentials(auth)
@@ -176,24 +176,20 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 		err = statusErr{code: http.StatusUnauthorized, msg: "missing provider baseURL"}
 		return nil, err
 	}
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
-	modelOverride := e.resolveUpstreamModel(req.Model, auth)
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated)
-	if modelOverride != "" {
+
-		translated = e.overrideModel(translated, modelOverride)
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String())
-	}
+	if err != nil {
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated)
+		return nil, err
 	allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
 	translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
 	translated = NormalizeThinkingConfig(translated, req.Model, allowCompat)
 	if errValidate := ValidateThinkingConfig(translated, req.Model); errValidate != nil {
 		return nil, errValidate
 	}
 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
@@ -293,14 +289,17 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 }
 func (e *OpenAICompatExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
-	modelForCounting := req.Model
+	modelForCounting := baseModel
-	if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
+
-		translated = e.overrideModel(translated, modelOverride)
+	translated, err := thinking.ApplyThinking(translated, req.Model, from.String(), to.String())
-		modelForCounting = modelOverride
+	if err != nil {
 		return cliproxyexecutor.Response{}, err
 	}
 	enc, err := tokenizerForModel(modelForCounting)
@@ -336,53 +335,6 @@ func (e *OpenAICompatExecutor) resolveCredentials(auth *cliproxyauth.Auth) (base
 	return
 }
 // resolveUpstreamModel maps a client-facing model alias to the model name
 // configured for the auth's OpenAI-compatibility entry.
 //
 // Entries that declare an alias are matched on that alias only (case-insensitive);
 // a match yields the entry's Name, or the alias itself when Name is empty.
 // Entries without an alias are matched on Name. An empty string is returned when
 // the inputs are incomplete, no compat config resolves, or nothing matches.
 func (e *OpenAICompatExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
 	if alias == "" {
 		return ""
 	}
 	if auth == nil {
 		return ""
 	}
 	if e.cfg == nil {
 		return ""
 	}
 	compat := e.resolveCompatConfig(auth)
 	if compat == nil {
 		return ""
 	}
 	for _, entry := range compat.Models {
 		switch {
 		case entry.Alias == "":
 			// No alias configured: the entry is addressed by its name.
 			if strings.EqualFold(entry.Name, alias) {
 				return entry.Name
 			}
 		case strings.EqualFold(entry.Alias, alias):
 			if entry.Name == "" {
 				return alias
 			}
 			return entry.Name
 		}
 	}
 	return ""
 }
 // allowCompatReasoningEffort reports whether model (compared after trimming,
 // case-insensitively) appears as either the alias or the name of any model
 // listed in the auth's OpenAI-compatibility config. It returns false when the
 // executor, its config, the model name, or the compat config is missing.
 func (e *OpenAICompatExecutor) allowCompatReasoningEffort(model string, auth *cliproxyauth.Auth) bool {
 	if e == nil || e.cfg == nil {
 		return false
 	}
 	wanted := strings.TrimSpace(model)
 	if wanted == "" {
 		return false
 	}
 	compat := e.resolveCompatConfig(auth)
 	if compat == nil {
 		return false
 	}
 	for _, candidate := range compat.Models {
 		aliasMatches := strings.EqualFold(strings.TrimSpace(candidate.Alias), wanted)
 		if aliasMatches || strings.EqualFold(strings.TrimSpace(candidate.Name), wanted) {
 			return true
 		}
 	}
 	return false
 }
 func (e *OpenAICompatExecutor) resolveCompatConfig(auth *cliproxyauth.Auth) *config.OpenAICompatibility {
 	if auth == nil || e.cfg == nil {
 		return nil
--- a/internal/runtime/executor/payload_helpers.go
+++ b/internal/runtime/executor/payload_helpers.go
@@ -1,109 +1,14 @@
 package executor
 import (
-	"fmt"
+	"encoding/json"
 	"net/http"
 	"strings"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
 // ApplyThinkingMetadata writes thinking overrides taken from request metadata
 // (for example a "(high)" or "(8192)" model suffix) into a standard Gemini
 // format payload.
 //
 // The payload is returned untouched when the metadata carries no budget/include
 // override or when the selected model does not support thinking. A budget
 // override is normalized for the selected model before it is applied.
 func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
 	// Prefer the original (alias) model recorded in metadata; fall back to the
 	// upstream model only when the alias lacks thinking support and the upstream
 	// model has it.
 	original := util.ResolveOriginalModel(model, metadata)
 	target := original
 	if !util.ModelSupportsThinking(original) {
 		if util.ModelSupportsThinking(model) {
 			target = model
 		}
 	}
 	budget, include, found := util.ResolveThinkingConfigFromMetadata(target, metadata)
 	switch {
 	case !found:
 		return payload
 	case budget == nil && include == nil:
 		return payload
 	case !util.ModelSupportsThinking(target):
 		return payload
 	}
 	if budget != nil {
 		normalized := util.NormalizeThinkingBudget(target, *budget)
 		budget = &normalized
 	}
 	return util.ApplyGeminiThinkingConfig(payload, budget, include)
 }
 // ApplyThinkingMetadataCLI is the Gemini CLI counterpart of ApplyThinkingMetadata:
 // it resolves thinking overrides from request metadata (e.g. "(high)", "(8192)")
 // and applies them through util.ApplyGeminiCLIThinkingConfig.
 //
 // Nothing is changed when no override is present or the chosen model lacks
 // thinking support; a budget override is normalized for the chosen model first.
 func ApplyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
 	// The alias stored in metadata wins unless only the upstream model is known
 	// to support thinking.
 	chosen := util.ResolveOriginalModel(model, metadata)
 	if aliasSupports := util.ModelSupportsThinking(chosen); !aliasSupports && util.ModelSupportsThinking(model) {
 		chosen = model
 	}
 	budgetPtr, includePtr, resolved := util.ResolveThinkingConfigFromMetadata(chosen, metadata)
 	nothingToApply := !resolved || (budgetPtr == nil && includePtr == nil)
 	if nothingToApply || !util.ModelSupportsThinking(chosen) {
 		return payload
 	}
 	if budgetPtr != nil {
 		clamped := util.NormalizeThinkingBudget(chosen, *budgetPtr)
 		budgetPtr = &clamped
 	}
 	return util.ApplyGeminiCLIThinkingConfig(payload, budgetPtr, includePtr)
 }
 // ApplyReasoningEffortMetadata writes a reasoning effort value derived from
 // request metadata into payload at the JSON path given by field, replacing any
 // value already present there.
 //
 // The payload is returned unchanged when metadata or field is empty, when the
 // model neither supports thinking nor is allowed via allowCompat, or when the
 // model does not use thinking levels and allowCompat is false. An explicit
 // effort in metadata is tried first; otherwise a numeric thinking budget in
 // metadata is converted to an effort level.
 func ApplyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string, allowCompat bool) []byte {
 	if len(metadata) == 0 || field == "" {
 		return payload
 	}
 	lookup := util.ResolveOriginalModel(model, metadata)
 	if lookup == "" {
 		lookup = model
 	}
 	if !allowCompat && !util.ModelSupportsThinking(lookup) {
 		return payload
 	}
 	explicit, hasExplicit := util.ReasoningEffortFromMetadata(metadata)
 	if !(util.ModelUsesThinkingLevels(lookup) || allowCompat) {
 		return payload
 	}
 	if hasExplicit && explicit != "" {
 		if updated, err := sjson.SetBytes(payload, field, explicit); err == nil {
 			return updated
 		}
 	}
 	// Fallback: translate a numeric thinking budget into an effort level.
 	budget, _, _, matched := util.ThinkingFromMetadata(metadata)
 	if !matched || budget == nil {
 		return payload
 	}
 	derived, ok := util.ThinkingBudgetToEffort(lookup, *budget)
 	if !ok || derived == "" {
 		return payload
 	}
 	if updated, err := sjson.SetBytes(payload, field, derived); err == nil {
 		return updated
 	}
 	return payload
 }
 // applyPayloadConfigWithRoot behaves like applyPayloadConfig but treats all parameter
 // paths as relative to the provided root path (for example, "request" for Gemini CLI)
 // and restricts matches to the given protocol when supplied. Defaults are checked
@@ -113,13 +18,14 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 		return payload
 	}
 	rules := cfg.Payload
-	if len(rules.Default) == 0 && len(rules.Override) == 0 {
+	if len(rules.Default) == 0 && len(rules.DefaultRaw) == 0 && len(rules.Override) == 0 && len(rules.OverrideRaw) == 0 {
 		return payload
 	}
 	model = strings.TrimSpace(model)
 	if model == "" {
 		return payload
 	}
 	candidates := payloadModelCandidates(cfg, model, protocol)
 	out := payload
 	source := original
 	if len(source) == 0 {
@@ -129,7 +35,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 	// Apply default rules: first write wins per field across all matching rules.
 	for i := range rules.Default {
 		rule := &rules.Default[i]
-		if !payloadRuleMatchesModel(rule, model, protocol) {
+		if !payloadRuleMatchesModels(rule, protocol, candidates) {
 			continue
 		}
 		for path, value := range rule.Params {
@@ -151,10 +57,39 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 			appliedDefaults[fullPath] = struct{}{}
 		}
 	}
 	// Apply default raw rules: first write wins per field across all matching rules.
 	for i := range rules.DefaultRaw {
 		rule := &rules.DefaultRaw[i]
 		if !payloadRuleMatchesModels(rule, protocol, candidates) {
 			continue
 		}
 		for path, value := range rule.Params {
 			fullPath := buildPayloadPath(root, path)
 			if fullPath == "" {
 				continue
 			}
 			if gjson.GetBytes(source, fullPath).Exists() {
 				continue
 			}
 			if _, ok := appliedDefaults[fullPath]; ok {
 				continue
 			}
 			rawValue, ok := payloadRawValue(value)
 			if !ok {
 				continue
 			}
 			updated, errSet := sjson.SetRawBytes(out, fullPath, rawValue)
 			if errSet != nil {
 				continue
 			}
 			out = updated
 			appliedDefaults[fullPath] = struct{}{}
 		}
 	}
 	// Apply override rules: last write wins per field across all matching rules.
 	for i := range rules.Override {
 		rule := &rules.Override[i]
-		if !payloadRuleMatchesModel(rule, model, protocol) {
+		if !payloadRuleMatchesModels(rule, protocol, candidates) {
 			continue
 		}
 		for path, value := range rule.Params {
@@ -169,9 +104,43 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 			out = updated
 		}
 	}
 	// Apply override raw rules: last write wins per field across all matching rules.
 	for i := range rules.OverrideRaw {
 		rule := &rules.OverrideRaw[i]
 		if !payloadRuleMatchesModels(rule, protocol, candidates) {
 			continue
 		}
 		for path, value := range rule.Params {
 			fullPath := buildPayloadPath(root, path)
 			if fullPath == "" {
 				continue
 			}
 			rawValue, ok := payloadRawValue(value)
 			if !ok {
 				continue
 			}
 			updated, errSet := sjson.SetRawBytes(out, fullPath, rawValue)
 			if errSet != nil {
 				continue
 			}
 			out = updated
 		}
 	}
 	return out
 }
 // payloadRuleMatchesModels reports whether rule matches at least one of the
 // given model names for protocol, delegating each check to
 // payloadRuleMatchesModel. A nil rule or an empty model list never matches.
 func payloadRuleMatchesModels(rule *config.PayloadRule, protocol string, models []string) bool {
 	if rule == nil {
 		return false
 	}
 	for idx := range models {
 		if payloadRuleMatchesModel(rule, models[idx], protocol) {
 			return true
 		}
 	}
 	return false
 }
 func payloadRuleMatchesModel(rule *config.PayloadRule, model, protocol string) bool {
 	if rule == nil {
 		return false
@@ -194,6 +163,65 @@ func payloadRuleMatchesModel(rule *config.PayloadRule, model, protocol string) b
 	return false
 }
 // payloadModelCandidates returns the model names that payload rules should be
 // matched against: the trimmed model itself, followed by each alias reported by
 // payloadModelAliases, with blanks and case-insensitive duplicates dropped.
 // It returns nil for a blank model and only the model when cfg is nil.
 func payloadModelCandidates(cfg *config.Config, model, protocol string) []string {
 	base := strings.TrimSpace(model)
 	if base == "" {
 		return nil
 	}
 	result := []string{base}
 	if cfg == nil {
 		return result
 	}
 	// Track lower-cased names already emitted so aliases are not repeated.
 	known := make(map[string]struct{})
 	known[strings.ToLower(base)] = struct{}{}
 	for _, raw := range payloadModelAliases(cfg, base, protocol) {
 		name := strings.TrimSpace(raw)
 		if name == "" {
 			continue
 		}
 		lowered := strings.ToLower(name)
 		if _, dup := known[lowered]; dup {
 			continue
 		}
 		known[lowered] = struct{}{}
 		result = append(result, name)
 	}
 	return result
 }
 // payloadModelAliases collects the aliases configured for model in
 // cfg.OAuthModelAlias under the lower-cased, trimmed protocol key. An entry
 // contributes its trimmed alias when its Name equals model (case-insensitive,
 // after trimming) and the alias is non-empty. It returns nil when cfg is nil,
 // model or protocol is blank, or the channel has no entries.
 func payloadModelAliases(cfg *config.Config, model, protocol string) []string {
 	if cfg == nil {
 		return nil
 	}
 	target := strings.TrimSpace(model)
 	if target == "" {
 		return nil
 	}
 	channel := strings.ToLower(strings.TrimSpace(protocol))
 	if channel == "" {
 		return nil
 	}
 	mappings := cfg.OAuthModelAlias[channel]
 	if len(mappings) == 0 {
 		return nil
 	}
 	matched := make([]string, 0, 2)
 	for _, mapping := range mappings {
 		if strings.EqualFold(strings.TrimSpace(mapping.Name), target) {
 			if alias := strings.TrimSpace(mapping.Alias); alias != "" {
 				matched = append(matched, alias)
 			}
 		}
 	}
 	return matched
 }
 // buildPayloadPath combines an optional root path with a relative parameter path.
 // When root is empty, the parameter path is used as-is. When root is non-empty,
 // the parameter path is treated as relative to root.
@@ -212,6 +240,24 @@ func buildPayloadPath(root, path string) string {
 	return r + "." + p
 }
 // payloadRawValue converts a configured raw-rule value into the JSON bytes that
 // will be spliced into the request payload.
 //
 // Strings and byte slices are treated as already-encoded JSON and are returned
 // as-is, but only when they are valid JSON: empty or malformed input is rejected
 // so it can never corrupt the payload it would be written into. Any other value
 // is encoded with json.Marshal. The boolean is false when value is nil, is not
 // valid JSON, or cannot be marshalled.
 func payloadRawValue(value any) ([]byte, bool) {
 	var raw []byte
 	switch typed := value.(type) {
 	case nil:
 		return nil, false
 	case string:
 		raw = []byte(typed)
 	case []byte:
 		raw = typed
 	default:
 		encoded, errMarshal := json.Marshal(typed)
 		if errMarshal != nil {
 			return nil, false
 		}
 		return encoded, true
 	}
 	// Caller-supplied raw text is inserted verbatim, so refuse anything that is
 	// not a complete JSON value (this also covers the empty string).
 	if !json.Valid(raw) {
 		return nil, false
 	}
 	return raw, true
 }
 // matchModelPattern performs simple wildcard matching where '*' matches zero or more characters.
 // Examples:
 //
@@ -256,102 +302,3 @@ func matchModelPattern(pattern, model string) bool {
 	}
 	return pi == len(pattern)
 }
 // NormalizeThinkingConfig adjusts the reasoning-related fields of payload to
 // what the model can accept.
 //
 //   - Empty payload or empty model: returned unchanged.
 //   - Model without thinking support: all thinking fields are stripped, unless
 //     allowCompat is set, in which case the payload is left alone.
 //   - Model with level-based thinking: effort levels are normalized.
 //   - Any other thinking model: only the effort string fields are stripped.
 func NormalizeThinkingConfig(payload []byte, model string, allowCompat bool) []byte {
 	if len(payload) == 0 || model == "" {
 		return payload
 	}
 	switch {
 	case !util.ModelSupportsThinking(model):
 		if allowCompat {
 			return payload
 		}
 		return StripThinkingFields(payload, false)
 	case util.ModelUsesThinkingLevels(model):
 		return NormalizeReasoningEffortLevel(payload, model)
 	default:
 		// Thinking is supported but not expressed as levels, so effort strings
 		// do not apply.
 		return StripThinkingFields(payload, true)
 	}
 }
 // StripThinkingFields removes thinking-related fields from payload.
 //
 // With effortOnly set, only the effort string fields ("reasoning_effort" and
 // "reasoning.effort") are removed. Otherwise the "reasoning" and "thinking"
 // objects are removed as well, before the effort fields. Fields that are not
 // present are skipped. If a deletion reports an error, that field is left in
 // place and the payload accumulated so far is kept rather than being replaced
 // by the result returned alongside the error.
 func StripThinkingFields(payload []byte, effortOnly bool) []byte {
 	fieldsToRemove := []string{
 		"reasoning_effort",
 		"reasoning.effort",
 	}
 	if !effortOnly {
 		fieldsToRemove = append([]string{"reasoning", "thinking"}, fieldsToRemove...)
 	}
 	out := payload
 	for _, field := range fieldsToRemove {
 		if !gjson.GetBytes(out, field).Exists() {
 			continue
 		}
 		updated, errDel := sjson.DeleteBytes(out, field)
 		if errDel != nil {
 			// Keep the last good payload instead of trusting the error result.
 			continue
 		}
 		out = updated
 	}
 	return out
 }
 // NormalizeReasoningEffortLevel rewrites the "reasoning_effort" and
 // "reasoning.effort" fields of payload to the form returned by
 // util.NormalizeReasoningEffortLevel for model.
 //
 // A field is left untouched when it is absent, when the util helper does not
 // accept its value, or when writing the normalized value reports an error; in
 // the error case the payload accumulated so far is kept instead of being
 // replaced by the result returned alongside the error.
 func NormalizeReasoningEffortLevel(payload []byte, model string) []byte {
 	out := payload
 	for _, path := range []string{"reasoning_effort", "reasoning.effort"} {
 		effort := gjson.GetBytes(out, path)
 		if !effort.Exists() {
 			continue
 		}
 		normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String())
 		if !ok {
 			continue
 		}
 		updated, errSet := sjson.SetBytes(out, path, normalized)
 		if errSet != nil {
 			continue
 		}
 		out = updated
 	}
 	return out
 }
 // ValidateThinkingConfig rejects reasoning effort values that a level-based
 // thinking model does not accept.
 //
 // It inspects "reasoning_effort" and then "reasoning.effort"; the first value
 // that util.NormalizeReasoningEffortLevel refuses produces a statusErr with
 // HTTP 400 listing the supported levels. Empty payloads, empty model names, and
 // models that do not use thinking levels are not validated and yield nil.
 func ValidateThinkingConfig(payload []byte, model string) error {
 	if len(payload) == 0 || model == "" {
 		return nil
 	}
 	if !(util.ModelSupportsThinking(model) && util.ModelUsesThinkingLevels(model)) {
 		return nil
 	}
 	supported := util.GetModelThinkingLevels(model)
 	for _, path := range []string{"reasoning_effort", "reasoning.effort"} {
 		effort := gjson.GetBytes(payload, path)
 		if !effort.Exists() {
 			continue
 		}
 		if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
 			continue
 		}
 		return statusErr{
 			code: http.StatusBadRequest,
 			msg:  fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(supported, ", ")),
 		}
 	}
 	return nil
 }
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -12,6 +12,7 @@ import (
 	qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
@@ -65,12 +66,14 @@ func (e *QwenExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth,
 }
 func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
-	token, baseURL := qwenCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	token, baseURL := qwenCreds(auth)
 	if baseURL == "" {
 		baseURL = "https://portal.qwen.ai/v1"
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+
 	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 	from := opts.SourceFormat
@@ -79,15 +82,16 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
-	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
+	body, _ = sjson.SetBytes(body, "model", baseModel)
-	body, _ = sjson.SetBytes(body, "model", req.Model)
+
-	body = NormalizeThinkingConfig(body, req.Model, false)
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
-	if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
+	if err != nil {
-		return resp, errValidate
+		return resp, err
 	}
-	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
+
 	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -140,18 +144,22 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	reporter.publish(ctx, parseOpenAIUsage(data))
 	var param any
 	// Note: TranslateNonStream uses req.Model (original with suffix) to preserve
 	// the original model name in the response for client compatibility.
 	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
 func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
-	token, baseURL := qwenCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	token, baseURL := qwenCreds(auth)
 	if baseURL == "" {
 		baseURL = "https://portal.qwen.ai/v1"
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+
 	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 	from := opts.SourceFormat
@@ -160,15 +168,15 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
-	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
-	body, _ = sjson.SetBytes(body, "model", req.Model)
+	if err != nil {
-	body = NormalizeThinkingConfig(body, req.Model, false)
+		return nil, err
 	if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
 		return nil, errValidate
 	}
 	toolsResult := gjson.GetBytes(body, "tools")
 	// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
 	// This will have no real consequences. It's just to scare Qwen3.
@@ -176,7 +184,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 		body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
 	}
 	body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
-	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -256,13 +264,15 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 }
 func (e *QwenExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 	modelName := gjson.GetBytes(body, "model").String()
 	if strings.TrimSpace(modelName) == "" {
-		modelName = req.Model
+		modelName = baseModel
 	}
 	enc, err := tokenizerForModel(modelName)
--- a/internal/runtime/executor/qwen_executor_test.go
+++ b/internal/runtime/executor/qwen_executor_test.go
@@ -0,0 +1,30 @@
 package executor
 import (
 	"testing"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 )
 // TestQwenExecutorParseSuffix verifies that thinking.ParseSuffix reports the
 // expected base model name for Qwen model identifiers carrying level or
 // numeric suffixes.
 // NOTE(review): wantLevel is filled in for every case but is never asserted.
 func TestQwenExecutorParseSuffix(t *testing.T) {
 	type suffixCase struct {
 		name      string
 		model     string
 		wantBase  string
 		wantLevel string
 	}
 	cases := []suffixCase{
 		{name: "no suffix", model: "qwen-max", wantBase: "qwen-max", wantLevel: ""},
 		{name: "with level suffix", model: "qwen-max(high)", wantBase: "qwen-max", wantLevel: "high"},
 		{name: "with budget suffix", model: "qwen-max(16384)", wantBase: "qwen-max", wantLevel: "16384"},
 		{name: "complex model name", model: "qwen-plus-latest(medium)", wantBase: "qwen-plus-latest", wantLevel: "medium"},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			if got := thinking.ParseSuffix(tc.model).ModelName; got != tc.wantBase {
 				t.Errorf("ParseSuffix(%q).ModelName = %q, want %q", tc.model, got, tc.wantBase)
 			}
 		})
 	}
 }
--- a/internal/runtime/executor/thinking_providers.go
+++ b/internal/runtime/executor/thinking_providers.go
@@ -0,0 +1,11 @@
 package executor
 import (
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/antigravity"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/claude"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/codex"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai"
 )
--- a/internal/thinking/apply.go
+++ b/internal/thinking/apply.go
@@ -0,0 +1,481 @@
 // Package thinking provides unified thinking configuration processing.
 package thinking
 import (
 	"strings"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
 )
 // providerAppliers maps provider names to their ProviderApplier implementations.
 //
 // Every known provider name is pre-seeded with a nil applier; RegisterProvider
 // replaces the entry with a real implementation. While an entry is still nil,
 // GetProviderApplier returns nil for it and ApplyThinking passes the request
 // body through unchanged.
 var providerAppliers = map[string]ProviderApplier{
 	"gemini":      nil,
 	"gemini-cli":  nil,
 	"claude":      nil,
 	"openai":      nil,
 	"codex":       nil,
 	"iflow":       nil,
 	"antigravity": nil,
 }
 // GetProviderApplier looks up the applier stored under the given provider name.
 // The result is nil when the name is unknown or when no applier has been
 // registered for it yet.
 func GetProviderApplier(provider string) ProviderApplier {
 	registered, known := providerAppliers[provider]
 	if !known {
 		return nil
 	}
 	return registered
 }
 // RegisterProvider registers a provider applier by name.
 //
 // The entry for name is overwritten unconditionally, so a later registration
 // replaces an earlier one. The name is stored as given; ApplyThinking looks
 // providers up by their lowercased, trimmed format name.
 // NOTE(review): the map is written without locking; the provider packages
 // visible here register from init() — confirm no caller registers concurrently.
 func RegisterProvider(name string, applier ProviderApplier) {
 	providerAppliers[name] = applier
 }
 // IsUserDefinedModel reports whether thinking configuration for the model
 // should be applied as-is, leaving validation to the upstream service.
 //
 // That is the case for models flagged UserDefined (set at registration time for
 // models declared in the config file's models[] arrays, e.g.
 // openai-compatibility.*.models[] or *-api-key.models[]) and also for unknown
 // models, i.e. when modelInfo is nil.
 func IsUserDefinedModel(modelInfo *registry.ModelInfo) bool {
 	return modelInfo == nil || modelInfo.UserDefined
 }
 // ApplyThinking applies thinking configuration to a request body.
 //
 // This is the unified entry point for all providers. It follows the processing
 // order defined in FR25: route check → model capability query → config extraction
 // → validation → application.
 //
 // Suffix Priority: When the model name includes a thinking suffix (e.g., "gemini-2.5-pro(8192)"),
 // the suffix configuration takes priority over any thinking parameters in the request body.
 // This enables users to override thinking settings via the model name without modifying their
 // request payload.
 //
 // Parameters:
 //   - body: Original request body JSON
 //   - model: Model name, optionally with thinking suffix (e.g., "claude-sonnet-4-5(16384)")
 //   - fromFormat: Source request format (e.g., openai, codex, gemini); when empty
 //     it defaults to the normalized toFormat
 //   - toFormat: Target provider format for the request body (gemini, gemini-cli, antigravity, claude, openai, codex, iflow)
 //
 // Both format names are lowercased and trimmed before use.
 //
 // Returns:
 //   - Modified request body JSON with thinking configuration applied
 //   - Error if validation fails (ThinkingError). On error, the original body
 //     is returned (not nil) to enable defensive programming patterns.
 //
 // Passthrough behavior (returns original body without error):
 //   - Unknown provider (no applier registered for toFormat)
 //   - modelInfo.Thinking is nil and the body carries no thinking config
 //   - Neither the suffix nor the body yields a thinking config
 //   - ValidateConfig returns a nil config without an error
 //
 // When modelInfo.Thinking is nil (model doesn't support thinking) but the body
 // does carry a thinking config, the config is removed via StripThinkingConfig
 // rather than passed through.
 //
 // Note: Unknown models (modelInfo is nil) are treated as user-defined models: we skip
 // validation and still apply the thinking config so the upstream can validate it.
 //
 // Example:
 //
 //	// With suffix - suffix config takes priority
 //	result, err := thinking.ApplyThinking(body, "gemini-2.5-pro(8192)", "gemini", "gemini")
 //
 //	// Without suffix - uses body config
 //	result, err := thinking.ApplyThinking(body, "gemini-2.5-pro", "gemini", "gemini")
 func ApplyThinking(body []byte, model string, fromFormat string, toFormat string) ([]byte, error) {
 	providerFormat := strings.ToLower(strings.TrimSpace(toFormat))
 	fromFormat = strings.ToLower(strings.TrimSpace(fromFormat))
 	if fromFormat == "" {
 		fromFormat = providerFormat
 	}
 	// 1. Route check: Get provider applier
 	applier := GetProviderApplier(providerFormat)
 	if applier == nil {
 		log.WithFields(log.Fields{
 			"provider": providerFormat,
 			"model":    model,
 		}).Debug("thinking: unknown provider, passthrough |")
 		return body, nil
 	}
 	// 2. Parse suffix and get modelInfo
 	// The registry is queried with the suffix-free base name.
 	suffixResult := ParseSuffix(model)
 	baseModel := suffixResult.ModelName
 	modelInfo := registry.LookupModelInfo(baseModel)
 	// 3. Model capability check
 	// Unknown models are treated as user-defined so thinking config can still be applied.
 	// The upstream service is responsible for validating the configuration.
 	if IsUserDefinedModel(modelInfo) {
 		return applyUserDefinedModel(body, modelInfo, fromFormat, providerFormat, suffixResult)
 	}
 	if modelInfo.Thinking == nil {
 		// Only the body is inspected here; a model-name suffix is ignored for
 		// models without thinking support.
 		config := extractThinkingConfig(body, providerFormat)
 		if hasThinkingConfig(config) {
 			log.WithFields(log.Fields{
 				"model":    baseModel,
 				"provider": providerFormat,
 			}).Debug("thinking: model does not support thinking, stripping config |")
 			return StripThinkingConfig(body, providerFormat), nil
 		}
 		log.WithFields(log.Fields{
 			"provider": providerFormat,
 			"model":    baseModel,
 		}).Debug("thinking: model does not support thinking, passthrough |")
 		return body, nil
 	}
 	// 4. Get config: suffix priority over body
 	var config ThinkingConfig
 	if suffixResult.HasSuffix {
 		config = parseSuffixToConfig(suffixResult.RawSuffix, providerFormat, model)
 		log.WithFields(log.Fields{
 			"provider": providerFormat,
 			"model":    model,
 			"mode":     config.Mode,
 			"budget":   config.Budget,
 			"level":    config.Level,
 		}).Debug("thinking: config from model suffix |")
 	} else {
 		config = extractThinkingConfig(body, providerFormat)
 		if hasThinkingConfig(config) {
 			log.WithFields(log.Fields{
 				"provider": providerFormat,
 				"model":    modelInfo.ID,
 				"mode":     config.Mode,
 				"budget":   config.Budget,
 				"level":    config.Level,
 			}).Debug("thinking: original config from request |")
 		}
 	}
 	// An unparseable suffix also lands here: parseSuffixToConfig returns an empty
 	// config and the body config is not consulted as a fallback.
 	if !hasThinkingConfig(config) {
 		log.WithFields(log.Fields{
 			"provider": providerFormat,
 			"model":    modelInfo.ID,
 		}).Debug("thinking: no config found, passthrough |")
 		return body, nil
 	}
 	// 5. Validate and normalize configuration
 	validated, err := ValidateConfig(config, modelInfo, fromFormat, providerFormat, suffixResult.HasSuffix)
 	if err != nil {
 		log.WithFields(log.Fields{
 			"provider": providerFormat,
 			"model":    modelInfo.ID,
 			"error":    err.Error(),
 		}).Warn("thinking: validation failed |")
 		// Return original body on validation failure (defensive programming).
 		// This ensures callers who ignore the error won't receive nil body.
 		// The upstream service will decide how to handle the unmodified request.
 		return body, err
 	}
 	// Defensive check: ValidateConfig should never return (nil, nil)
 	if validated == nil {
 		log.WithFields(log.Fields{
 			"provider": providerFormat,
 			"model":    modelInfo.ID,
 		}).Warn("thinking: ValidateConfig returned nil config without error, passthrough |")
 		return body, nil
 	}
 	log.WithFields(log.Fields{
 		"provider": providerFormat,
 		"model":    modelInfo.ID,
 		"mode":     validated.Mode,
 		"budget":   validated.Budget,
 		"level":    validated.Level,
 	}).Debug("thinking: processed config to apply |")
 	// 6. Apply configuration using provider-specific applier
 	return applier.Apply(body, *validated, modelInfo)
 }
 // parseSuffixToConfig turns the raw text of a model-name suffix into a
 // ThinkingConfig. provider and model are only used for log output.
 //
 // The suffix is tried against three interpretations, in this order:
 //  1. Special values recognized by ParseSpecialSuffix: ModeNone yields a
 //     disabled config (Budget 0), ModeAuto yields an automatic one (Budget -1).
 //  2. Level names recognized by ParseLevelSuffix → ModeLevel with that level.
 //  3. Numbers recognized by ParseNumericSuffix: 0 → ModeNone, anything else →
 //     ModeBudget with that budget.
 //
 // A suffix matching none of these is logged at debug level and produces the
 // zero ThinkingConfig, which callers treat as "no config".
 func parseSuffixToConfig(rawSuffix, provider, model string) ThinkingConfig {
 	// Special keywords win over every other interpretation.
 	special, isSpecial := ParseSpecialSuffix(rawSuffix)
 	if isSpecial && special == ModeNone {
 		return ThinkingConfig{Mode: ModeNone, Budget: 0}
 	}
 	if isSpecial && special == ModeAuto {
 		return ThinkingConfig{Mode: ModeAuto, Budget: -1}
 	}
 	// Named levels come next.
 	if named, isLevel := ParseLevelSuffix(rawSuffix); isLevel {
 		return ThinkingConfig{Mode: ModeLevel, Level: named}
 	}
 	// Finally, numeric budgets; zero means "thinking off".
 	tokens, isNumeric := ParseNumericSuffix(rawSuffix)
 	switch {
 	case isNumeric && tokens == 0:
 		return ThinkingConfig{Mode: ModeNone, Budget: 0}
 	case isNumeric:
 		return ThinkingConfig{Mode: ModeBudget, Budget: tokens}
 	}
 	// Nothing matched: report it and fall back to the empty config.
 	log.WithFields(log.Fields{
 		"provider":   provider,
 		"model":      model,
 		"raw_suffix": rawSuffix,
 	}).Debug("thinking: unknown suffix format, treating as no config |")
 	return ThinkingConfig{}
 }
 // applyUserDefinedModel applies a thinking configuration for user-defined (or
 // unknown) models. No ThinkingSupport validation is performed; the only
 // adjustment is normalizeUserDefinedConfig.
 //
 // The config comes from the model-name suffix when one is present, otherwise
 // from the request body. The body is returned untouched when no config is found
 // or when no applier is registered for toFormat.
 func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, fromFormat, toFormat string, suffixResult SuffixResult) ([]byte, error) {
 	// Label used in log output: registry ID when available, parsed name otherwise.
 	modelID := suffixResult.ModelName
 	if modelInfo != nil {
 		modelID = modelInfo.ID
 	}
 	// The suffix overrides whatever the body says.
 	var config ThinkingConfig
 	switch {
 	case suffixResult.HasSuffix:
 		config = parseSuffixToConfig(suffixResult.RawSuffix, toFormat, modelID)
 	default:
 		config = extractThinkingConfig(body, toFormat)
 	}
 	if !hasThinkingConfig(config) {
 		log.WithFields(log.Fields{
 			"model":    modelID,
 			"provider": toFormat,
 		}).Debug("thinking: user-defined model, passthrough (no config) |")
 		return body, nil
 	}
 	providerApplier := GetProviderApplier(toFormat)
 	if providerApplier == nil {
 		log.WithFields(log.Fields{
 			"model":    modelID,
 			"provider": toFormat,
 		}).Debug("thinking: user-defined model, passthrough (unknown provider) |")
 		return body, nil
 	}
 	// The log shows the config as extracted, before normalization.
 	log.WithFields(log.Fields{
 		"provider": toFormat,
 		"model":    modelID,
 		"mode":     config.Mode,
 		"budget":   config.Budget,
 		"level":    config.Level,
 	}).Debug("thinking: applying config for user-defined model (skip validation)")
 	normalized := normalizeUserDefinedConfig(config, fromFormat, toFormat)
 	return providerApplier.Apply(body, normalized, modelInfo)
 }
 // normalizeUserDefinedConfig rewrites a level-mode config into a budget-mode
 // config when the request travels from a level-based source format to a
 // budget-based target format. The level is translated with
 // ConvertLevelToBudget; if the level is not known there, or any precondition
 // fails, the config is returned unchanged.
 func normalizeUserDefinedConfig(config ThinkingConfig, fromFormat, toFormat string) ThinkingConfig {
 	needsBudget := config.Mode == ModeLevel && isBudgetBasedProvider(toFormat) && isLevelBasedProvider(fromFormat)
 	if !needsBudget {
 		return config
 	}
 	if converted, known := ConvertLevelToBudget(string(config.Level)); known {
 		config.Mode, config.Budget, config.Level = ModeBudget, converted, ""
 	}
 	return config
 }
 // extractThinkingConfig reads the thinking configuration out of a request body
 // using the field layout of the given provider format.
 //
 // An empty body, invalid JSON, or an unrecognized provider yields the zero
 // ThinkingConfig. For "iflow" the iFlow-specific fields are tried first and the
 // OpenAI-style field is used as a fallback.
 func extractThinkingConfig(body []byte, provider string) ThinkingConfig {
 	if usable := len(body) > 0 && gjson.ValidBytes(body); !usable {
 		return ThinkingConfig{}
 	}
 	if provider == "claude" {
 		return extractClaudeConfig(body)
 	}
 	if provider == "gemini" || provider == "gemini-cli" || provider == "antigravity" {
 		return extractGeminiConfig(body, provider)
 	}
 	if provider == "openai" {
 		return extractOpenAIConfig(body)
 	}
 	if provider == "codex" {
 		return extractCodexConfig(body)
 	}
 	if provider == "iflow" {
 		if native := extractIFlowConfig(body); hasThinkingConfig(native) {
 			return native
 		}
 		return extractOpenAIConfig(body)
 	}
 	return ThinkingConfig{}
 }
 // hasThinkingConfig reports whether config carries any thinking setting. A
 // config counts as empty only when its mode is ModeBudget, its budget is 0 and
 // its level is the empty string, all at once.
 func hasThinkingConfig(config ThinkingConfig) bool {
 	isEmpty := config.Mode == ModeBudget && config.Budget == 0 && config.Level == ""
 	return !isEmpty
 }
 // extractClaudeConfig reads thinking settings from a Claude-format body.
 //
 // Fields consulted:
 //   - thinking.type: "enabled" or "disabled"
 //   - thinking.budget_tokens: integer (-1=auto, 0=disabled, otherwise a budget)
 //
 // Resolution order: type "disabled" wins outright; otherwise budget_tokens, if
 // present, decides; otherwise type "enabled" alone means automatic thinking
 // (the caller asked for thinking without naming a budget). Anything else yields
 // the zero ThinkingConfig.
 func extractClaudeConfig(body []byte) ThinkingConfig {
 	kind := gjson.GetBytes(body, "thinking.type").String()
 	if kind == "disabled" {
 		return ThinkingConfig{Mode: ModeNone, Budget: 0}
 	}
 	tokens := gjson.GetBytes(body, "thinking.budget_tokens")
 	if tokens.Exists() {
 		n := int(tokens.Int())
 		if n == 0 {
 			return ThinkingConfig{Mode: ModeNone, Budget: 0}
 		}
 		if n == -1 {
 			return ThinkingConfig{Mode: ModeAuto, Budget: -1}
 		}
 		return ThinkingConfig{Mode: ModeBudget, Budget: n}
 	}
 	// No explicit budget: "enabled" by itself is read as auto.
 	if kind == "enabled" {
 		return ThinkingConfig{Mode: ModeAuto, Budget: -1}
 	}
 	return ThinkingConfig{}
 }
 // extractGeminiConfig reads thinking settings from a Gemini-format body.
 //
 // Fields consulted (under generationConfig.thinkingConfig, or under
 // request.generationConfig.thinkingConfig for the gemini-cli and antigravity
 // providers):
 //   - thinkingLevel: "none", "auto", or a level name (Gemini 3 style)
 //   - thinkingBudget: integer, -1=auto, 0=disabled (Gemini 2.5 style)
 //
 // thinkingLevel is examined first, so a level-based setting takes precedence
 // over a budget when both are present. With neither field present the zero
 // ThinkingConfig is returned.
 func extractGeminiConfig(body []byte, provider string) ThinkingConfig {
 	var root string
 	switch provider {
 	case "gemini-cli", "antigravity":
 		root = "request.generationConfig.thinkingConfig"
 	default:
 		root = "generationConfig.thinkingConfig"
 	}
 	// Level-based setting has priority.
 	levelField := gjson.GetBytes(body, root+".thinkingLevel")
 	if levelField.Exists() {
 		name := levelField.String()
 		if name == "none" {
 			return ThinkingConfig{Mode: ModeNone, Budget: 0}
 		}
 		if name == "auto" {
 			return ThinkingConfig{Mode: ModeAuto, Budget: -1}
 		}
 		return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(name)}
 	}
 	// Otherwise fall back to the numeric budget.
 	budgetField := gjson.GetBytes(body, root+".thinkingBudget")
 	if !budgetField.Exists() {
 		return ThinkingConfig{}
 	}
 	n := int(budgetField.Int())
 	if n == 0 {
 		return ThinkingConfig{Mode: ModeNone, Budget: 0}
 	}
 	if n == -1 {
 		return ThinkingConfig{Mode: ModeAuto, Budget: -1}
 	}
 	return ThinkingConfig{Mode: ModeBudget, Budget: n}
 }
 // extractOpenAIConfig reads thinking settings from an OpenAI Chat Completions
 // format body.
 //
 // Only the top-level reasoning_effort field is consulted. The value "none"
 // yields ModeNone; any other value is carried verbatim as a ModeLevel level.
 // When the field is absent the zero ThinkingConfig is returned.
 func extractOpenAIConfig(body []byte) ThinkingConfig {
 	effortField := gjson.GetBytes(body, "reasoning_effort")
 	if !effortField.Exists() {
 		return ThinkingConfig{}
 	}
 	switch effort := effortField.String(); effort {
 	case "none":
 		return ThinkingConfig{Mode: ModeNone, Budget: 0}
 	default:
 		return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(effort)}
 	}
 }
 // extractCodexConfig reads thinking settings from a Codex (OpenAI Responses
 // API) format body.
 //
 // It mirrors the OpenAI extraction but looks at the nested reasoning.effort
 // field rather than reasoning_effort. The value "none" yields ModeNone; any
 // other value is carried verbatim as a ModeLevel level. When the field is
 // absent the zero ThinkingConfig is returned.
 func extractCodexConfig(body []byte) ThinkingConfig {
 	effortField := gjson.GetBytes(body, "reasoning.effort")
 	if !effortField.Exists() {
 		return ThinkingConfig{}
 	}
 	switch effort := effortField.String(); effort {
 	case "none":
 		return ThinkingConfig{Mode: ModeNone, Budget: 0}
 	default:
 		return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(effort)}
 	}
 }
 // extractIFlowConfig reads thinking settings from an iFlow-format body.
 //
 // Two boolean switches are recognized, checked in this order:
 //   - chat_template_kwargs.enable_thinking (GLM format)
 //   - reasoning_split (MiniMax format)
 //
 // The first switch that exists decides the outcome: true yields ModeBudget with
 // Budget=1, false yields ModeNone. Budget=1 is a sentinel meaning "enabled";
 // iFlow models are on/off only, and the real configuration is left to the
 // iFlow applier. With neither switch present the zero ThinkingConfig is
 // returned.
 func extractIFlowConfig(body []byte) ThinkingConfig {
 	// GLM switch first, MiniMax switch second.
 	switches := [...]string{"chat_template_kwargs.enable_thinking", "reasoning_split"}
 	for _, path := range switches {
 		toggle := gjson.GetBytes(body, path)
 		if !toggle.Exists() {
 			continue
 		}
 		if !toggle.Bool() {
 			return ThinkingConfig{Mode: ModeNone, Budget: 0}
 		}
 		// Sentinel budget: 1 stands for "enabled".
 		return ThinkingConfig{Mode: ModeBudget, Budget: 1}
 	}
 	return ThinkingConfig{}
 }
--- a/internal/thinking/convert.go
+++ b/internal/thinking/convert.go
@@ -0,0 +1,142 @@
 package thinking
 import (
 	"strings"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 )
 // levelToBudgetMap is the canonical table translating thinking level names to
 // token budgets. Keys are stored in lowercase, so callers must lowercase the
 // level before indexing.
 var levelToBudgetMap = map[string]int{
 	"none":    0,
 	"auto":    -1,
 	"minimal": 512,
 	"low":     1024,
 	"medium":  8192,
 	"high":    24576,
 	"xhigh":   32768,
 }
 // ConvertLevelToBudget translates a thinking level name into its numeric
 // budget, ignoring letter case.
 //
 // Mapping:
 //   - none    → 0
 //   - auto    → -1
 //   - minimal → 512
 //   - low     → 1024
 //   - medium  → 8192
 //   - high    → 24576
 //   - xhigh   → 32768
 //
 // The boolean result is true when the level is one of the names above; for any
 // other input the function returns (0, false).
 func ConvertLevelToBudget(level string) (int, bool) {
 	key := strings.ToLower(level)
 	if tokens, known := levelToBudgetMap[key]; known {
 		return tokens, true
 	}
 	return 0, false
 }
 // BudgetThreshold constants define the upper bounds for each thinking level.
 // These are used by ConvertBudgetToLevel for range-based mapping.
 //
 // Each bound is inclusive and equals the budget that levelToBudgetMap assigns
 // to the same level. There is no threshold for "xhigh": ConvertBudgetToLevel
 // maps every budget above ThresholdHigh to it.
 const (
 	// ThresholdMinimal is the upper bound for "minimal" level (1-512)
 	ThresholdMinimal = 512
 	// ThresholdLow is the upper bound for "low" level (513-1024)
 	ThresholdLow = 1024
 	// ThresholdMedium is the upper bound for "medium" level (1025-8192)
 	ThresholdMedium = 8192
 	// ThresholdHigh is the upper bound for "high" level (8193-24576)
 	ThresholdHigh = 24576
 )
 // ConvertBudgetToLevel maps a numeric thinking budget onto the thinking level
 // whose range contains it.
 //
 // Ranges (upper bounds are the Threshold* constants, inclusive):
 //   - -1          → auto
 //   - 0           → none
 //   - 1-512       → minimal
 //   - 513-1024    → low
 //   - 1025-8192   → medium
 //   - 8193-24576  → high
 //   - 24577+      → xhigh
 //
 // The boolean result is false only for budgets below -1, which are invalid; in
 // that case the returned level is the empty string.
 func ConvertBudgetToLevel(budget int) (string, bool) {
 	// Anything below the auto sentinel is not a valid budget.
 	if budget < -1 {
 		return "", false
 	}
 	if budget == -1 {
 		return string(LevelAuto), true
 	}
 	if budget == 0 {
 		return string(LevelNone), true
 	}
 	// Positive budgets: walk the thresholds from smallest to largest.
 	if budget <= ThresholdMinimal {
 		return string(LevelMinimal), true
 	}
 	if budget <= ThresholdLow {
 		return string(LevelLow), true
 	}
 	if budget <= ThresholdMedium {
 		return string(LevelMedium), true
 	}
 	if budget <= ThresholdHigh {
 		return string(LevelHigh), true
 	}
 	return string(LevelXHigh), true
 }
 // ModelCapability describes the thinking format support of a model.
 type ModelCapability int
 // The constants below share the expression "iota - 1" through implicit
 // repetition, so the values run -1, 0, 1, 2, 3 in declaration order.
 const (
 	// CapabilityUnknown indicates modelInfo is nil (passthrough behavior, internal use).
 	// Its value is -1.
 	CapabilityUnknown ModelCapability = iota - 1
 	// CapabilityNone indicates model doesn't support thinking (Thinking is nil).
 	// Its value is 0, the zero value of ModelCapability.
 	CapabilityNone
 	// CapabilityBudgetOnly indicates the model supports numeric budgets only.
 	CapabilityBudgetOnly
 	// CapabilityLevelOnly indicates the model supports discrete levels only.
 	CapabilityLevelOnly
 	// CapabilityHybrid indicates the model supports both budgets and levels.
 	CapabilityHybrid
 )
 // detectModelCapability classifies a model by the thinking formats it accepts,
 // based on its ThinkingSupport data. It is an internal helper.
 //
 // Classification:
 //   - CapabilityUnknown: modelInfo itself is nil (unknown model, passthrough)
 //   - CapabilityNone: Thinking is nil, or it has neither a positive Min/Max nor
 //     any Levels
 //   - CapabilityBudgetOnly: positive Min or Max, no Levels (Claude, Gemini 2.5)
 //   - CapabilityLevelOnly: Levels present, no positive Min/Max (OpenAI, iFlow)
 //   - CapabilityHybrid: both a positive Min/Max and Levels (Gemini 3)
 func detectModelCapability(modelInfo *registry.ModelInfo) ModelCapability {
 	if modelInfo == nil {
 		// Unknown model: signal passthrough to the caller.
 		return CapabilityUnknown
 	}
 	support := modelInfo.Thinking
 	if support == nil {
 		return CapabilityNone
 	}
 	supportsBudget := support.Min > 0 || support.Max > 0
 	supportsLevels := len(support.Levels) > 0
 	if supportsBudget && supportsLevels {
 		return CapabilityHybrid
 	}
 	if supportsBudget {
 		return CapabilityBudgetOnly
 	}
 	if supportsLevels {
 		return CapabilityLevelOnly
 	}
 	return CapabilityNone
 }
--- a/internal/thinking/errors.go
+++ b/internal/thinking/errors.go
@@ -0,0 +1,82 @@
 // Package thinking provides unified thinking configuration processing logic.
 package thinking
 import "net/http"
 // ErrorCode represents the type of thinking configuration error.
 // It is a string so the code can be compared and logged directly.
 type ErrorCode string
 // Error codes for thinking configuration processing.
 // Every ThinkingError reports HTTP 400 through StatusCode, whichever code it
 // carries.
 const (
 	// ErrInvalidSuffix indicates the suffix format cannot be parsed.
 	// Example: "model(abc" (missing closing parenthesis)
 	ErrInvalidSuffix ErrorCode = "INVALID_SUFFIX"
 	// ErrUnknownLevel indicates the level value is not in the valid list.
 	// Example: "model(ultra)" where "ultra" is not a valid level
 	ErrUnknownLevel ErrorCode = "UNKNOWN_LEVEL"
 	// ErrThinkingNotSupported indicates the model does not support thinking.
 	// Example: claude-haiku-4-5 does not have thinking capability
 	ErrThinkingNotSupported ErrorCode = "THINKING_NOT_SUPPORTED"
 	// ErrLevelNotSupported indicates the model does not support level mode.
 	// Example: using level with a budget-only model
 	ErrLevelNotSupported ErrorCode = "LEVEL_NOT_SUPPORTED"
 	// ErrBudgetOutOfRange indicates the budget value is outside model range.
 	// Example: budget 64000 exceeds max 20000
 	ErrBudgetOutOfRange ErrorCode = "BUDGET_OUT_OF_RANGE"
 	// ErrProviderMismatch indicates the provider does not match the model.
 	// Example: applying Claude format to a Gemini model
 	ErrProviderMismatch ErrorCode = "PROVIDER_MISMATCH"
 )
 // ThinkingError represents an error that occurred during thinking configuration processing.
 //
 // This error type provides structured information about the error, including:
 //   - Code: A machine-readable error code for programmatic handling
 //   - Message: A human-readable description of the error
 //   - Model: The model name related to the error (optional)
 //   - Details: Additional context information (optional)
 //
 // The error interface is implemented on the pointer type (*ThinkingError), as
 // is StatusCode. The constructors in this file leave Details nil.
 type ThinkingError struct {
 	// Code is the machine-readable error code
 	Code ErrorCode
 	// Message is the human-readable error description.
 	// Should be lowercase, no trailing period, with context if applicable.
 	// It is the exact string returned by Error.
 	Message string
 	// Model is the model name related to this error (optional)
 	Model string
 	// Details contains additional context information (optional)
 	Details map[string]interface{}
 }
 // Error implements the error interface.
 // Returns the message directly without code prefix.
 // Model and Details are not included in the returned string either.
 // Use Code field for programmatic error handling.
 func (e *ThinkingError) Error() string {
 	return e.Message
 }
 // NewThinkingError builds a ThinkingError carrying only a code and a message;
 // Model and Details are left at their zero values.
 func NewThinkingError(code ErrorCode, message string) *ThinkingError {
 	thinkingErr := new(ThinkingError)
 	thinkingErr.Code = code
 	thinkingErr.Message = message
 	return thinkingErr
 }
 // NewThinkingErrorWithModel builds a ThinkingError carrying a code, a message
 // and the name of the model the error relates to; Details is left nil.
 func NewThinkingErrorWithModel(code ErrorCode, message, model string) *ThinkingError {
 	thinkingErr := new(ThinkingError)
 	thinkingErr.Code = code
 	thinkingErr.Message = message
 	thinkingErr.Model = model
 	return thinkingErr
 }
 // StatusCode implements a portable status code interface for HTTP handlers.
 // It always returns http.StatusBadRequest (400), independent of e.Code.
 func (e *ThinkingError) StatusCode() int {
 	return http.StatusBadRequest
 }
--- a/internal/thinking/provider/antigravity/apply.go
+++ b/internal/thinking/provider/antigravity/apply.go
@@ -0,0 +1,201 @@
 // Package antigravity implements thinking configuration for Antigravity API format.
 //
 // Antigravity uses request.generationConfig.thinkingConfig.* path (same as gemini-cli)
 // but requires additional normalization for Claude models:
 //   - Ensure thinking budget < max_tokens
 //   - Remove thinkingConfig if budget < minimum allowed
 package antigravity
 import (
 	"strings"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
 // Applier applies thinking configuration for Antigravity API format.
 // It has no fields and therefore carries no state.
 type Applier struct{}
 // Compile-time check that *Applier satisfies thinking.ProviderApplier.
 var _ thinking.ProviderApplier = (*Applier)(nil)
 // NewApplier returns a fresh Antigravity thinking applier. The applier has no
 // fields, so every instance behaves the same.
 func NewApplier() *Applier {
 	return new(Applier)
 }
 // init registers the Antigravity applier under the "antigravity" provider name
 // when this package is imported, replacing the nil placeholder that the
 // thinking package keeps for that name.
 func init() {
 	thinking.RegisterProvider("antigravity", NewApplier())
 }
 // Apply writes the given thinking configuration into an Antigravity request
 // body (under request.generationConfig.thinkingConfig).
 //
 // Routing:
 //   - User-defined or unknown models (modelInfo nil or UserDefined) are handled
 //     by applyCompatible.
 //   - Models without thinking support, and configs with an unrecognized mode,
 //     leave the body untouched.
 //   - ModeAuto and ModeBudget always use the budget format.
 //   - Remaining modes use the level format when the model declares levels, and
 //     the budget format otherwise.
 //
 // An empty or invalid JSON body is replaced by "{}" before writing. For models
 // whose ID contains "claude", the budget format additionally enforces:
 //   - thinking budget < max tokens
 //   - thinkingConfig removed when the budget is below the model minimum
 func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
 	if thinking.IsUserDefinedModel(modelInfo) {
 		return a.applyCompatible(body, config, modelInfo)
 	}
 	if modelInfo.Thinking == nil {
 		return body, nil
 	}
 	switch config.Mode {
 	case thinking.ModeBudget, thinking.ModeLevel, thinking.ModeNone, thinking.ModeAuto:
 		// Recognized mode: continue below.
 	default:
 		return body, nil
 	}
 	if len(body) == 0 || !gjson.ValidBytes(body) {
 		body = []byte(`{}`)
 	}
 	claudeModel := strings.Contains(strings.ToLower(modelInfo.ID), "claude")
 	switch {
 	case config.Mode == thinking.ModeAuto, config.Mode == thinking.ModeBudget:
 		// Auto (thinkingBudget=-1) and explicit budgets share the budget format.
 		return a.applyBudgetFormat(body, config, modelInfo, claudeModel)
 	case len(modelInfo.Thinking.Levels) > 0:
 		// Model declares discrete levels: emit thinkingLevel.
 		return a.applyLevelFormat(body, config)
 	}
 	return a.applyBudgetFormat(body, config, modelInfo, claudeModel)
 }
 // applyCompatible applies a thinking configuration for user-defined or unknown
 // models, where no model capability data guides the format choice.
 //
 // Configs with an unrecognized mode leave the body untouched. An empty or
 // invalid JSON body is replaced by "{}". ModeLevel, and ModeNone accompanied by
 // a level, use the level format; everything else (including ModeAuto) uses the
 // budget format. Claude-specific handling is requested only when modelInfo is
 // non-nil and its ID contains "claude".
 func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
 	switch config.Mode {
 	case thinking.ModeBudget, thinking.ModeLevel, thinking.ModeNone, thinking.ModeAuto:
 		// Recognized mode: continue below.
 	default:
 		return body, nil
 	}
 	if len(body) == 0 || !gjson.ValidBytes(body) {
 		body = []byte(`{}`)
 	}
 	claudeModel := modelInfo != nil && strings.Contains(strings.ToLower(modelInfo.ID), "claude")
 	useLevelFormat := config.Mode == thinking.ModeLevel ||
 		(config.Mode == thinking.ModeNone && config.Level != "")
 	if useLevelFormat {
 		return a.applyLevelFormat(body, config)
 	}
 	return a.applyBudgetFormat(body, config, modelInfo, claudeModel)
 }
 // applyLevelFormat writes a level-style thinking configuration
 // (thinkingLevel + includeThoughts) into the request body.
 //
 // thinkingBudget and the snake_case include_thoughts field are deleted first so
 // the output never carries conflicting fields. ModeNone sets
 // includeThoughts=false and keeps the level only when one is given; ModeLevel
 // sets the level and includeThoughts=true. Any other mode returns the original
 // body unmodified, since budget conversion is the caller's job. Errors from
 // sjson are deliberately ignored, as in the rest of this applier.
 func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
 	// Drop thinkingBudget so it cannot coexist with thinkingLevel.
 	out, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget")
 	// Drop the snake_case spelling to avoid oneof conflicts upstream.
 	out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts")
 	switch config.Mode {
 	case thinking.ModeNone:
 		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.includeThoughts", false)
 		if config.Level != "" {
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingLevel", string(config.Level))
 		}
 		return out, nil
 	case thinking.ModeLevel:
 		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingLevel", string(config.Level))
 		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
 		return out, nil
 	default:
 		// Not a level-style config: hand back the body as received.
 		return body, nil
 	}
 }
 // applyBudgetFormat writes a budget-style thinking configuration
 // (thinkingBudget + includeThoughts) into the request body.
 //
 // thinkingLevel and the snake_case include_thoughts field are deleted first so
 // the output never carries conflicting fields. includeThoughts is false for
 // ModeNone, true for ModeAuto, and "budget > 0" otherwise; it is decided from
 // the budget as requested, before any Claude adjustment. When isClaude is set
 // and modelInfo is available, the budget goes through normalizeClaudeBudget,
 // which may remove thinkingConfig altogether (signalled by -2); in that case
 // nothing further is written.
 func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo, isClaude bool) ([]byte, error) {
 	// Drop thinkingLevel so it cannot coexist with thinkingBudget.
 	out, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingLevel")
 	// Drop the snake_case spelling to avoid oneof conflicts upstream.
 	out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts")
 	tokens := config.Budget
 	showThoughts := config.Mode == thinking.ModeAuto ||
 		(config.Mode != thinking.ModeNone && tokens > 0)
 	if isClaude && modelInfo != nil {
 		tokens, out = a.normalizeClaudeBudget(tokens, out, modelInfo)
 		if tokens == -2 {
 			// thinkingConfig was removed entirely; leave it that way.
 			return out, nil
 		}
 	}
 	out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", tokens)
 	out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.includeThoughts", showThoughts)
 	return out, nil
 }
 // normalizeClaudeBudget enforces the Claude-specific limits on a thinking
 // budget and returns the adjusted budget together with the updated payload.
 //
 // Steps:
 //   - A budget at or above the effective max tokens is lowered to max-1.
 //   - A non-negative budget below the model's minimum (Thinking.Min, when
 //     positive) causes request.generationConfig.thinkingConfig to be deleted;
 //     the returned budget is then the sentinel -2.
 //   - When the effective max came from the model default, it is written to
 //     request.generationConfig.maxOutputTokens.
 //
 // With a nil modelInfo both inputs are returned unchanged.
 func (a *Applier) normalizeClaudeBudget(budget int, payload []byte, modelInfo *registry.ModelInfo) (int, []byte) {
 	if modelInfo == nil {
 		return budget, payload
 	}
 	// Keep the budget strictly below the output token ceiling.
 	ceiling, fromModelDefault := a.effectiveMaxTokens(payload, modelInfo)
 	if ceiling > 0 && budget >= ceiling {
 		budget = ceiling - 1
 	}
 	// Negative budgets (e.g. auto) are exempt from the minimum check.
 	var floor int
 	if support := modelInfo.Thinking; support != nil {
 		floor = support.Min
 	}
 	if belowFloor := floor > 0 && budget >= 0 && budget < floor; belowFloor {
 		payload, _ = sjson.DeleteBytes(payload, "request.generationConfig.thinkingConfig")
 		return -2, payload
 	}
 	// Persist the model default so the request carries an explicit ceiling.
 	if fromModelDefault && ceiling > 0 {
 		payload, _ = sjson.SetBytes(payload, "request.generationConfig.maxOutputTokens", ceiling)
 	}
 	return budget, payload
 }
 // effectiveMaxTokens picks the output token ceiling used to cap the thinking
 // budget. A positive request.generationConfig.maxOutputTokens in the payload
 // wins; otherwise the model's positive MaxCompletionTokens is used. The boolean
 // is true only in the second case, telling the caller the value should be
 // written back into the request. (0, false) means no ceiling is known.
 func (a *Applier) effectiveMaxTokens(payload []byte, modelInfo *registry.ModelInfo) (max int, fromModel bool) {
 	requested := gjson.GetBytes(payload, "request.generationConfig.maxOutputTokens")
 	switch {
 	case requested.Exists() && requested.Int() > 0:
 		return int(requested.Int()), false
 	case modelInfo != nil && modelInfo.MaxCompletionTokens > 0:
 		return modelInfo.MaxCompletionTokens, true
 	default:
 		return 0, false
 	}
 }
--- a/internal/thinking/provider/claude/apply.go
+++ b/internal/thinking/provider/claude/apply.go
@@ -0,0 +1,166 @@
 // Package claude implements thinking configuration scaffolding for Claude models.
 //
 // Claude models use the thinking.budget_tokens format with values in the range
 // 1024-128000. Some Claude models support ZeroAllowed (sonnet-4-5, opus-4-5),
 // while older models do not.
 // See: _bmad-output/planning-artifacts/architecture.md#Epic-6
 package claude
 import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
 // Applier implements thinking.ProviderApplier for Claude models.
 // It is stateless and holds no configuration.
 type Applier struct{}
 // Compile-time check that *Applier satisfies thinking.ProviderApplier, matching the
 // assertion carried by the other provider packages.
 var _ thinking.ProviderApplier = (*Applier)(nil)
 // NewApplier creates a new Claude thinking applier.
 func NewApplier() *Applier {
 	return &Applier{}
 }
 // init registers a Claude applier under the provider name "claude".
 func init() {
 	thinking.RegisterProvider("claude", NewApplier())
 }
 // Apply writes the Claude "thinking" object into the request body.
 //
 // User-defined models are delegated to applyCompatibleClaude. For all other models the
 // body is returned untouched when the model declares no thinking support or when
 // config.Mode is anything other than ModeBudget or ModeNone.
 //
 // IMPORTANT: config is expected to have been normalized beforehand by
 // thinking.ValidateConfig (mode conversion, budget clamping, ZeroAllowed enforcement);
 // Apply does not repeat those checks. An empty or invalid JSON body is replaced by an
 // empty object before editing.
 //
 // Output when config.Budget is 0:
 //
 //	{
 //	  "thinking": {
 //	    "type": "disabled"
 //	  }
 //	}
 //
 // Output for any other budget (the number is an example):
 //
 //	{
 //	  "thinking": {
 //	    "type": "enabled",
 //	    "budget_tokens": 16384
 //	  }
 //	}
 //
 // In the enabled case normalizeClaudeBudget then adjusts the request so that
 // max_tokens > thinking.budget_tokens (Anthropic API constraint).
 func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
 	if thinking.IsUserDefinedModel(modelInfo) {
 		return applyCompatibleClaude(body, config)
 	}
 	if modelInfo.Thinking == nil {
 		return body, nil
 	}
 	switch config.Mode {
 	case thinking.ModeBudget, thinking.ModeNone:
 		// Handled below.
 	default:
 		// Other modes should already have been converted by ValidateConfig.
 		return body, nil
 	}
 	if len(body) == 0 || !gjson.ValidBytes(body) {
 		body = []byte(`{}`)
 	}
 	if config.Budget != 0 {
 		enabled, _ := sjson.SetBytes(body, "thinking.type", "enabled")
 		enabled, _ = sjson.SetBytes(enabled, "thinking.budget_tokens", config.Budget)
 		return a.normalizeClaudeBudget(enabled, config.Budget, modelInfo), nil
 	}
 	// A zero budget means thinking is switched off.
 	disabled, _ := sjson.SetBytes(body, "thinking.type", "disabled")
 	disabled, _ = sjson.DeleteBytes(disabled, "thinking.budget_tokens")
 	return disabled, nil
 }
 // normalizeClaudeBudget enforces max_tokens > thinking.budget_tokens on a Claude request.
 // The Anthropic API rejects requests that violate this with a 400 error.
 //
 // Behavior:
 //   - budgetTokens <= 0: body is returned as-is.
 //   - The effective max_tokens is the request's value, or else the model default; a
 //     model default is written into the body as max_tokens.
 //   - If budgetTokens is already below the effective max (or no max is known), nothing
 //     else changes.
 //   - Otherwise the budget would be lowered to max-1. When that lowered value is
 //     positive but below the model's minimum thinking budget, budget_tokens is left
 //     as it was (max_tokens may still have been written); in every other case
 //     thinking.budget_tokens is overwritten with max-1.
 func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo *registry.ModelInfo) []byte {
 	if budgetTokens <= 0 {
 		return body
 	}
 	limit, fromDefault := a.effectiveMaxTokens(body, modelInfo)
 	haveLimit := limit > 0
 	if fromDefault && haveLimit {
 		body, _ = sjson.SetBytes(body, "max_tokens", limit)
 	}
 	if !haveLimit || budgetTokens < limit {
 		// Budget already satisfies the constraint; no adjustment required.
 		return body
 	}
 	capped := limit - 1
 	floor := 0
 	if modelInfo != nil && modelInfo.Thinking != nil {
 		floor = modelInfo.Thinking.Min
 	}
 	if floor > 0 && capped > 0 && capped < floor {
 		// Lowering the budget would push it under the model minimum; keep budget_tokens.
 		return body
 	}
 	body, _ = sjson.SetBytes(body, "thinking.budget_tokens", capped)
 	return body
 }
 // effectiveMaxTokens picks the token limit used to cap the thinking budget.
 //
 // A positive max_tokens in the body wins and is reported with fromModel=false.
 // Failing that, a positive modelInfo.MaxCompletionTokens is returned with
 // fromModel=true, signalling that the caller should write it back into the request.
 // When neither is available the result is (0, false).
 func (a *Applier) effectiveMaxTokens(body []byte, modelInfo *registry.ModelInfo) (max int, fromModel bool) {
 	requested := gjson.GetBytes(body, "max_tokens")
 	if requested.Exists() {
 		if n := requested.Int(); n > 0 {
 			return int(n), false
 		}
 	}
 	if modelInfo == nil || modelInfo.MaxCompletionTokens <= 0 {
 		return 0, false
 	}
 	return modelInfo.MaxCompletionTokens, true
 }
 // applyCompatibleClaude writes the Claude "thinking" object for user-defined models,
 // using config as given without consulting any model capability data.
 //
 //   - ModeNone:   thinking.type="disabled", budget_tokens removed
 //   - ModeAuto:   thinking.type="enabled",  budget_tokens removed
 //   - ModeBudget: thinking.type="enabled",  budget_tokens=config.Budget
 //
 // Any other mode returns body unchanged. An empty or invalid JSON body is replaced
 // by an empty object before editing.
 func applyCompatibleClaude(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
 	mode := config.Mode
 	supported := mode == thinking.ModeBudget || mode == thinking.ModeNone || mode == thinking.ModeAuto
 	if !supported {
 		return body, nil
 	}
 	if len(body) == 0 || !gjson.ValidBytes(body) {
 		body = []byte(`{}`)
 	}
 	if mode == thinking.ModeNone {
 		out, _ := sjson.SetBytes(body, "thinking.type", "disabled")
 		out, _ = sjson.DeleteBytes(out, "thinking.budget_tokens")
 		return out, nil
 	}
 	out, _ := sjson.SetBytes(body, "thinking.type", "enabled")
 	if mode == thinking.ModeAuto {
 		out, _ = sjson.DeleteBytes(out, "thinking.budget_tokens")
 	} else {
 		out, _ = sjson.SetBytes(out, "thinking.budget_tokens", config.Budget)
 	}
 	return out, nil
 }
--- a/internal/thinking/provider/codex/apply.go
+++ b/internal/thinking/provider/codex/apply.go
@@ -0,0 +1,131 @@
 // Package codex implements thinking configuration for Codex (OpenAI Responses API) models.
 //
 // Codex models use the reasoning.effort format with discrete levels
 // (low/medium/high). This is similar to OpenAI but uses nested field
 // "reasoning.effort" instead of "reasoning_effort".
 // See: _bmad-output/planning-artifacts/architecture.md#Epic-8
 package codex
 import (
 	"strings"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
 // Applier is the thinking.ProviderApplier for Codex models.
 //
 // Codex-specific behavior:
 //   - Writes reasoning.effort, a string level (low/medium/high/xhigh)
 //   - Works with levels only; numeric budgets are not emitted
 //   - Some models support ZeroAllowed (gpt-5.1, gpt-5.2)
 type Applier struct{}
 // Compile-time check that *Applier satisfies thinking.ProviderApplier.
 var _ thinking.ProviderApplier = (*Applier)(nil)
 // NewApplier returns a ready-to-use Codex thinking applier.
 func NewApplier() *Applier {
 	return new(Applier)
 }
 // init registers a Codex applier under the provider name "codex".
 func init() {
 	thinking.RegisterProvider("codex", NewApplier())
 }
 // Apply writes reasoning.effort into a Codex request body.
 //
 // User-defined models are delegated to applyCompatibleCodex. Otherwise the body is
 // returned unchanged when the model declares no thinking support or when config.Mode
 // is neither ModeLevel nor ModeNone. An empty or invalid JSON body is replaced by an
 // empty object before editing.
 //
 // ModeLevel writes config.Level directly. For ModeNone the effort is chosen as the
 // first non-empty candidate of:
 //  1. "none", when Budget is 0 and the model allows zero or lists a "none" level
 //  2. config.Level
 //  3. the first entry of the model's Levels
 //
 // and the body is left unchanged if no candidate is found.
 //
 // Expected output format:
 //
 //	{
 //	  "reasoning": {
 //	    "effort": "high"
 //	  }
 //	}
 func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
 	if thinking.IsUserDefinedModel(modelInfo) {
 		return applyCompatibleCodex(body, config)
 	}
 	if modelInfo.Thinking == nil {
 		return body, nil
 	}
 	isLevel := config.Mode == thinking.ModeLevel
 	if !isLevel && config.Mode != thinking.ModeNone {
 		return body, nil
 	}
 	if len(body) == 0 || !gjson.ValidBytes(body) {
 		body = []byte(`{}`)
 	}
 	if isLevel {
 		out, _ := sjson.SetBytes(body, "reasoning.effort", string(config.Level))
 		return out, nil
 	}
 	// ModeNone: resolve the effort through the fallback chain.
 	support := modelInfo.Thinking
 	effort := ""
 	if config.Budget == 0 && (support.ZeroAllowed || hasLevel(support.Levels, string(thinking.LevelNone))) {
 		effort = string(thinking.LevelNone)
 	}
 	if effort == "" {
 		effort = string(config.Level)
 	}
 	if effort == "" && len(support.Levels) > 0 {
 		effort = support.Levels[0]
 	}
 	if effort == "" {
 		return body, nil
 	}
 	out, _ := sjson.SetBytes(body, "reasoning.effort", effort)
 	return out, nil
 }
 // applyCompatibleCodex writes reasoning.effort for user-defined models, deriving the
 // effort from config alone.
 //
 //   - ModeLevel:  config.Level; body unchanged if the level is empty
 //   - ModeNone:   config.Level if set, otherwise the "none" level
 //   - ModeAuto:   the "auto" level, passed through as-is
 //   - ModeBudget: the level thinking.ConvertBudgetToLevel maps the budget to;
 //     body unchanged if the conversion fails
 //
 // Any other mode returns the body without writing an effort. An empty or invalid JSON
 // body is replaced by an empty object first, so that replacement is what gets returned
 // even when no effort is written.
 func applyCompatibleCodex(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
 	if len(body) == 0 || !gjson.ValidBytes(body) {
 		body = []byte(`{}`)
 	}
 	effort := string(config.Level)
 	switch config.Mode {
 	case thinking.ModeLevel:
 		if effort == "" {
 			return body, nil
 		}
 	case thinking.ModeNone:
 		if effort == "" {
 			effort = string(thinking.LevelNone)
 		}
 	case thinking.ModeAuto:
 		effort = string(thinking.LevelAuto)
 	case thinking.ModeBudget:
 		converted, ok := thinking.ConvertBudgetToLevel(config.Budget)
 		if !ok {
 			return body, nil
 		}
 		effort = converted
 	default:
 		return body, nil
 	}
 	out, _ := sjson.SetBytes(body, "reasoning.effort", effort)
 	return out, nil
 }
 // hasLevel reports whether levels contains target. Each entry is trimmed of
 // surrounding whitespace and compared case-insensitively; target is used as given.
 func hasLevel(levels []string, target string) bool {
 	for i := 0; i < len(levels); i++ {
 		candidate := strings.TrimSpace(levels[i])
 		if strings.EqualFold(candidate, target) {
 			return true
 		}
 	}
 	return false
 }
--- a/internal/thinking/provider/gemini/apply.go
+++ b/internal/thinking/provider/gemini/apply.go
@@ -0,0 +1,169 @@
 // Package gemini implements thinking configuration for Gemini models.
 //
 // Gemini models have two formats:
 //   - Gemini 2.5: Uses thinkingBudget (numeric)
 //   - Gemini 3.x: Uses thinkingLevel (string: minimal/low/medium/high)
 //     or thinkingBudget=-1 for auto/dynamic mode
 //
 // Output format is determined by ThinkingConfig.Mode and ThinkingSupport.Levels:
 //   - ModeAuto: Always uses thinkingBudget=-1 (both Gemini 2.5 and 3.x)
 //   - len(Levels) > 0: Uses thinkingLevel (Gemini 3.x discrete levels)
 //   - len(Levels) == 0: Uses thinkingBudget (Gemini 2.5)
 package gemini
 import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
 // Applier applies thinking configuration for Gemini models.
 //
 // Gemini-specific behavior:
 //   - Gemini 2.5: thinkingBudget format, flash series supports ZeroAllowed
 //   - Gemini 3.x: thinkingLevel format, cannot be disabled
 //   - Use ThinkingSupport.Levels to decide output format
 type Applier struct{}
 // Compile-time check that *Applier satisfies thinking.ProviderApplier, matching the
 // assertion carried by the other provider packages.
 var _ thinking.ProviderApplier = (*Applier)(nil)
 // NewApplier creates a new Gemini thinking applier.
 func NewApplier() *Applier {
 	return &Applier{}
 }
 // init registers a Gemini applier under the provider name "gemini".
 func init() {
 	thinking.RegisterProvider("gemini", NewApplier())
 }
 // Apply writes generationConfig.thinkingConfig into a Gemini request body.
 //
 // User-defined models are delegated to applyCompatible. Otherwise the body is returned
 // unchanged when the model declares no thinking support or config.Mode is not one of
 // ModeBudget, ModeLevel, ModeNone or ModeAuto. An empty or invalid JSON body is
 // replaced by an empty object before editing.
 //
 // Format selection:
 //   - ModeLevel: level format
 //   - ModeNone: level format if the model lists Levels, else budget format
 //   - ModeBudget / ModeAuto: budget format
 //
 // Budget format output (Gemini 2.5):
 //
 //	{
 //	  "generationConfig": {
 //	    "thinkingConfig": {
 //	      "thinkingBudget": 8192,
 //	      "includeThoughts": true
 //	    }
 //	  }
 //	}
 //
 // Level format output (Gemini 3.x):
 //
 //	{
 //	  "generationConfig": {
 //	    "thinkingConfig": {
 //	      "thinkingLevel": "high",
 //	      "includeThoughts": true
 //	    }
 //	  }
 //	}
 func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
 	if thinking.IsUserDefinedModel(modelInfo) {
 		return a.applyCompatible(body, config)
 	}
 	if modelInfo.Thinking == nil {
 		return body, nil
 	}
 	mode := config.Mode
 	switch mode {
 	case thinking.ModeBudget, thinking.ModeLevel, thinking.ModeNone, thinking.ModeAuto:
 		// Supported; continue below.
 	default:
 		return body, nil
 	}
 	if len(body) == 0 || !gjson.ValidBytes(body) {
 		body = []byte(`{}`)
 	}
 	// ModeNone follows the model's capability; ModeLevel always uses the level format.
 	hasLevels := len(modelInfo.Thinking.Levels) > 0
 	if mode == thinking.ModeLevel || (mode == thinking.ModeNone && hasLevels) {
 		return a.applyLevelFormat(body, config)
 	}
 	return a.applyBudgetFormat(body, config)
 }
 // applyCompatible handles user-defined models, choosing the output format from config
 // alone:
 //   - ModeLevel, or ModeNone with a non-empty Level: level format
 //   - ModeAuto, ModeBudget, or ModeNone without a Level: budget format
 //
 // Any other mode returns body unchanged. An empty or invalid JSON body is replaced
 // by an empty object before editing.
 func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
 	var levelFormat bool
 	switch config.Mode {
 	case thinking.ModeLevel:
 		levelFormat = true
 	case thinking.ModeNone:
 		levelFormat = config.Level != ""
 	case thinking.ModeBudget, thinking.ModeAuto:
 		levelFormat = false
 	default:
 		return body, nil
 	}
 	if len(body) == 0 || !gjson.ValidBytes(body) {
 		body = []byte(`{}`)
 	}
 	if levelFormat {
 		return a.applyLevelFormat(body, config)
 	}
 	return a.applyBudgetFormat(body, config)
 }
 // applyLevelFormat writes the thinkingLevel flavour of generationConfig.thinkingConfig.
 //
 // thinkingBudget and the snake_case include_thoughts key are removed first so the
 // output never carries both level and budget, nor two spellings of includeThoughts.
 //
 //   - ModeLevel: thinkingLevel=config.Level, includeThoughts=true
 //   - ModeNone:  includeThoughts=false, plus thinkingLevel when config.Level is set
 //     (the model still thinks but its thoughts are hidden)
 //
 // For any other mode the original body is returned untouched; converting budgets to
 // levels is left to the caller.
 func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
 	mode := config.Mode
 	if mode != thinking.ModeNone && mode != thinking.ModeLevel {
 		return body, nil
 	}
 	out, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingBudget")
 	out, _ = sjson.DeleteBytes(out, "generationConfig.thinkingConfig.include_thoughts")
 	level := string(config.Level)
 	if mode == thinking.ModeLevel {
 		out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingLevel", level)
 		out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.includeThoughts", true)
 		return out, nil
 	}
 	// ModeNone: hide thoughts, keeping a level only if one was supplied.
 	out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.includeThoughts", false)
 	if level != "" {
 		out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingLevel", level)
 	}
 	return out, nil
 }
 // applyBudgetFormat writes the thinkingBudget flavour of generationConfig.thinkingConfig.
 //
 // thinkingLevel and the snake_case include_thoughts key are removed first so the
 // output never carries both level and budget, nor two spellings of includeThoughts.
 // thinkingBudget is always set to config.Budget; includeThoughts depends on the mode:
 //   - ModeNone: false (thoughts hidden even if the budget is positive)
 //   - ModeAuto: true
 //   - otherwise: true only when the budget is positive
 func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
 	var showThoughts bool
 	switch config.Mode {
 	case thinking.ModeAuto:
 		showThoughts = true
 	case thinking.ModeNone:
 		showThoughts = false
 	default:
 		showThoughts = config.Budget > 0
 	}
 	out, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingLevel")
 	out, _ = sjson.DeleteBytes(out, "generationConfig.thinkingConfig.include_thoughts")
 	out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", config.Budget)
 	out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.includeThoughts", showThoughts)
 	return out, nil
 }
--- a/internal/thinking/provider/geminicli/apply.go
+++ b/internal/thinking/provider/geminicli/apply.go
@@ -0,0 +1,126 @@
 // Package geminicli implements thinking configuration for Gemini CLI API format.
 //
 // Gemini CLI uses request.generationConfig.thinkingConfig.* path instead of
 // generationConfig.thinkingConfig.* used by standard Gemini API.
 package geminicli
 import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
 // Applier is the thinking.ProviderApplier for the Gemini CLI request format, whose
 // thinking settings live under request.generationConfig.thinkingConfig.
 type Applier struct{}
 // Compile-time check that *Applier satisfies thinking.ProviderApplier.
 var _ thinking.ProviderApplier = (*Applier)(nil)
 // NewApplier returns a ready-to-use Gemini CLI thinking applier.
 func NewApplier() *Applier {
 	return new(Applier)
 }
 // init registers a Gemini CLI applier under the provider name "gemini-cli".
 func init() {
 	thinking.RegisterProvider("gemini-cli", NewApplier())
 }
 // Apply writes request.generationConfig.thinkingConfig into a Gemini CLI request body.
 //
 // User-defined models are delegated to applyCompatible. Otherwise the body is returned
 // unchanged when the model declares no thinking support or config.Mode is not one of
 // ModeBudget, ModeLevel, ModeNone or ModeAuto. An empty or invalid JSON body is
 // replaced by an empty object before editing.
 //
 // Format selection:
 //   - ModeAuto / ModeBudget: always budget format
 //   - ModeLevel / ModeNone: level format if the model lists Levels, else budget format
 func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
 	if thinking.IsUserDefinedModel(modelInfo) {
 		return a.applyCompatible(body, config)
 	}
 	if modelInfo.Thinking == nil {
 		return body, nil
 	}
 	mode := config.Mode
 	budgetOnly := mode == thinking.ModeAuto || mode == thinking.ModeBudget
 	capabilityBased := mode == thinking.ModeLevel || mode == thinking.ModeNone
 	if !budgetOnly && !capabilityBased {
 		return body, nil
 	}
 	if len(body) == 0 || !gjson.ValidBytes(body) {
 		body = []byte(`{}`)
 	}
 	if capabilityBased && len(modelInfo.Thinking.Levels) > 0 {
 		return a.applyLevelFormat(body, config)
 	}
 	return a.applyBudgetFormat(body, config)
 }
 // applyCompatible handles user-defined models, choosing the output format from config
 // alone:
 //   - ModeLevel, or ModeNone with a non-empty Level: level format
 //   - ModeAuto, ModeBudget, or ModeNone without a Level: budget format
 //
 // Any other mode returns body unchanged. An empty or invalid JSON body is replaced
 // by an empty object before editing.
 func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
 	mode := config.Mode
 	switch mode {
 	case thinking.ModeBudget, thinking.ModeLevel, thinking.ModeNone, thinking.ModeAuto:
 		// Supported; continue below.
 	default:
 		return body, nil
 	}
 	if len(body) == 0 || !gjson.ValidBytes(body) {
 		body = []byte(`{}`)
 	}
 	wantsLevel := mode == thinking.ModeLevel || (mode == thinking.ModeNone && config.Level != "")
 	if wantsLevel {
 		return a.applyLevelFormat(body, config)
 	}
 	return a.applyBudgetFormat(body, config)
 }
 // applyLevelFormat writes the thinkingLevel flavour of
 // request.generationConfig.thinkingConfig.
 //
 // thinkingBudget and the snake_case include_thoughts key are removed first so the
 // output never carries both level and budget, nor two spellings of includeThoughts.
 //
 //   - ModeNone:  includeThoughts=false, plus thinkingLevel when config.Level is set
 //   - ModeLevel: thinkingLevel=config.Level, includeThoughts=true
 //
 // For any other mode the original body is returned untouched; converting budgets to
 // levels is left to the caller.
 func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
 	out, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget")
 	out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts")
 	level := string(config.Level)
 	switch config.Mode {
 	case thinking.ModeNone:
 		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.includeThoughts", false)
 		if level != "" {
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingLevel", level)
 		}
 		return out, nil
 	case thinking.ModeLevel:
 		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingLevel", level)
 		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
 		return out, nil
 	}
 	// Unsupported mode: hand back the caller's body, not the partially cleaned copy.
 	return body, nil
 }
 // applyBudgetFormat writes the thinkingBudget flavour of
 // request.generationConfig.thinkingConfig.
 //
 // thinkingLevel and the snake_case include_thoughts key are removed first so the
 // output never carries both level and budget, nor two spellings of includeThoughts.
 // thinkingBudget is always set to config.Budget; includeThoughts depends on the mode:
 //   - ModeNone: false
 //   - ModeAuto: true
 //   - otherwise: true only when the budget is positive
 func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
 	showThoughts := config.Budget > 0
 	if config.Mode == thinking.ModeNone {
 		showThoughts = false
 	} else if config.Mode == thinking.ModeAuto {
 		showThoughts = true
 	}
 	out, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingLevel")
 	out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts")
 	out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", config.Budget)
 	out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.includeThoughts", showThoughts)
 	return out, nil
 }
--- a/internal/thinking/provider/iflow/apply.go
+++ b/internal/thinking/provider/iflow/apply.go
@@ -0,0 +1,156 @@
 // Package iflow implements thinking configuration for iFlow models (GLM, MiniMax).
 //
 // iFlow models use boolean toggle semantics:
 //   - GLM models: chat_template_kwargs.enable_thinking (boolean)
 //   - MiniMax models: reasoning_split (boolean)
 //
 // Level values are converted to boolean: none=false, all others=true
 // See: _bmad-output/planning-artifacts/architecture.md#Epic-9
 package iflow
 import (
 	"strings"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
 // Applier is the thinking.ProviderApplier for iFlow models.
 //
 // iFlow-specific behavior:
 //   - GLM models: chat_template_kwargs.enable_thinking boolean, plus
 //     clear_thinking=false when thinking is enabled
 //   - MiniMax models: reasoning_split boolean
 //   - Levels collapse to a boolean: none=false, anything else=true
 //   - Thinking is on/off only; there are no graded levels or budgets
 type Applier struct{}
 // Compile-time check that *Applier satisfies thinking.ProviderApplier.
 var _ thinking.ProviderApplier = (*Applier)(nil)
 // NewApplier returns a ready-to-use iFlow thinking applier.
 func NewApplier() *Applier {
 	return new(Applier)
 }
 // init registers an iFlow applier under the provider name "iflow".
 func init() {
 	thinking.RegisterProvider("iflow", NewApplier())
 }
 // Apply writes the iFlow thinking toggle that matches the model family.
 //
 // The body is returned unchanged for user-defined models, for models that declare no
 // thinking support, and for model IDs that are neither GLM nor MiniMax.
 //
 // GLM output:
 //
 //	{
 //	  "chat_template_kwargs": {
 //	    "enable_thinking": true,
 //	    "clear_thinking": false
 //	  }
 //	}
 //
 // MiniMax output:
 //
 //	{
 //	  "reasoning_split": true
 //	}
 func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
 	if thinking.IsUserDefinedModel(modelInfo) || modelInfo.Thinking == nil {
 		return body, nil
 	}
 	switch id := modelInfo.ID; {
 	case isGLMModel(id):
 		return applyGLM(body, config), nil
 	case isMiniMaxModel(id):
 		return applyMiniMax(body, config), nil
 	default:
 		return body, nil
 	}
 }
 // configToBoolean collapses a ThinkingConfig into the on/off switch iFlow models use.
 //
 // Result by mode:
 //   - ModeNone: false
 //   - ModeBudget: true only when Budget > 0
 //   - ModeLevel: false only when Level is the "none" level
 //   - ModeAuto and any unrecognized mode: true
 func configToBoolean(config thinking.ThinkingConfig) bool {
 	mode := config.Mode
 	if mode == thinking.ModeNone {
 		return false
 	}
 	if mode == thinking.ModeBudget {
 		return config.Budget > 0
 	}
 	if mode == thinking.ModeLevel {
 		return config.Level != thinking.LevelNone
 	}
 	// ModeAuto and unknown modes default to thinking enabled.
 	return true
 }
 // applyGLM writes the GLM thinking toggle under chat_template_kwargs.
 //
 // When thinking is enabled:
 //
 //	{"chat_template_kwargs": {"enable_thinking": true, "clear_thinking": false}}
 //
 // When thinking is disabled:
 //
 //	{"chat_template_kwargs": {"enable_thinking": false}}
 //
 // clear_thinking is only written in the enabled case, to preserve thinking output in
 // the response. An empty or invalid JSON body is replaced by an empty object first.
 func applyGLM(body []byte, config thinking.ThinkingConfig) []byte {
 	if len(body) == 0 || !gjson.ValidBytes(body) {
 		body = []byte(`{}`)
 	}
 	on := configToBoolean(config)
 	out, _ := sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", on)
 	if !on {
 		return out
 	}
 	out, _ = sjson.SetBytes(out, "chat_template_kwargs.clear_thinking", false)
 	return out
 }
 // applyMiniMax writes the MiniMax thinking toggle as a top-level boolean.
 //
 // Output:
 //
 //	{"reasoning_split": true/false}
 //
 // An empty or invalid JSON body is replaced by an empty object first.
 func applyMiniMax(body []byte, config thinking.ThinkingConfig) []byte {
 	if len(body) == 0 || !gjson.ValidBytes(body) {
 		body = []byte(`{}`)
 	}
 	out, _ := sjson.SetBytes(body, "reasoning_split", configToBoolean(config))
 	return out
 }
 // isGLMModel reports whether modelID names a GLM series model, i.e. whether its
 // lower-cased form starts with "glm". Such models take the
 // chat_template_kwargs.enable_thinking format.
 func isGLMModel(modelID string) bool {
 	lowered := strings.ToLower(modelID)
 	return strings.HasPrefix(lowered, "glm")
 }
 // isMiniMaxModel reports whether modelID names a MiniMax series model, i.e. whether
 // its lower-cased form starts with "minimax". Such models take the reasoning_split
 // format.
 func isMiniMaxModel(modelID string) bool {
 	lowered := strings.ToLower(modelID)
 	return strings.HasPrefix(lowered, "minimax")
 }
--- a/internal/thinking/provider/openai/apply.go
+++ b/internal/thinking/provider/openai/apply.go
@@ -0,0 +1,128 @@
 // Package openai implements thinking configuration for OpenAI/Codex models.
 //
 // OpenAI models use the reasoning_effort format with discrete levels
 // (low/medium/high). Some models support xhigh and none levels.
 // See: _bmad-output/planning-artifacts/architecture.md#Epic-8
 package openai
 import (
 	"strings"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
 // Applier is the thinking.ProviderApplier for OpenAI models.
 //
 // OpenAI-specific behavior:
 //   - Writes reasoning_effort, a string level (low/medium/high/xhigh)
 //   - Works with levels only; numeric budgets are not emitted
 //   - Some models support ZeroAllowed (gpt-5.1, gpt-5.2)
 type Applier struct{}
 // Compile-time check that *Applier satisfies thinking.ProviderApplier.
 var _ thinking.ProviderApplier = (*Applier)(nil)
 // NewApplier returns a ready-to-use OpenAI thinking applier.
 func NewApplier() *Applier {
 	return new(Applier)
 }
 // init registers an OpenAI applier under the provider name "openai".
 func init() {
 	thinking.RegisterProvider("openai", NewApplier())
 }
 // Apply writes reasoning_effort into an OpenAI request body.
 //
 // User-defined models are delegated to applyCompatibleOpenAI. Otherwise the body is
 // returned unchanged when the model declares no thinking support or when config.Mode
 // is neither ModeLevel nor ModeNone. An empty or invalid JSON body is replaced by an
 // empty object before editing.
 //
 // ModeLevel writes config.Level directly. For ModeNone the effort is chosen as the
 // first non-empty candidate of:
 //  1. "none", when Budget is 0 and the model allows zero or lists a "none" level
 //  2. config.Level
 //  3. the first entry of the model's Levels
 //
 // and the body is left unchanged if no candidate is found.
 //
 // Expected output format:
 //
 //	{
 //	  "reasoning_effort": "high"
 //	}
 func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
 	if thinking.IsUserDefinedModel(modelInfo) {
 		return applyCompatibleOpenAI(body, config)
 	}
 	if modelInfo.Thinking == nil {
 		return body, nil
 	}
 	switch config.Mode {
 	case thinking.ModeLevel, thinking.ModeNone:
 		// Handled below.
 	default:
 		return body, nil
 	}
 	if len(body) == 0 || !gjson.ValidBytes(body) {
 		body = []byte(`{}`)
 	}
 	if config.Mode == thinking.ModeLevel {
 		out, _ := sjson.SetBytes(body, "reasoning_effort", string(config.Level))
 		return out, nil
 	}
 	// ModeNone: resolve the effort through the fallback chain.
 	caps := modelInfo.Thinking
 	effort := ""
 	if config.Budget == 0 && (caps.ZeroAllowed || hasLevel(caps.Levels, string(thinking.LevelNone))) {
 		effort = string(thinking.LevelNone)
 	}
 	if effort == "" {
 		effort = string(config.Level)
 	}
 	if effort == "" && len(caps.Levels) > 0 {
 		effort = caps.Levels[0]
 	}
 	if effort == "" {
 		return body, nil
 	}
 	out, _ := sjson.SetBytes(body, "reasoning_effort", effort)
 	return out, nil
 }
 // applyCompatibleOpenAI writes reasoning_effort for user-defined models, deriving the
 // effort from config alone.
 //
 //   - ModeLevel:  config.Level; body unchanged if the level is empty
 //   - ModeNone:   config.Level if set, otherwise the "none" level
 //   - ModeAuto:   the "auto" level, passed through as-is
 //   - ModeBudget: the level thinking.ConvertBudgetToLevel maps the budget to;
 //     body unchanged if the conversion fails
 //
 // Any other mode returns the body without writing an effort. An empty or invalid JSON
 // body is replaced by an empty object first, so that replacement is what gets returned
 // even when no effort is written.
 func applyCompatibleOpenAI(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
 	if len(body) == 0 || !gjson.ValidBytes(body) {
 		body = []byte(`{}`)
 	}
 	effort := string(config.Level)
 	switch config.Mode {
 	case thinking.ModeLevel:
 		if effort == "" {
 			return body, nil
 		}
 	case thinking.ModeNone:
 		if effort == "" {
 			effort = string(thinking.LevelNone)
 		}
 	case thinking.ModeAuto:
 		effort = string(thinking.LevelAuto)
 	case thinking.ModeBudget:
 		converted, ok := thinking.ConvertBudgetToLevel(config.Budget)
 		if !ok {
 			return body, nil
 		}
 		effort = converted
 	default:
 		return body, nil
 	}
 	out, _ := sjson.SetBytes(body, "reasoning_effort", effort)
 	return out, nil
 }
 // hasLevel reports whether levels contains target. Each entry is trimmed of
 // surrounding whitespace and compared case-insensitively; target is used as given.
 func hasLevel(levels []string, target string) bool {
 	for idx := range levels {
 		if entry := strings.TrimSpace(levels[idx]); strings.EqualFold(entry, target) {
 			return true
 		}
 	}
 	return false
 }
--- a/internal/thinking/strip.go
+++ b/internal/thinking/strip.go
@@ -0,0 +1,58 @@
 // Package thinking provides unified thinking configuration processing.
 package thinking
 import (
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
 // StripThinkingConfig deletes the provider-specific thinking fields from a request body.
 //
 // It is meant for requests that carry thinking configuration while targeting a model
 // without thinking support; the fields are dropped silently so the upstream API does
 // not reject the request.
 //
 // Parameters:
 //   - body: original request body JSON
 //   - provider: provider name, which selects the fields to delete
 //
 // Fields removed per provider:
 //   - claude: thinking
 //   - gemini: generationConfig.thinkingConfig
 //   - gemini-cli, antigravity: request.generationConfig.thinkingConfig
 //   - openai: reasoning_effort
 //   - codex: reasoning.effort
 //   - iflow: chat_template_kwargs.enable_thinking, chat_template_kwargs.clear_thinking,
 //     reasoning_split, reasoning_effort
 //
 // The body is returned unchanged when it is empty or not valid JSON, or when the
 // provider is not one of the above.
 func StripThinkingConfig(body []byte, provider string) []byte {
 	if len(body) == 0 || !gjson.ValidBytes(body) {
 		return body
 	}
 	var doomed []string
 	switch provider {
 	case "iflow":
 		doomed = []string{
 			"chat_template_kwargs.enable_thinking",
 			"chat_template_kwargs.clear_thinking",
 			"reasoning_split",
 			"reasoning_effort",
 		}
 	case "codex":
 		doomed = []string{"reasoning.effort"}
 	case "openai":
 		doomed = []string{"reasoning_effort"}
 	case "gemini-cli", "antigravity":
 		doomed = []string{"request.generationConfig.thinkingConfig"}
 	case "gemini":
 		doomed = []string{"generationConfig.thinkingConfig"}
 	case "claude":
 		doomed = []string{"thinking"}
 	default:
 		return body
 	}
 	stripped := body
 	for i := range doomed {
 		stripped, _ = sjson.DeleteBytes(stripped, doomed[i])
 	}
 	return stripped
 }
--- a/internal/thinking/suffix.go
+++ b/internal/thinking/suffix.go
@@ -0,0 +1,146 @@
 // Package thinking provides unified thinking configuration processing.
 //
 // This file implements suffix parsing functionality for extracting
 // thinking configuration from model names in the format model(value).
 package thinking
 import (
 	"strconv"
 	"strings"
 )
 // ParseSuffix splits a model name of the form model-name(value) into the
 // bare model name and the raw text between the parentheses.
 //
 // A suffix is recognised only when the name ends with ")" and contains a
 // "("; the last "(" in the name marks where the suffix begins.
 //
 // Examples:
 //   - "claude-sonnet-4-5(16384)" -> ModelName="claude-sonnet-4-5", RawSuffix="16384"
 //   - "gpt-5.2(high)" -> ModelName="gpt-5.2", RawSuffix="high"
 //   - "gemini-2.5-pro" -> ModelName="gemini-2.5-pro", HasSuffix=false
 //
 // The suffix content is neither validated nor interpreted here. Use
 // ParseNumericSuffix, ParseLevelSuffix, etc. for that.
 func ParseSuffix(model string) SuffixResult {
 	withoutSuffix := SuffixResult{ModelName: model}
 	// The name must close with ")" to carry a suffix at all.
 	if !strings.HasSuffix(model, ")") {
 		return withoutSuffix
 	}
 	openAt := strings.LastIndexByte(model, '(')
 	if openAt < 0 {
 		return withoutSuffix
 	}
 	return SuffixResult{
 		ModelName: model[:openAt],
 		HasSuffix: true,
 		RawSuffix: model[openAt+1 : len(model)-1],
 	}
 }
 // ParseNumericSuffix interprets a raw suffix (ParseSuffix.RawSuffix) as a
 // numeric thinking budget.
 //
 // The text is parsed with strconv.Atoi and accepted only when the result is
 // not negative. Because the budget is a Go int, values outside the
 // platform's int range (32-bit or 64-bit) fail to parse and yield ok=false.
 //
 // Examples:
 //   - "8192" -> budget=8192, ok=true
 //   - "0" -> budget=0, ok=true (represents ModeNone)
 //   - "08192" -> budget=8192, ok=true (leading zeros accepted)
 //   - "" -> budget=0, ok=false
 //   - "-1" -> budget=0, ok=false (negative numbers are not valid numeric suffixes)
 //   - "high" -> budget=0, ok=false (not a number)
 //   - "9223372036854775808" -> budget=0, ok=false (overflow on 64-bit systems)
 //
 // "-1" means auto mode and is recognised by ParseSpecialSuffix instead.
 func ParseNumericSuffix(rawSuffix string) (budget int, ok bool) {
 	if len(rawSuffix) == 0 {
 		return 0, false
 	}
 	parsed, err := strconv.Atoi(rawSuffix)
 	// Reject anything unparsable as well as negative values.
 	if err != nil || parsed < 0 {
 		return 0, false
 	}
 	return parsed, true
 }
 // ParseSpecialSuffix interprets a raw suffix as one of the special values
 // that switch the thinking mode:
 //   - "none" -> ModeNone (disables thinking)
 //   - "auto" -> ModeAuto (automatic/dynamic thinking)
 //   - "-1"   -> ModeAuto (numeric representation of auto mode)
 //
 // Matching ignores letter case. For any other input, including the empty
 // string, the result is (ModeBudget, false).
 func ParseSpecialSuffix(rawSuffix string) (mode ThinkingMode, ok bool) {
 	lowered := strings.ToLower(rawSuffix)
 	if lowered == "none" {
 		return ModeNone, true
 	}
 	if lowered == "auto" || lowered == "-1" {
 		return ModeAuto, true
 	}
 	return ModeBudget, false
 }
 // ParseLevelSuffix interprets a raw suffix (ParseSuffix.RawSuffix) as a
 // discrete thinking level.
 //
 // Only the effort levels minimal, low, medium, high and xhigh are accepted,
 // matched without regard to letter case. The special values none and auto
 // are deliberately rejected here so that callers can give ParseSpecialSuffix
 // priority.
 //
 // Examples:
 //   - "high" -> level=LevelHigh, ok=true
 //   - "HIGH" -> level=LevelHigh, ok=true (case insensitive)
 //   - "medium" -> level=LevelMedium, ok=true
 //   - "none" -> level="", ok=false (special value, use ParseSpecialSuffix)
 //   - "auto" -> level="", ok=false (special value, use ParseSpecialSuffix)
 //   - "8192" -> level="", ok=false (numeric, use ParseNumericSuffix)
 //   - "ultra" -> level="", ok=false (unknown level)
 //   - "" -> level="", ok=false
 func ParseLevelSuffix(rawSuffix string) (level ThinkingLevel, ok bool) {
 	// The level constants are spelled as their lower-case names, so the
 	// lowered suffix can be compared against them directly.
 	switch candidate := ThinkingLevel(strings.ToLower(rawSuffix)); candidate {
 	case LevelMinimal, LevelLow, LevelMedium, LevelHigh, LevelXHigh:
 		return candidate, true
 	}
 	return "", false
 }
--- a/internal/thinking/text.go
+++ b/internal/thinking/text.go
@@ -0,0 +1,41 @@
 package thinking
 import (
 	"github.com/tidwall/gjson"
 )
 // GetThinkingText returns the thinking text carried by a content part.
 //
 // Lookup order:
 //  1. A string "text" field on the part itself, e.g. { "text": "..." } or
 //     the Gemini-style { "thought": true, "text": "..." }
 //  2. A string "thinking" field, e.g. { "thinking": "..." }
 //  3. An object "thinking" field, whose string "text" field and then string
 //     "thinking" field are tried, e.g.
 //     { "thinking": { "text": "...", "cache_control": {...} } }
 //
 // An empty string is returned when none of these yields a string.
 func GetThinkingText(part gjson.Result) string {
 	if direct := part.Get("text"); direct.Exists() && direct.Type == gjson.String {
 		return direct.String()
 	}
 	wrapped := part.Get("thinking")
 	switch {
 	case !wrapped.Exists():
 		return ""
 	case wrapped.Type == gjson.String:
 		return wrapped.String()
 	case !wrapped.IsObject():
 		return ""
 	}
 	// Wrapped object: prefer its "text" field, then fall back to "thinking".
 	for _, key := range [...]string{"text", "thinking"} {
 		if inner := wrapped.Get(key); inner.Exists() && inner.Type == gjson.String {
 			return inner.String()
 		}
 	}
 	return ""
 }
--- a/internal/thinking/types.go
+++ b/internal/thinking/types.go
@@ -0,0 +1,116 @@
 // Package thinking provides unified thinking configuration processing.
 //
 // This package offers a unified interface for parsing, validating, and applying
 // thinking configurations across various AI providers (Claude, Gemini, OpenAI, iFlow).
 package thinking
 import "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 // ThinkingMode identifies which kind of thinking configuration is in effect.
 type ThinkingMode int
 // Thinking modes, numbered from zero in declaration order.
 const (
 	// ModeBudget selects a numeric budget (suffix such as "(1000)").
 	ModeBudget ThinkingMode = iota
 	// ModeLevel selects a discrete level (suffix such as "(high)").
 	ModeLevel
 	// ModeNone means thinking is disabled (suffix "(none)" or budget=0).
 	ModeNone
 	// ModeAuto means automatic/dynamic thinking (suffix "(auto)" or budget=-1).
 	ModeAuto
 )
 // String returns the lower-case name of the mode, or "unknown" for a value
 // that is not one of the declared modes.
 func (m ThinkingMode) String() string {
 	names := [...]string{
 		ModeBudget: "budget",
 		ModeLevel:  "level",
 		ModeNone:   "none",
 		ModeAuto:   "auto",
 	}
 	if m < 0 || int(m) >= len(names) {
 		return "unknown"
 	}
 	return names[m]
 }
 // ThinkingLevel represents a discrete thinking level, stored as its
 // lower-case name.
 type ThinkingLevel string
 // Known thinking levels. LevelNone and LevelAuto are special values; the
 // remaining constants are effort levels listed from lowest to highest.
 const (
 	// LevelNone disables thinking.
 	LevelNone ThinkingLevel = "none"
 	// LevelAuto enables automatic/dynamic thinking.
 	LevelAuto ThinkingLevel = "auto"
 	// LevelMinimal sets minimal thinking effort.
 	LevelMinimal ThinkingLevel = "minimal"
 	// LevelLow sets low thinking effort.
 	LevelLow ThinkingLevel = "low"
 	// LevelMedium sets medium thinking effort.
 	LevelMedium ThinkingLevel = "medium"
 	// LevelHigh sets high thinking effort.
 	LevelHigh ThinkingLevel = "high"
 	// LevelXHigh sets extra-high thinking effort.
 	LevelXHigh ThinkingLevel = "xhigh"
 )
 // ThinkingConfig represents a unified thinking configuration.
 //
 // This struct is used to pass thinking configuration information between components.
 // Depending on Mode, either the Budget or the Level field is effective:
 //   - ModeNone: Budget=0, Level is ignored
 //   - ModeAuto: Budget=-1, Level is ignored
 //   - ModeBudget: Budget is a positive integer, Level is ignored
 //   - ModeLevel: Budget is ignored, Level is a valid level
 type ThinkingConfig struct {
 	// Mode specifies the configuration mode and thereby which of the
 	// fields below is meaningful.
 	Mode ThinkingMode
 	// Budget is the thinking budget (token count), only effective when Mode is ModeBudget.
 	// Special values: 0 means disabled, -1 means automatic.
 	Budget int
 	// Level is the thinking level, only effective when Mode is ModeLevel.
 	Level ThinkingLevel
 }
 // SuffixResult represents the result of parsing a model name for thinking suffix.
 //
 // A thinking suffix is specified in the format model-name(value), where value
 // can be a numeric budget (e.g., "16384") or a level name (e.g., "high").
 // The value is reported verbatim; interpreting it is left to the caller.
 type SuffixResult struct {
 	// ModelName is the model name with the suffix removed.
 	// If no suffix was found, this equals the original input.
 	ModelName string
 	// HasSuffix indicates whether a parenthesised suffix was found.
 	HasSuffix bool
 	// RawSuffix is the content inside the parentheses, without the parentheses.
 	// Empty string if HasSuffix is false; it is also empty for an empty
 	// pair of parentheses such as "model()".
 	RawSuffix string
 }
 // ProviderApplier defines the interface for provider-specific thinking configuration application.
 //
 // Types implementing this interface are responsible for converting a unified ThinkingConfig
 // into the provider-specific format and applying it to the request body.
 //
 // Implementation requirements:
 //   - Apply must be idempotent
 //   - Apply must not modify the input config or modelInfo
 //   - Apply returns a modified copy of the request body
 //   - Apply returns an appropriate ThinkingError for unsupported configurations
 type ProviderApplier interface {
 	// Apply applies the thinking configuration to the request body.
 	//
 	// Parameters:
 	//   - body: Original request body JSON
 	//   - config: Unified thinking configuration
 	//   - modelInfo: Model registry information containing ThinkingSupport properties
 	//
 	// Returns:
 	//   - Modified request body JSON
 	//   - ThinkingError if the configuration is invalid or unsupported
 	Apply(body []byte, config ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error)
 }
--- a/internal/thinking/validate.go
+++ b/internal/thinking/validate.go
@@ -0,0 +1,378 @@
 // Package thinking provides unified thinking configuration processing logic.
 package thinking
 import (
 	"fmt"
 	"strings"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	log "github.com/sirupsen/logrus"
 )
 // ValidateConfig validates a thinking configuration against model capabilities.
 //
 // This function performs comprehensive validation:
 //   - Checks if the model supports thinking
 //   - Auto-converts between Budget and Level formats based on model capability
 //   - Validates that requested level is in the model's supported levels list
 //   - Clamps budget values to model's allowed range
 //   - When converting Budget -> Level for level-only models, clamps the derived standard level to the nearest supported level
 //     (special values none/auto are preserved)
 //   - When config comes from a model suffix, strict budget validation is disabled (we clamp instead of error)
 //
 // Parameters:
 //   - config: The thinking configuration to validate (passed by value; the caller's copy is not modified)
 //   - modelInfo: Model registry entry; its Thinking field carries the ThinkingSupport properties
 //     (a nil modelInfo or nil Thinking means no thinking support)
 //   - fromFormat: Source provider format (used to determine strict validation rules)
 //   - toFormat: Target provider format
 //   - fromSuffix: Whether config was sourced from model suffix
 //
 // fromFormat and toFormat are trimmed and lower-cased before use.
 //
 // Returns:
 //   - Normalized ThinkingConfig with clamped values
 //   - ThinkingError if validation fails (ErrThinkingNotSupported, ErrUnknownLevel,
 //     ErrLevelNotSupported, ErrBudgetOutOfRange)
 //
 // Auto-conversion behavior:
 //   - Budget-only model + Level config → Level converted to Budget
 //   - Level-only model + Budget config → Budget converted to Level
 //   - Hybrid model → preserve original format
 func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, fromFormat, toFormat string, fromSuffix bool) (*ThinkingConfig, error) {
 	fromFormat, toFormat = strings.ToLower(strings.TrimSpace(fromFormat)), strings.ToLower(strings.TrimSpace(toFormat))
 	model := "unknown"
 	support := (*registry.ThinkingSupport)(nil)
 	if modelInfo != nil {
 		if modelInfo.ID != "" {
 			model = modelInfo.ID
 		}
 		support = modelInfo.Thinking
 	}
 	// Without thinking support only an explicit "disabled" config is acceptable.
 	if support == nil {
 		if config.Mode != ModeNone {
 			return nil, NewThinkingErrorWithModel(ErrThinkingNotSupported, "thinking not supported for this model", model)
 		}
 		return &config, nil
 	}
 	// Unsupported levels may be clamped only when translating from a
 	// budget-based source format to a level-based target format.
 	allowClampUnsupported := isBudgetBasedProvider(fromFormat) && isLevelBasedProvider(toFormat)
 	// Out-of-range budgets are an error only for non-suffix configs whose
 	// source and target formats belong to the same provider family.
 	strictBudget := !fromSuffix && fromFormat != "" && isSameProviderFamily(fromFormat, toFormat)
 	budgetDerivedFromLevel := false
 	capability := detectModelCapability(modelInfo)
 	switch capability {
 	case CapabilityBudgetOnly:
 		if config.Mode == ModeLevel {
 			// LevelAuto is left as is here; it is turned into ModeAuto below.
 			if config.Level == LevelAuto {
 				break
 			}
 			budget, ok := ConvertLevelToBudget(string(config.Level))
 			if !ok {
 				return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("unknown level: %s", config.Level))
 			}
 			config.Mode = ModeBudget
 			config.Budget = budget
 			config.Level = ""
 			budgetDerivedFromLevel = true
 		}
 	case CapabilityLevelOnly:
 		if config.Mode == ModeBudget {
 			level, ok := ConvertBudgetToLevel(config.Budget)
 			if !ok {
 				return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("budget %d cannot be converted to a valid level", config.Budget))
 			}
 			// When converting Budget -> Level for level-only models, clamp the derived standard level
 			// to the nearest supported level. Special values (none/auto) are preserved.
 			config.Mode = ModeLevel
 			config.Level = clampLevel(ThinkingLevel(level), modelInfo, toFormat)
 			config.Budget = 0
 		}
 	case CapabilityHybrid:
 		// Hybrid models accept both formats, so the original format is kept.
 	}
 	// Normalize the special values so that later steps only see ModeNone / ModeAuto.
 	if config.Mode == ModeLevel && config.Level == LevelNone {
 		config.Mode = ModeNone
 		config.Budget = 0
 		config.Level = ""
 	}
 	if config.Mode == ModeLevel && config.Level == LevelAuto {
 		config.Mode = ModeAuto
 		config.Budget = -1
 		config.Level = ""
 	}
 	if config.Mode == ModeBudget && config.Budget == 0 {
 		config.Mode = ModeNone
 		config.Level = ""
 	}
 	if len(support.Levels) > 0 && config.Mode == ModeLevel {
 		if !isLevelSupported(string(config.Level), support.Levels) {
 			if allowClampUnsupported {
 				config.Level = clampLevel(config.Level, modelInfo, toFormat)
 			}
 			if !isLevelSupported(string(config.Level), support.Levels) {
 				// User explicitly specified an unsupported level - return error
 				// (budget-derived levels may be clamped based on source format)
 				validLevels := normalizeLevels(support.Levels)
 				message := fmt.Sprintf("level %q not supported, valid levels: %s", strings.ToLower(string(config.Level)), strings.Join(validLevels, ", "))
 				return nil, NewThinkingError(ErrLevelNotSupported, message)
 			}
 		}
 	}
 	// Strict mode rejects an out-of-range budget instead of clamping it.
 	// Budgets that were derived from a level above are exempt, and the check
 	// is skipped when the model defines no range (Min and Max both zero).
 	if strictBudget && config.Mode == ModeBudget && !budgetDerivedFromLevel {
 		min, max := support.Min, support.Max
 		if min != 0 || max != 0 {
 			if config.Budget < min || config.Budget > max || (config.Budget == 0 && !support.ZeroAllowed) {
 				message := fmt.Sprintf("budget %d out of range [%d,%d]", config.Budget, min, max)
 				return nil, NewThinkingError(ErrBudgetOutOfRange, message)
 			}
 		}
 	}
 	// Convert ModeAuto to mid-range if dynamic not allowed
 	if config.Mode == ModeAuto && !support.DynamicAllowed {
 		config = convertAutoToMidRange(config, support, toFormat, model)
 	}
 	if config.Mode == ModeNone && toFormat == "claude" {
 		// Claude supports explicit disable via thinking.type="disabled".
 		// Keep Budget=0 so applier can omit budget_tokens.
 		config.Budget = 0
 		config.Level = ""
 	} else {
 		// ModeLevel carries no budget, so only the budget-bearing modes are clamped.
 		switch config.Mode {
 		case ModeBudget, ModeAuto, ModeNone:
 			config.Budget = clampBudget(config.Budget, modelInfo, toFormat)
 		}
 		// ModeNone with clamped Budget > 0: set Level to lowest for Level-only/Hybrid models
 		// This ensures Apply layer doesn't need to access support.Levels
 		if config.Mode == ModeNone && config.Budget > 0 && len(support.Levels) > 0 {
 			config.Level = ThinkingLevel(support.Levels[0])
 		}
 	}
 	return &config, nil
 }
 // convertAutoToMidRange replaces ModeAuto with a fixed setting for a model
 // that does not allow dynamic thinking.
 //
 // Selection rules:
 //   - Levels are defined and there is no numeric range (Min and Max both
 //     zero): ModeLevel with LevelMedium and Budget 0
 //   - Otherwise mid = (Min + Max) / 2 is computed. A positive mid gives
 //     ModeBudget with Budget = mid. A non-positive mid gives ModeNone with
 //     Budget 0 when zero is allowed, else ModeBudget with Budget = Min.
 //
 // Each conversion is logged at debug level with the fields provider, model,
 // original_mode and clamped_to.
 func convertAutoToMidRange(config ThinkingConfig, support *registry.ThinkingSupport, provider, model string) ThinkingConfig {
 	levelOnly := support.Min == 0 && support.Max == 0 && len(support.Levels) > 0
 	if levelOnly {
 		config.Mode = ModeLevel
 		config.Level = LevelMedium
 		config.Budget = 0
 		levelFields := log.Fields{
 			"provider":      provider,
 			"model":         model,
 			"original_mode": "auto",
 			"clamped_to":    string(LevelMedium),
 		}
 		log.WithFields(levelFields).Debug("thinking: mode converted, dynamic not allowed, using medium level |")
 		return config
 	}
 	switch mid := (support.Min + support.Max) / 2; {
 	case mid > 0:
 		config.Mode = ModeBudget
 		config.Budget = mid
 	case support.ZeroAllowed:
 		config.Mode = ModeNone
 		config.Budget = 0
 	default:
 		config.Mode = ModeBudget
 		config.Budget = support.Min
 	}
 	budgetFields := log.Fields{
 		"provider":      provider,
 		"model":         model,
 		"original_mode": "auto",
 		"clamped_to":    config.Budget,
 	}
 	log.WithFields(budgetFields).Debug("thinking: mode converted, dynamic not allowed |")
 	return config
 }
 // standardLevelOrder defines the canonical ordering of thinking levels from lowest to highest.
 // The special values LevelNone and LevelAuto are not part of it, so
 // levelIndex reports -1 for them.
 var standardLevelOrder = []ThinkingLevel{LevelMinimal, LevelLow, LevelMedium, LevelHigh, LevelXHigh}
 // clampLevel maps level onto the closest level the model supports.
 //
 // Closeness is measured by position in standardLevelOrder; when two
 // supported levels are equally close the lower one wins. The level is
 // returned unchanged when the model lists no levels, when it already
 // supports the level, when the level is not a standard one (e.g. none or
 // auto), or when none of the model's levels is a standard one. An actual
 // clamp is logged at debug level.
 func clampLevel(level ThinkingLevel, modelInfo *registry.ModelInfo, provider string) ThinkingLevel {
 	var (
 		modelID   = "unknown"
 		available []string
 	)
 	if modelInfo != nil {
 		if modelInfo.ID != "" {
 			modelID = modelInfo.ID
 		}
 		if modelInfo.Thinking != nil {
 			available = modelInfo.Thinking.Levels
 		}
 	}
 	if len(available) == 0 || isLevelSupported(string(level), available) {
 		return level
 	}
 	wanted := levelIndex(string(level))
 	if wanted == -1 {
 		return level
 	}
 	nearest, nearestGap := -1, len(standardLevelOrder)+1
 	for _, name := range available {
 		idx := levelIndex(strings.TrimSpace(name))
 		if idx == -1 {
 			continue
 		}
 		gap := abs(wanted - idx)
 		// Strictly closer wins; on equal distance prefer the lower level.
 		if gap < nearestGap || (gap == nearestGap && idx < nearest) {
 			nearest, nearestGap = idx, gap
 		}
 	}
 	if nearest < 0 {
 		return level
 	}
 	clamped := standardLevelOrder[nearest]
 	log.WithFields(log.Fields{
 		"provider":       provider,
 		"model":          modelID,
 		"original_value": string(level),
 		"clamped_to":     string(clamped),
 	}).Debug("thinking: level clamped |")
 	return clamped
 }
 // clampBudget limits a budget value to the range the model supports.
 //
 // The value is returned untouched when the model has no thinking support
 // information, when it is the auto marker (-1), or when the model defines
 // no numeric range (Min and Max both zero). A zero budget on a model that
 // does not allow zero becomes Min and is logged as a warning. Otherwise
 // values below Min are raised to Min (an allowed zero is kept) and values
 // above Max are lowered to Max, each clamp being logged through logClamp.
 func clampBudget(value int, modelInfo *registry.ModelInfo, provider string) int {
 	if modelInfo == nil || modelInfo.Thinking == nil {
 		return value
 	}
 	support := modelInfo.Thinking
 	modelID := modelInfo.ID
 	if modelID == "" {
 		modelID = "unknown"
 	}
 	// Auto value (-1) passes through without clamping.
 	if value == -1 {
 		return value
 	}
 	lo, hi := support.Min, support.Max
 	if value == 0 && !support.ZeroAllowed {
 		zeroFields := log.Fields{
 			"provider":       provider,
 			"model":          modelID,
 			"original_value": value,
 			"clamped_to":     lo,
 			"min":            lo,
 			"max":            hi,
 		}
 		log.WithFields(zeroFields).Warn("thinking: budget zero not allowed |")
 		return lo
 	}
 	// Some models are level-only and do not define numeric budget ranges.
 	if lo == 0 && hi == 0 {
 		return value
 	}
 	switch {
 	case value < lo:
 		if value == 0 && support.ZeroAllowed {
 			return 0
 		}
 		logClamp(provider, modelID, value, lo, lo, hi)
 		return lo
 	case value > hi:
 		logClamp(provider, modelID, value, hi, lo, hi)
 		return hi
 	default:
 		return value
 	}
 }
 // isLevelSupported reports whether level appears in supported. Entries of
 // supported are trimmed of surrounding whitespace and compared
 // case-insensitively; level is compared as given.
 func isLevelSupported(level string, supported []string) bool {
 	for i := range supported {
 		if candidate := strings.TrimSpace(supported[i]); strings.EqualFold(level, candidate) {
 			return true
 		}
 	}
 	return false
 }
 // levelIndex returns the position of level within standardLevelOrder,
 // compared case-insensitively, or -1 when it is not a standard level.
 func levelIndex(level string) int {
 	for i := 0; i < len(standardLevelOrder); i++ {
 		if strings.EqualFold(string(standardLevelOrder[i]), level) {
 			return i
 		}
 	}
 	return -1
 }
 // normalizeLevels returns a new slice holding each entry of levels trimmed
 // of surrounding whitespace and lower-cased, in the same order.
 func normalizeLevels(levels []string) []string {
 	normalized := make([]string, 0, len(levels))
 	for _, raw := range levels {
 		normalized = append(normalized, strings.ToLower(strings.TrimSpace(raw)))
 	}
 	return normalized
 }
 // isBudgetBasedProvider reports whether provider is one of the formats
 // treated as budget-based: gemini, gemini-cli, antigravity or claude.
 // The comparison is exact (case-sensitive).
 func isBudgetBasedProvider(provider string) bool {
 	return provider == "gemini" ||
 		provider == "gemini-cli" ||
 		provider == "antigravity" ||
 		provider == "claude"
 }
 // isLevelBasedProvider reports whether provider is one of the formats
 // treated as level-based: openai, openai-response or codex.
 // The comparison is exact (case-sensitive).
 func isLevelBasedProvider(provider string) bool {
 	return provider == "openai" ||
 		provider == "openai-response" ||
 		provider == "codex"
 }
 // isGeminiFamily reports whether provider is gemini, gemini-cli or
 // antigravity. The comparison is exact (case-sensitive).
 func isGeminiFamily(provider string) bool {
 	return provider == "gemini" ||
 		provider == "gemini-cli" ||
 		provider == "antigravity"
 }
 // isSameProviderFamily reports whether from and to denote the same
 // provider family: either the two names are identical, or both belong to
 // the Gemini family as decided by isGeminiFamily.
 func isSameProviderFamily(from, to string) bool {
 	return from == to || (isGeminiFamily(from) && isGeminiFamily(to))
 }
 // abs returns x for non-negative x and -x otherwise.
 func abs(x int) int {
 	if x >= 0 {
 		return x
 	}
 	return -x
 }
 // logClamp writes a debug log entry recording that a budget was clamped:
 // the provider, the model, the original value, the allowed range and the
 // value that was used instead.
 func logClamp(provider, model string, original, clampedTo, min, max int) {
 	fields := log.Fields{
 		"provider":       provider,
 		"model":          model,
 		"original_value": original,
 		"min":            min,
 		"max":            max,
 		"clamped_to":     clampedTo,
 	}
 	log.WithFields(fields).Debug("thinking: budget clamped |")
 }
--- a/internal/translator/antigravity/claude/antigravity_claude_request.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request.go
@@ -12,6 +12,7 @@ import (
 	"strings"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/cache"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
@@ -122,7 +123,12 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 					contentTypeResult := contentResult.Get("type")
 					if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "thinking" {
 						// Use GetThinkingText to handle wrapped thinking objects
-						thinkingText := util.GetThinkingText(contentResult)
+						thinkingText := thinking.GetThinkingText(contentResult)
 						signatureResult := contentResult.Get("signature")
 						clientSignature := ""
 						if signatureResult.Exists() && signatureResult.String() != "" {
 							clientSignature = signatureResult.String()
 						}
 						// Always try cached signature first (more reliable than client-provided)
 						// Client may send stale or invalid signatures from different sessions
@@ -380,12 +386,12 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 	}
 	// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
-	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
+	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
 		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
-				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
+				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
 			}
 		}
 	}
--- a/internal/translator/antigravity/claude/antigravity_claude_request_test.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request_test.go
@@ -380,8 +380,8 @@ func TestConvertClaudeRequestToAntigravity_ThinkingConfig(t *testing.T) {
 		if thinkingConfig.Get("thinkingBudget").Int() != 8000 {
 			t.Errorf("Expected thinkingBudget 8000, got %d", thinkingConfig.Get("thinkingBudget").Int())
 		}
-		if !thinkingConfig.Get("include_thoughts").Bool() {
+		if !thinkingConfig.Get("includeThoughts").Bool() {
-			t.Error("include_thoughts should be true")
+			t.Error("includeThoughts should be true")
 		}
 	} else {
 		t.Log("thinkingConfig not present - model may not be registered in test registry")
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
@@ -35,66 +35,19 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 	// Model
 	out, _ = sjson.SetBytes(out, "model", modelName)
-	// Reasoning effort -> thinkingBudget/include_thoughts
+	// Apply thinking configuration: convert OpenAI reasoning_effort to Gemini CLI thinkingConfig.
-	// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
+	// Inline translation-only mapping; capability checks happen later in ApplyThinking.
 	re := gjson.GetBytes(rawJSON, "reasoning_effort")
-	hasOfficialThinking := re.Exists()
+	if re.Exists() {
 	if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
 		effort := strings.ToLower(strings.TrimSpace(re.String()))
-		if util.IsGemini3Model(modelName) {
+		if effort != "" {
-			switch effort {
+			thinkingPath := "request.generationConfig.thinkingConfig"
-			case "none":
+			if effort == "auto" {
-				out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig")
+				out, _ = sjson.SetBytes(out, thinkingPath+".thinkingBudget", -1)
-			case "auto":
+				out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", true)
-				includeThoughts := true
+			} else {
-				out = util.ApplyGeminiCLIThinkingLevel(out, "", &includeThoughts)
+				out, _ = sjson.SetBytes(out, thinkingPath+".thinkingLevel", effort)
-			default:
+				out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", effort != "none")
 				if level, ok := util.ValidateGemini3ThinkingLevel(modelName, effort); ok {
 					out = util.ApplyGeminiCLIThinkingLevel(out, level, nil)
 				}
 			}
 		} else if !util.ModelUsesThinkingLevels(modelName) {
 			out = util.ApplyReasoningEffortToGeminiCLI(out, effort)
 		}
 	}
 	// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
 	// Only apply for models that use numeric budgets, not discrete levels.
 	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
 		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
 			var budget int
 			if v := tc.Get("thinkingBudget"); v.Exists() {
 				budget = int(v.Int())
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			} else if v := tc.Get("thinking_budget"); v.Exists() {
 				budget = int(v.Int())
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			}
 			if v := tc.Get("includeThoughts"); v.Exists() {
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
 			} else if v := tc.Get("include_thoughts"); v.Exists() {
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
 			} else if setBudget && budget != 0 {
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 			}
 		}
 	}
 	// Claude/Anthropic API format: thinking.type == "enabled" with budget_tokens
 	// This allows Claude Code and other Claude API clients to pass thinking configuration
 	if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && util.ModelSupportsThinking(modelName) {
 		if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
 			if t.Get("type").String() == "enabled" {
 				if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 					budget := int(b.Int())
 					out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 					out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 				}
 			}
 		}
 	}
@@ -179,6 +132,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 			}
 		}
 		systemPartIndex := 0
 		for i := 0; i < len(arr); i++ {
 			m := arr[i]
 			role := m.Get("role").String()
@@ -188,16 +142,19 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 				// system -> request.systemInstruction as a user message style
 				if content.Type == gjson.String {
 					out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
-					out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.String())
+					out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.String())
 					systemPartIndex++
 				} else if content.IsObject() && content.Get("type").String() == "text" {
 					out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
-					out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.Get("text").String())
+					out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.Get("text").String())
 					systemPartIndex++
 				} else if content.IsArray() {
 					contents := content.Array()
 					if len(contents) > 0 {
 						out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
 						for j := 0; j < len(contents); j++ {
-							out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", j), contents[j].Get("text").String())
+							out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), contents[j].Get("text").String())
 							systemPartIndex++
 						}
 					}
 				}
@@ -212,7 +169,10 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 					for _, item := range items {
 						switch item.Get("type").String() {
 						case "text":
-							node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String())
+							text := item.Get("text").String()
 							if text != "" {
 								node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text)
 							}
 							p++
 						case "image_url":
 							imageURL := item.Get("image_url.url").String()
@@ -256,6 +216,10 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 					for _, item := range content.Array() {
 						switch item.Get("type").String() {
 						case "text":
 							text := item.Get("text").String()
 							if text != "" {
 								node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text)
 							}
 							p++
 						case "image_url":
 							// If the assistant returned an inline data URL, preserve it for history fidelity.
--- a/internal/translator/claude/gemini/claude_gemini_request.go
+++ b/internal/translator/claude/gemini/claude_gemini_request.go
@@ -15,6 +15,7 @@ import (
 	"strings"
 	"github.com/google/uuid"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -114,15 +115,40 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream
 			}
 		}
 		// Include thoughts configuration for reasoning process visibility
-		// Only apply for models that support thinking and use numeric budgets, not discrete levels.
+		// Translator only does format conversion, ApplyThinking handles model capability validation.
-		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
-			// Check for thinkingBudget first - if present, enable thinking with budget
+			if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() {
-			if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() && thinkingBudget.Int() > 0 {
+				level := strings.ToLower(strings.TrimSpace(thinkingLevel.String()))
 				switch level {
 				case "":
 				case "none":
 					out, _ = sjson.Set(out, "thinking.type", "disabled")
 					out, _ = sjson.Delete(out, "thinking.budget_tokens")
 				case "auto":
 					out, _ = sjson.Set(out, "thinking.type", "enabled")
 					out, _ = sjson.Delete(out, "thinking.budget_tokens")
 				default:
 					if budget, ok := thinking.ConvertLevelToBudget(level); ok {
 						out, _ = sjson.Set(out, "thinking.type", "enabled")
 						out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
 					}
 				}
 			} else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
 				budget := int(thinkingBudget.Int())
 				switch budget {
 				case 0:
 					out, _ = sjson.Set(out, "thinking.type", "disabled")
 					out, _ = sjson.Delete(out, "thinking.budget_tokens")
 				case -1:
 					out, _ = sjson.Set(out, "thinking.type", "enabled")
 					out, _ = sjson.Delete(out, "thinking.budget_tokens")
 				default:
 					out, _ = sjson.Set(out, "thinking.type", "enabled")
 					out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
 				}
 			} else if includeThoughts := thinkingConfig.Get("includeThoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
 				out, _ = sjson.Set(out, "thinking.type", "enabled")
 				normalizedBudget := util.NormalizeThinkingBudget(modelName, int(thinkingBudget.Int()))
 				out, _ = sjson.Set(out, "thinking.budget_tokens", normalizedBudget)
 			} else if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
 				// Fallback to include_thoughts if no budget specified
 				out, _ = sjson.Set(out, "thinking.type", "enabled")
 			}
 		}
--- a/internal/translator/claude/openai/chat-completions/claude_openai_request.go
+++ b/internal/translator/claude/openai/chat-completions/claude_openai_request.go
@@ -15,7 +15,7 @@ import (
 	"strings"
 	"github.com/google/uuid"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -65,10 +65,11 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream
 	root := gjson.ParseBytes(rawJSON)
-	if v := root.Get("reasoning_effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+	// Convert OpenAI reasoning_effort to Claude thinking config.
 	if v := root.Get("reasoning_effort"); v.Exists() {
 		effort := strings.ToLower(strings.TrimSpace(v.String()))
 		if effort != "" {
-			budget, ok := util.ThinkingEffortToBudget(modelName, effort)
+			budget, ok := thinking.ConvertLevelToBudget(effort)
 			if ok {
 				switch budget {
 				case 0:
@@ -137,17 +138,35 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream
 	// Process messages and transform them to Claude Code format
 	if messages := root.Get("messages"); messages.Exists() && messages.IsArray() {
 		messageIndex := 0
 		systemMessageIndex := -1
 		messages.ForEach(func(_, message gjson.Result) bool {
 			role := message.Get("role").String()
 			contentResult := message.Get("content")
 			switch role {
-			case "system", "user", "assistant":
+			case "system":
-				// Create Claude Code message with appropriate role mapping
+				if systemMessageIndex == -1 {
-				if role == "system" {
+					systemMsg := `{"role":"user","content":[]}`
-					role = "user"
+					out, _ = sjson.SetRaw(out, "messages.-1", systemMsg)
 					systemMessageIndex = messageIndex
 					messageIndex++
 				}
-
+				if contentResult.Exists() && contentResult.Type == gjson.String && contentResult.String() != "" {
 					textPart := `{"type":"text","text":""}`
 					textPart, _ = sjson.Set(textPart, "text", contentResult.String())
 					out, _ = sjson.SetRaw(out, fmt.Sprintf("messages.%d.content.-1", systemMessageIndex), textPart)
 				} else if contentResult.Exists() && contentResult.IsArray() {
 					contentResult.ForEach(func(_, part gjson.Result) bool {
 						if part.Get("type").String() == "text" {
 							textPart := `{"type":"text","text":""}`
 							textPart, _ = sjson.Set(textPart, "text", part.Get("text").String())
 							out, _ = sjson.SetRaw(out, fmt.Sprintf("messages.%d.content.-1", systemMessageIndex), textPart)
 						}
 						return true
 					})
 				}
 			case "user", "assistant":
 				msg := `{"role":"","content":[]}`
 				msg, _ = sjson.Set(msg, "role", role)
@@ -226,6 +245,7 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream
 				}
 				out, _ = sjson.SetRaw(out, "messages.-1", msg)
 				messageIndex++
 			case "tool":
 				// Handle tool result messages conversion
@@ -236,6 +256,7 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream
 				msg, _ = sjson.Set(msg, "content.0.tool_use_id", toolCallID)
 				msg, _ = sjson.Set(msg, "content.0.content", content)
 				out, _ = sjson.SetRaw(out, "messages.-1", msg)
 				messageIndex++
 			}
 			return true
 		})
--- a/internal/translator/claude/openai/responses/claude_openai-responses_request.go
+++ b/internal/translator/claude/openai/responses/claude_openai-responses_request.go
@@ -10,7 +10,7 @@ import (
 	"strings"
 	"github.com/google/uuid"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -53,10 +53,11 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte
 	root := gjson.ParseBytes(rawJSON)
-	if v := root.Get("reasoning.effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+	// Convert OpenAI Responses reasoning.effort to Claude thinking config.
 	if v := root.Get("reasoning.effort"); v.Exists() {
 		effort := strings.ToLower(strings.TrimSpace(v.String()))
 		if effort != "" {
-			budget, ok := util.ThinkingEffortToBudget(modelName, effort)
+			budget, ok := thinking.ConvertLevelToBudget(effort)
 			if ok {
 				switch budget {
 				case 0:
--- a/internal/translator/claude/openai/responses/claude_openai-responses_response.go
+++ b/internal/translator/claude/openai/responses/claude_openai-responses_response.go
@@ -251,6 +251,7 @@ func ConvertClaudeResponseToOpenAIResponses(ctx context.Context, modelName strin
 			itemDone, _ = sjson.Set(itemDone, "item.id", fmt.Sprintf("fc_%s", st.CurrentFCID))
 			itemDone, _ = sjson.Set(itemDone, "item.arguments", args)
 			itemDone, _ = sjson.Set(itemDone, "item.call_id", st.CurrentFCID)
 			itemDone, _ = sjson.Set(itemDone, "item.name", st.FuncNames[idx])
 			out = append(out, emitEvent("response.output_item.done", itemDone))
 			st.InFuncBlock = false
 		} else if st.ReasoningActive {
--- a/internal/translator/codex/claude/codex_claude_request.go
+++ b/internal/translator/codex/claude/codex_claude_request.go
@@ -12,7 +12,7 @@ import (
 	"strings"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -51,7 +51,7 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 	systemsResult := rootResult.Get("system")
 	if systemsResult.IsArray() {
 		systemResults := systemsResult.Array()
-		message := `{"type":"message","role":"user","content":[]}`
+		message := `{"type":"message","role":"developer","content":[]}`
 		for i := 0; i < len(systemResults); i++ {
 			systemResult := systemResults[i]
 			systemTypeResult := systemResult.Get("type")
@@ -217,21 +217,19 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 	// Add additional configuration parameters for the Codex API.
 	template, _ = sjson.Set(template, "parallel_tool_calls", true)
-	// Convert thinking.budget_tokens to reasoning.effort for level-based models
+	// Convert thinking.budget_tokens to reasoning.effort.
-	reasoningEffort := "medium" // default
+	reasoningEffort := "medium"
-	if thinking := rootResult.Get("thinking"); thinking.Exists() && thinking.IsObject() {
+	if thinkingConfig := rootResult.Get("thinking"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
-		switch thinking.Get("type").String() {
+		switch thinkingConfig.Get("type").String() {
 		case "enabled":
-			if util.ModelUsesThinkingLevels(modelName) {
+			if budgetTokens := thinkingConfig.Get("budget_tokens"); budgetTokens.Exists() {
-				if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() {
+				budget := int(budgetTokens.Int())
-					budget := int(budgetTokens.Int())
+				if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" {
-					if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
+					reasoningEffort = effort
 						reasoningEffort = effort
 					}
 				}
 			}
 		case "disabled":
-			if effort, ok := util.ThinkingBudgetToEffort(modelName, 0); ok && effort != "" {
+			if effort, ok := thinking.ConvertBudgetToLevel(0); ok && effort != "" {
 				reasoningEffort = effort
 			}
 		}
@@ -243,21 +241,23 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 	template, _ = sjson.Set(template, "include", []string{"reasoning.encrypted_content"})
 	// Add a first message to ignore system instructions and ensure proper execution.
-	inputResult := gjson.Get(template, "input")
+	if misc.GetCodexInstructionsEnabled() {
-	if inputResult.Exists() && inputResult.IsArray() {
+		inputResult := gjson.Get(template, "input")
-		inputResults := inputResult.Array()
+		if inputResult.Exists() && inputResult.IsArray() {
-		newInput := "[]"
+			inputResults := inputResult.Array()
-		for i := 0; i < len(inputResults); i++ {
+			newInput := "[]"
-			if i == 0 {
+			for i := 0; i < len(inputResults); i++ {
-				firstText := inputResults[i].Get("content.0.text")
+				if i == 0 {
-				firstInstructions := "EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"
+					firstText := inputResults[i].Get("content.0.text")
-				if firstText.Exists() && firstText.String() != firstInstructions {
+					firstInstructions := "EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"
-					newInput, _ = sjson.SetRaw(newInput, "-1", `{"type":"message","role":"user","content":[{"type":"input_text","text":"EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"}]}`)
+					if firstText.Exists() && firstText.String() != firstInstructions {
 						newInput, _ = sjson.SetRaw(newInput, "-1", `{"type":"message","role":"user","content":[{"type":"input_text","text":"EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"}]}`)
 					}
 				}
 				newInput, _ = sjson.SetRaw(newInput, "-1", inputResults[i].Raw)
 			}
-			newInput, _ = sjson.SetRaw(newInput, "-1", inputResults[i].Raw)
+			template, _ = sjson.SetRaw(template, "input", newInput)
 		}
 		template, _ = sjson.SetRaw(template, "input", newInput)
 	}
 	return []byte(template)
--- a/internal/translator/codex/claude/codex_claude_response.go
+++ b/internal/translator/codex/claude/codex_claude_response.go
@@ -117,8 +117,12 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa
 		} else {
 			template, _ = sjson.Set(template, "delta.stop_reason", "end_turn")
 		}
-		template, _ = sjson.Set(template, "usage.input_tokens", rootResult.Get("response.usage.input_tokens").Int())
+		inputTokens, outputTokens, cachedTokens := extractResponsesUsage(rootResult.Get("response.usage"))
-		template, _ = sjson.Set(template, "usage.output_tokens", rootResult.Get("response.usage.output_tokens").Int())
+		template, _ = sjson.Set(template, "usage.input_tokens", inputTokens)
 		template, _ = sjson.Set(template, "usage.output_tokens", outputTokens)
 		if cachedTokens > 0 {
 			template, _ = sjson.Set(template, "usage.cache_read_input_tokens", cachedTokens)
 		}
 		output = "event: message_delta\n"
 		output += fmt.Sprintf("data: %s\n\n", template)
@@ -204,8 +208,12 @@ func ConvertCodexResponseToClaudeNonStream(_ context.Context, _ string, original
 	out := `{"id":"","type":"message","role":"assistant","model":"","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":0,"output_tokens":0}}`
 	out, _ = sjson.Set(out, "id", responseData.Get("id").String())
 	out, _ = sjson.Set(out, "model", responseData.Get("model").String())
-	out, _ = sjson.Set(out, "usage.input_tokens", responseData.Get("usage.input_tokens").Int())
+	inputTokens, outputTokens, cachedTokens := extractResponsesUsage(responseData.Get("usage"))
-	out, _ = sjson.Set(out, "usage.output_tokens", responseData.Get("usage.output_tokens").Int())
+	out, _ = sjson.Set(out, "usage.input_tokens", inputTokens)
 	out, _ = sjson.Set(out, "usage.output_tokens", outputTokens)
 	if cachedTokens > 0 {
 		out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens)
 	}
 	hasToolCall := false
@@ -308,12 +316,27 @@ func ConvertCodexResponseToClaudeNonStream(_ context.Context, _ string, original
 		out, _ = sjson.SetRaw(out, "stop_sequence", stopSequence.Raw)
 	}
-	if responseData.Get("usage.input_tokens").Exists() || responseData.Get("usage.output_tokens").Exists() {
+	return out
-		out, _ = sjson.Set(out, "usage.input_tokens", responseData.Get("usage.input_tokens").Int())
+}
-		out, _ = sjson.Set(out, "usage.output_tokens", responseData.Get("usage.output_tokens").Int())
+
 func extractResponsesUsage(usage gjson.Result) (int64, int64, int64) {
 	if !usage.Exists() || usage.Type == gjson.Null {
 		return 0, 0, 0
 	}
-	return out
+	inputTokens := usage.Get("input_tokens").Int()
 	outputTokens := usage.Get("output_tokens").Int()
 	cachedTokens := usage.Get("input_tokens_details.cached_tokens").Int()
 	if cachedTokens > 0 {
 		if inputTokens >= cachedTokens {
 			inputTokens -= cachedTokens
 		} else {
 			inputTokens = 0
 		}
 	}
 	return inputTokens, outputTokens, cachedTokens
 }
 // buildReverseMapFromClaudeOriginalShortToOriginal builds a map[short]original from original Claude request tools.
--- a/internal/translator/codex/gemini/codex_gemini_request.go
+++ b/internal/translator/codex/gemini/codex_gemini_request.go
@@ -14,6 +14,7 @@ import (
 	"strings"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -93,7 +94,7 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 	// System instruction -> as a user message with input_text parts
 	sysParts := root.Get("system_instruction.parts")
 	if sysParts.IsArray() {
-		msg := `{"type":"message","role":"user","content":[]}`
+		msg := `{"type":"message","role":"developer","content":[]}`
 		arr := sysParts.Array()
 		for i := 0; i < len(arr); i++ {
 			p := arr[i]
@@ -247,21 +248,28 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 	// Fixed flags aligning with Codex expectations
 	out, _ = sjson.Set(out, "parallel_tool_calls", true)
-	// Convert thinkingBudget to reasoning.effort for level-based models
+	// Convert Gemini thinkingConfig to Codex reasoning.effort.
-	reasoningEffort := "medium" // default
+	effortSet := false
 	if genConfig := root.Get("generationConfig"); genConfig.Exists() {
 		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
-			if util.ModelUsesThinkingLevels(modelName) {
+			if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() {
-				if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
+				effort := strings.ToLower(strings.TrimSpace(thinkingLevel.String()))
-					budget := int(thinkingBudget.Int())
+				if effort != "" {
-					if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
+					out, _ = sjson.Set(out, "reasoning.effort", effort)
-						reasoningEffort = effort
+					effortSet = true
-					}
+				}
 			} else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
 				if effort, ok := thinking.ConvertBudgetToLevel(int(thinkingBudget.Int())); ok {
 					out, _ = sjson.Set(out, "reasoning.effort", effort)
 					effortSet = true
 				}
 			}
 		}
 	}
-	out, _ = sjson.Set(out, "reasoning.effort", reasoningEffort)
+	if !effortSet {
 		// No thinking config, set default effort
 		out, _ = sjson.Set(out, "reasoning.effort", "medium")
 	}
 	out, _ = sjson.Set(out, "reasoning.summary", "auto")
 	out, _ = sjson.Set(out, "stream", true)
 	out, _ = sjson.Set(out, "store", false)
--- a/internal/translator/codex/openai/chat-completions/codex_openai_request.go
+++ b/internal/translator/codex/openai/chat-completions/codex_openai_request.go
@@ -33,7 +33,7 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
 	rawJSON := bytes.Clone(inputRawJSON)
 	userAgent := misc.ExtractCodexUserAgent(rawJSON)
 	// Start with empty JSON object
-	out := `{}`
+	out := `{"instructions":""}`
 	// Stream must be set to true
 	out, _ = sjson.Set(out, "stream", stream)
@@ -98,7 +98,9 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
 	// Extract system instructions from first system message (string or text object)
 	messages := gjson.GetBytes(rawJSON, "messages")
 	_, instructions := misc.CodexInstructionsForModel(modelName, "", userAgent)
-	out, _ = sjson.Set(out, "instructions", instructions)
+	if misc.GetCodexInstructionsEnabled() {
 		out, _ = sjson.Set(out, "instructions", instructions)
 	}
 	// if messages.IsArray() {
 	// 	arr := messages.Array()
 	// 	for i := 0; i < len(arr); i++ {
@@ -141,7 +143,7 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
 				msg := `{}`
 				msg, _ = sjson.Set(msg, "type", "message")
 				if role == "system" {
-					msg, _ = sjson.Set(msg, "role", "user")
+					msg, _ = sjson.Set(msg, "role", "developer")
 				} else {
 					msg, _ = sjson.Set(msg, "role", role)
 				}
--- a/internal/translator/codex/openai/responses/codex_openai-responses_request.go
+++ b/internal/translator/codex/openai/responses/codex_openai-responses_request.go
@@ -74,6 +74,11 @@ func ConvertOpenAIResponsesRequestToCodex(modelName string, inputRawJSON []byte,
 	}
 	if hasOfficialInstructions {
 		newInput := "[]"
 		for _, item := range inputResults {
 			newInput, _ = sjson.SetRaw(newInput, "-1", item.Raw)
 		}
 		rawJSON, _ = sjson.SetRawBytes(rawJSON, "input", []byte(newInput))
 		return rawJSON
 	}
 	// log.Debugf("instructions not matched, %s\n", originalInstructions)
--- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
+++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
@@ -10,7 +10,6 @@ import (
 	"strings"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -160,12 +159,12 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
 	}
 	// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
-	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
+	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
 		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
-				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
+				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
 			}
 		}
 	}
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -35,37 +35,19 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 	// Model
 	out, _ = sjson.SetBytes(out, "model", modelName)
-	// Reasoning effort -> thinkingBudget/include_thoughts
+	// Apply thinking configuration: convert OpenAI reasoning_effort to Gemini CLI thinkingConfig.
-	// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
+	// Inline translation-only mapping; capability checks happen later in ApplyThinking.
 	re := gjson.GetBytes(rawJSON, "reasoning_effort")
-	hasOfficialThinking := re.Exists()
+	if re.Exists() {
-	if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+		effort := strings.ToLower(strings.TrimSpace(re.String()))
-		out = util.ApplyReasoningEffortToGeminiCLI(out, re.String())
+		if effort != "" {
-	}
+			thinkingPath := "request.generationConfig.thinkingConfig"
-
+			if effort == "auto" {
-	// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
+				out, _ = sjson.SetBytes(out, thinkingPath+".thinkingBudget", -1)
-	// Only apply for models that use numeric budgets, not discrete levels.
+				out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", true)
-	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+			} else {
-		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
+				out, _ = sjson.SetBytes(out, thinkingPath+".thinkingLevel", effort)
-			var setBudget bool
+				out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", effort != "none")
 			var budget int
 			if v := tc.Get("thinkingBudget"); v.Exists() {
 				budget = int(v.Int())
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			} else if v := tc.Get("thinking_budget"); v.Exists() {
 				budget = int(v.Int())
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			}
 			if v := tc.Get("includeThoughts"); v.Exists() {
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
 			} else if v := tc.Get("include_thoughts"); v.Exists() {
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
 			} else if setBudget && budget != 0 {
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 			}
 		}
 	}
@@ -147,6 +129,7 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 			}
 		}
 		systemPartIndex := 0
 		for i := 0; i < len(arr); i++ {
 			m := arr[i]
 			role := m.Get("role").String()
@@ -156,16 +139,19 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 				// system -> request.systemInstruction as a user message style
 				if content.Type == gjson.String {
 					out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
-					out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.String())
+					out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.String())
 					systemPartIndex++
 				} else if content.IsObject() && content.Get("type").String() == "text" {
 					out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
-					out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.Get("text").String())
+					out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.Get("text").String())
 					systemPartIndex++
 				} else if content.IsArray() {
 					contents := content.Array()
 					if len(contents) > 0 {
 						out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
 						for j := 0; j < len(contents); j++ {
-							out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", j), contents[j].Get("text").String())
+							out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), contents[j].Get("text").String())
 							systemPartIndex++
 						}
 					}
 				}
--- a/internal/translator/gemini/claude/gemini_claude_request.go
+++ b/internal/translator/gemini/claude/gemini_claude_request.go
@@ -10,7 +10,6 @@ import (
 	"strings"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -153,13 +152,13 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 	}
 	// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when enabled
-	// Only apply for models that use numeric budgets, not discrete levels.
+	// Translator only does format conversion, ApplyThinking handles model capability validation.
-	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
 		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
 				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
-				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
+				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.includeThoughts", true)
 			}
 		}
 	}
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
@@ -35,55 +35,19 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 	// Model
 	out, _ = sjson.SetBytes(out, "model", modelName)
-	// Reasoning effort -> thinkingBudget/include_thoughts
+	// Apply thinking configuration: convert OpenAI reasoning_effort to Gemini thinkingConfig.
-	// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
+	// Inline translation-only mapping; capability checks happen later in ApplyThinking.
 	// Only apply numeric budgets for models that use budgets (not discrete levels) to avoid
 	// incorrectly applying thinkingBudget for level-based models like gpt-5. Gemini 3 models
 	// use thinkingLevel/includeThoughts instead.
 	re := gjson.GetBytes(rawJSON, "reasoning_effort")
-	hasOfficialThinking := re.Exists()
+	if re.Exists() {
 	if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
 		effort := strings.ToLower(strings.TrimSpace(re.String()))
-		if util.IsGemini3Model(modelName) {
+		if effort != "" {
-			switch effort {
+			thinkingPath := "generationConfig.thinkingConfig"
-			case "none":
+			if effort == "auto" {
-				out, _ = sjson.DeleteBytes(out, "generationConfig.thinkingConfig")
+				out, _ = sjson.SetBytes(out, thinkingPath+".thinkingBudget", -1)
-			case "auto":
+				out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", true)
-				includeThoughts := true
+			} else {
-				out = util.ApplyGeminiThinkingLevel(out, "", &includeThoughts)
+				out, _ = sjson.SetBytes(out, thinkingPath+".thinkingLevel", effort)
-			default:
+				out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", effort != "none")
 				if level, ok := util.ValidateGemini3ThinkingLevel(modelName, effort); ok {
 					out = util.ApplyGeminiThinkingLevel(out, level, nil)
 				}
 			}
 		} else if !util.ModelUsesThinkingLevels(modelName) {
 			out = util.ApplyReasoningEffortToGemini(out, effort)
 		}
 	}
 	// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
 	// Only apply for models that use numeric budgets, not discrete levels.
 	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
 		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
 			var budget int
 			if v := tc.Get("thinkingBudget"); v.Exists() {
 				budget = int(v.Int())
 				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			} else if v := tc.Get("thinking_budget"); v.Exists() {
 				budget = int(v.Int())
 				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			}
 			if v := tc.Get("includeThoughts"); v.Exists() {
 				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
 			} else if v := tc.Get("include_thoughts"); v.Exists() {
 				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
 			} else if setBudget && budget != 0 {
 				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 			}
 		}
 	}
@@ -165,6 +129,7 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 			}
 		}
 		systemPartIndex := 0
 		for i := 0; i < len(arr); i++ {
 			m := arr[i]
 			role := m.Get("role").String()
@@ -174,16 +139,19 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 				// system -> system_instruction as a user message style
 				if content.Type == gjson.String {
 					out, _ = sjson.SetBytes(out, "system_instruction.role", "user")
-					out, _ = sjson.SetBytes(out, "system_instruction.parts.0.text", content.String())
+					out, _ = sjson.SetBytes(out, fmt.Sprintf("system_instruction.parts.%d.text", systemPartIndex), content.String())
 					systemPartIndex++
 				} else if content.IsObject() && content.Get("type").String() == "text" {
 					out, _ = sjson.SetBytes(out, "system_instruction.role", "user")
-					out, _ = sjson.SetBytes(out, "system_instruction.parts.0.text", content.Get("text").String())
+					out, _ = sjson.SetBytes(out, fmt.Sprintf("system_instruction.parts.%d.text", systemPartIndex), content.Get("text").String())
 					systemPartIndex++
 				} else if content.IsArray() {
 					contents := content.Array()
 					if len(contents) > 0 {
-						out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
+						out, _ = sjson.SetBytes(out, "system_instruction.role", "user")
 						for j := 0; j < len(contents); j++ {
-							out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", j), contents[j].Get("text").String())
+							out, _ = sjson.SetBytes(out, fmt.Sprintf("system_instruction.parts.%d.text", systemPartIndex), contents[j].Get("text").String())
 							systemPartIndex++
 						}
 					}
 				}
@@ -198,7 +166,10 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 					for _, item := range items {
 						switch item.Get("type").String() {
 						case "text":
-							node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String())
+							text := item.Get("text").String()
 							if text != "" {
 								node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text)
 							}
 							p++
 						case "image_url":
 							imageURL := item.Get("image_url.url").String()
@@ -243,6 +214,10 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 					for _, item := range content.Array() {
 						switch item.Get("type").String() {
 						case "text":
 							text := item.Get("text").String()
 							if text != "" {
 								node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text)
 							}
 							p++
 						case "image_url":
 							// If the assistant returned an inline data URL, preserve it for history fidelity.
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
@@ -5,7 +5,6 @@ import (
 	"strings"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -388,31 +387,19 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 		out, _ = sjson.Set(out, "generationConfig.stopSequences", sequences)
 	}
-	// OpenAI official reasoning fields take precedence
+	// Apply thinking configuration: convert OpenAI Responses API reasoning.effort to Gemini thinkingConfig.
-	// Only convert for models that use numeric budgets (not discrete levels).
+	// Inline translation-only mapping; capability checks happen later in ApplyThinking.
-	hasOfficialThinking := root.Get("reasoning.effort").Exists()
+	re := root.Get("reasoning.effort")
-	if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+	if re.Exists() {
-		reasoningEffort := root.Get("reasoning.effort")
+		effort := strings.ToLower(strings.TrimSpace(re.String()))
-		out = string(util.ApplyReasoningEffortToGemini([]byte(out), reasoningEffort.String()))
+		if effort != "" {
-	}
+			thinkingPath := "generationConfig.thinkingConfig"
-
+			if effort == "auto" {
-	// Cherry Studio extension (applies only when official fields are missing)
+				out, _ = sjson.Set(out, thinkingPath+".thinkingBudget", -1)
-	// Only apply for models that use numeric budgets, not discrete levels.
+				out, _ = sjson.Set(out, thinkingPath+".includeThoughts", true)
-	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+			} else {
-		if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
+				out, _ = sjson.Set(out, thinkingPath+".thinkingLevel", effort)
-			var setBudget bool
+				out, _ = sjson.Set(out, thinkingPath+".includeThoughts", effort != "none")
 			var budget int
 			if v := tc.Get("thinking_budget"); v.Exists() {
 				budget = int(v.Int())
 				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			}
 			if v := tc.Get("include_thoughts"); v.Exists() {
 				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
 			} else if setBudget {
 				if budget != 0 {
 					out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 				}
 			}
 		}
 	}
--- a/internal/translator/openai/claude/openai_claude_request.go
+++ b/internal/translator/openai/claude/openai_claude_request.go
@@ -9,7 +9,7 @@ import (
 	"bytes"
 	"strings"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -61,23 +61,23 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 	out, _ = sjson.Set(out, "stream", stream)
 	// Thinking: Convert Claude thinking.budget_tokens to OpenAI reasoning_effort
-	if thinking := root.Get("thinking"); thinking.Exists() && thinking.IsObject() {
+	if thinkingConfig := root.Get("thinking"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
-		if thinkingType := thinking.Get("type"); thinkingType.Exists() {
+		if thinkingType := thinkingConfig.Get("type"); thinkingType.Exists() {
 			switch thinkingType.String() {
 			case "enabled":
-				if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() {
+				if budgetTokens := thinkingConfig.Get("budget_tokens"); budgetTokens.Exists() {
 					budget := int(budgetTokens.Int())
-					if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
+					if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" {
 						out, _ = sjson.Set(out, "reasoning_effort", effort)
 					}
 				} else {
 					// No budget_tokens specified, default to "auto" for enabled thinking
-					if effort, ok := util.ThinkingBudgetToEffort(modelName, -1); ok && effort != "" {
+					if effort, ok := thinking.ConvertBudgetToLevel(-1); ok && effort != "" {
 						out, _ = sjson.Set(out, "reasoning_effort", effort)
 					}
 				}
 			case "disabled":
-				if effort, ok := util.ThinkingBudgetToEffort(modelName, 0); ok && effort != "" {
+				if effort, ok := thinking.ConvertBudgetToLevel(0); ok && effort != "" {
 					out, _ = sjson.Set(out, "reasoning_effort", effort)
 				}
 			}
@@ -88,7 +88,7 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 	var messagesJSON = "[]"
 	// Handle system message first
-	systemMsgJSON := `{"role":"system","content":[{"type":"text","text":"Use ANY tool, the parameters MUST accord with RFC 8259 (The JavaScript Object Notation (JSON) Data Interchange Format), the keys and value MUST be enclosed in double quotes."}]}`
+	systemMsgJSON := `{"role":"system","content":[]}`
 	if system := root.Get("system"); system.Exists() {
 		if system.Type == gjson.String {
 			if system.String() != "" {
@@ -129,7 +129,7 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 					case "thinking":
 						// Only map thinking to reasoning_content for assistant messages (security: prevent injection)
 						if role == "assistant" {
-							thinkingText := util.GetThinkingText(part)
+							thinkingText := thinking.GetThinkingText(part)
 							// Skip empty or whitespace-only thinking
 							if strings.TrimSpace(thinkingText) != "" {
 								reasoningParts = append(reasoningParts, thinkingText)
--- a/internal/translator/openai/claude/openai_claude_response.go
+++ b/internal/translator/openai/claude/openai_claude_response.go
@@ -289,21 +289,17 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
 	// Only process if usage has actual values (not null)
 	if param.FinishReason != "" {
 		usage := root.Get("usage")
-		var inputTokens, outputTokens int64
+		var inputTokens, outputTokens, cachedTokens int64
 		if usage.Exists() && usage.Type != gjson.Null {
-			// Check if usage has actual token counts
+			inputTokens, outputTokens, cachedTokens = extractOpenAIUsage(usage)
 			promptTokens := usage.Get("prompt_tokens")
 			completionTokens := usage.Get("completion_tokens")
 			if promptTokens.Exists() && completionTokens.Exists() {
 				inputTokens = promptTokens.Int()
 				outputTokens = completionTokens.Int()
 			}
 			// Send message_delta with usage
 			messageDeltaJSON := `{"type":"message_delta","delta":{"stop_reason":"","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
 			messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "delta.stop_reason", mapOpenAIFinishReasonToAnthropic(param.FinishReason))
 			messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.input_tokens", inputTokens)
 			messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.output_tokens", outputTokens)
 			if cachedTokens > 0 {
 				messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.cache_read_input_tokens", cachedTokens)
 			}
 			results = append(results, "event: message_delta\ndata: "+messageDeltaJSON+"\n\n")
 			param.MessageDeltaSent = true
@@ -423,13 +419,12 @@ func convertOpenAINonStreamingToAnthropic(rawJSON []byte) []string {
 	// Set usage information
 	if usage := root.Get("usage"); usage.Exists() {
-		out, _ = sjson.Set(out, "usage.input_tokens", usage.Get("prompt_tokens").Int())
+		inputTokens, outputTokens, cachedTokens := extractOpenAIUsage(usage)
-		out, _ = sjson.Set(out, "usage.output_tokens", usage.Get("completion_tokens").Int())
+		out, _ = sjson.Set(out, "usage.input_tokens", inputTokens)
-		reasoningTokens := int64(0)
+		out, _ = sjson.Set(out, "usage.output_tokens", outputTokens)
-		if v := usage.Get("completion_tokens_details.reasoning_tokens"); v.Exists() {
+		if cachedTokens > 0 {
-			reasoningTokens = v.Int()
+			out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens)
 		}
 		out, _ = sjson.Set(out, "usage.reasoning_tokens", reasoningTokens)
 	}
 	return []string{out}
@@ -674,8 +669,12 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina
 	}
 	if respUsage := root.Get("usage"); respUsage.Exists() {
-		out, _ = sjson.Set(out, "usage.input_tokens", respUsage.Get("prompt_tokens").Int())
+		inputTokens, outputTokens, cachedTokens := extractOpenAIUsage(respUsage)
-		out, _ = sjson.Set(out, "usage.output_tokens", respUsage.Get("completion_tokens").Int())
+		out, _ = sjson.Set(out, "usage.input_tokens", inputTokens)
 		out, _ = sjson.Set(out, "usage.output_tokens", outputTokens)
 		if cachedTokens > 0 {
 			out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens)
 		}
 	}
 	if !stopReasonSet {
@@ -692,3 +691,23 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina
 // ClaudeTokenCount renders count as a JSON object of the form
 // {"input_tokens":<count>}. The context argument is accepted but not used.
 func ClaudeTokenCount(ctx context.Context, count int64) string {
 	const layout = `{"input_tokens":%d}`
 	return fmt.Sprintf(layout, count)
 }
 // extractOpenAIUsage reads token counts from an OpenAI-style usage object and
 // returns them as (input, output, cached).
 //
 // input is prompt_tokens with prompt_tokens_details.cached_tokens subtracted,
 // floored at zero; output is completion_tokens; cached is returned as read.
 // A missing or null usage value yields three zeros.
 func extractOpenAIUsage(usage gjson.Result) (int64, int64, int64) {
 	if usage.Type == gjson.Null || !usage.Exists() {
 		return 0, 0, 0
 	}
 	prompt := usage.Get("prompt_tokens").Int()
 	completion := usage.Get("completion_tokens").Int()
 	cached := usage.Get("prompt_tokens_details.cached_tokens").Int()
 	switch {
 	case cached <= 0:
 		// Nothing to subtract; prompt is reported unchanged.
 	case prompt < cached:
 		// Never report a negative input count.
 		prompt = 0
 	default:
 		prompt -= cached
 	}
 	return prompt, completion, cached
 }
--- a/internal/translator/openai/gemini/openai_gemini_request.go
+++ b/internal/translator/openai/gemini/openai_gemini_request.go
@@ -12,7 +12,7 @@ import (
 	"math/big"
 	"strings"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -77,12 +77,15 @@ func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 			}
 		}
-		// Convert thinkingBudget to reasoning_effort
+		// Map Gemini thinkingConfig to OpenAI reasoning_effort.
 		// Always perform conversion to support allowCompat models that may not be in registry
 		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
-			if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
+			if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() {
-				budget := int(thinkingBudget.Int())
+				effort := strings.ToLower(strings.TrimSpace(thinkingLevel.String()))
-				if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
+				if effort != "" {
 					out, _ = sjson.Set(out, "reasoning_effort", effort)
 				}
 			} else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
 				if effort, ok := thinking.ConvertBudgetToLevel(int(thinkingBudget.Int())); ok {
 					out, _ = sjson.Set(out, "reasoning_effort", effort)
 				}
 			}
--- a/internal/translator/openai/openai/responses/openai_openai-responses_response.go
+++ b/internal/translator/openai/openai/responses/openai_openai-responses_response.go
@@ -12,6 +12,10 @@ import (
 	"github.com/tidwall/sjson"
 )
 // oaiToResponsesStateReasoning records one finished reasoning item: the item
 // ID it was streamed under and the reasoning text that was accumulated for it.
 type oaiToResponsesStateReasoning struct {
 	// ReasoningID is the ID of the reasoning output item.
 	ReasoningID   string
 	// ReasoningData is the reasoning summary text of that item.
 	ReasoningData string
 }
 type oaiToResponsesState struct {
 	Seq            int
 	ResponseID     string
@@ -23,6 +27,7 @@ type oaiToResponsesState struct {
 	// Per-output message text buffers by index
 	MsgTextBuf   map[int]*strings.Builder
 	ReasoningBuf strings.Builder
 	Reasonings   []oaiToResponsesStateReasoning
 	FuncArgsBuf  map[int]*strings.Builder // index -> args
 	FuncNames    map[int]string           // index -> name
 	FuncCallIDs  map[int]string           // index -> call_id
@@ -63,6 +68,7 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
 			MsgItemDone:     make(map[int]bool),
 			FuncArgsDone:    make(map[int]bool),
 			FuncItemDone:    make(map[int]bool),
 			Reasonings:      make([]oaiToResponsesStateReasoning, 0),
 		}
 	}
 	st := (*param).(*oaiToResponsesState)
@@ -157,6 +163,31 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
 		st.Started = true
 	}
 	// stopReasoning closes the reasoning item identified by st.ReasoningID.
 	// It appends three events to out, in order: reasoning_summary_text.done,
 	// reasoning_summary_part.done and output_item.done, each carrying text as
 	// the final summary. The finished item is then recorded in st.Reasonings
 	// and st.ReasoningID is cleared so the item is not closed twice.
 	stopReasoning := func(text string) {
 		// Emit reasoning done events
 		textDone := `{"type":"response.reasoning_summary_text.done","sequence_number":0,"item_id":"","output_index":0,"summary_index":0,"text":""}`
 		textDone, _ = sjson.Set(textDone, "sequence_number", nextSeq())
 		textDone, _ = sjson.Set(textDone, "item_id", st.ReasoningID)
 		textDone, _ = sjson.Set(textDone, "output_index", st.ReasoningIndex)
 		textDone, _ = sjson.Set(textDone, "text", text)
 		out = append(out, emitRespEvent("response.reasoning_summary_text.done", textDone))
 		partDone := `{"type":"response.reasoning_summary_part.done","sequence_number":0,"item_id":"","output_index":0,"summary_index":0,"part":{"type":"summary_text","text":""}}`
 		partDone, _ = sjson.Set(partDone, "sequence_number", nextSeq())
 		partDone, _ = sjson.Set(partDone, "item_id", st.ReasoningID)
 		partDone, _ = sjson.Set(partDone, "output_index", st.ReasoningIndex)
 		partDone, _ = sjson.Set(partDone, "part.text", text)
 		out = append(out, emitRespEvent("response.reasoning_summary_part.done", partDone))
 		outputItemDone := `{"type":"response.output_item.done","item":{"id":"","type":"reasoning","encrypted_content":"","summary":[{"type":"summary_text","text":""}]},"output_index":0,"sequence_number":0}`
 		outputItemDone, _ = sjson.Set(outputItemDone, "sequence_number", nextSeq())
 		outputItemDone, _ = sjson.Set(outputItemDone, "item.id", st.ReasoningID)
 		outputItemDone, _ = sjson.Set(outputItemDone, "output_index", st.ReasoningIndex)
 		// summary is an array in the template, so the element index is required.
 		// Without it the set fails (error discarded) and the text stays empty.
 		outputItemDone, _ = sjson.Set(outputItemDone, "item.summary.0.text", text)
 		out = append(out, emitRespEvent("response.output_item.done", outputItemDone))
 		// Keep the finished item, then mark that no reasoning item is open.
 		st.Reasonings = append(st.Reasonings, oaiToResponsesStateReasoning{ReasoningID: st.ReasoningID, ReasoningData: text})
 		st.ReasoningID = ""
 	}
 	// choices[].delta content / tool_calls / reasoning_content
 	if choices := root.Get("choices"); choices.Exists() && choices.IsArray() {
 		choices.ForEach(func(_, choice gjson.Result) bool {
@@ -165,6 +196,10 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
 			if delta.Exists() {
 				if c := delta.Get("content"); c.Exists() && c.String() != "" {
 					// Ensure the message item and its first content part are announced before any text deltas
 					if st.ReasoningID != "" {
 						stopReasoning(st.ReasoningBuf.String())
 						st.ReasoningBuf.Reset()
 					}
 					if !st.MsgItemAdded[idx] {
 						item := `{"type":"response.output_item.added","sequence_number":0,"output_index":0,"item":{"id":"","type":"message","status":"in_progress","content":[],"role":"assistant"}}`
 						item, _ = sjson.Set(item, "sequence_number", nextSeq())
@@ -226,6 +261,10 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
 				// tool calls
 				if tcs := delta.Get("tool_calls"); tcs.Exists() && tcs.IsArray() {
 					if st.ReasoningID != "" {
 						stopReasoning(st.ReasoningBuf.String())
 						st.ReasoningBuf.Reset()
 					}
 					// Before emitting any function events, if a message is open for this index,
 					// close its text/content to match Codex expected ordering.
 					if st.MsgItemAdded[idx] && !st.MsgItemDone[idx] {
@@ -361,17 +400,8 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
 				}
 				if st.ReasoningID != "" {
-					// Emit reasoning done events
+					stopReasoning(st.ReasoningBuf.String())
-					textDone := `{"type":"response.reasoning_summary_text.done","sequence_number":0,"item_id":"","output_index":0,"summary_index":0,"text":""}`
+					st.ReasoningBuf.Reset()
 					textDone, _ = sjson.Set(textDone, "sequence_number", nextSeq())
 					textDone, _ = sjson.Set(textDone, "item_id", st.ReasoningID)
 					textDone, _ = sjson.Set(textDone, "output_index", st.ReasoningIndex)
 					out = append(out, emitRespEvent("response.reasoning_summary_text.done", textDone))
 					partDone := `{"type":"response.reasoning_summary_part.done","sequence_number":0,"item_id":"","output_index":0,"summary_index":0,"part":{"type":"summary_text","text":""}}`
 					partDone, _ = sjson.Set(partDone, "sequence_number", nextSeq())
 					partDone, _ = sjson.Set(partDone, "item_id", st.ReasoningID)
 					partDone, _ = sjson.Set(partDone, "output_index", st.ReasoningIndex)
 					out = append(out, emitRespEvent("response.reasoning_summary_part.done", partDone))
 				}
 				// Emit function call done events for any active function calls
@@ -485,11 +515,13 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
 				}
 				// Build response.output using aggregated buffers
 				outputsWrapper := `{"arr":[]}`
-				if st.ReasoningBuf.Len() > 0 {
+				if len(st.Reasonings) > 0 {
-					item := `{"id":"","type":"reasoning","summary":[{"type":"summary_text","text":""}]}`
+					for _, r := range st.Reasonings {
-					item, _ = sjson.Set(item, "id", st.ReasoningID)
+						item := `{"id":"","type":"reasoning","summary":[{"type":"summary_text","text":""}]}`
-					item, _ = sjson.Set(item, "summary.0.text", st.ReasoningBuf.String())
+						item, _ = sjson.Set(item, "id", r.ReasoningID)
-					outputsWrapper, _ = sjson.SetRaw(outputsWrapper, "arr.-1", item)
+						item, _ = sjson.Set(item, "summary.0.text", r.ReasoningData)
 						outputsWrapper, _ = sjson.SetRaw(outputsWrapper, "arr.-1", item)
 					}
 				}
 				// Append message items in ascending index order
 				if len(st.MsgItemAdded) > 0 {
--- a/internal/util/claude_thinking.go
+++ b/internal/util/claude_thinking.go
@@ -1,49 +0,0 @@
 package util
 import (
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
 // ApplyClaudeThinkingConfig enables thinking on a Claude API request payload by
 // writing thinking.type = "enabled" and thinking.budget_tokens = *budget.
 // The payload is returned unchanged when budget is nil, when *budget is not
 // positive, or when the payload already contains a "thinking" field.
 func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte {
 	if budget == nil || *budget <= 0 || gjson.GetBytes(body, "thinking").Exists() {
 		return body
 	}
 	withType, _ := sjson.SetBytes(body, "thinking.type", "enabled")
 	withBudget, _ := sjson.SetBytes(withType, "thinking.budget_tokens", *budget)
 	return withBudget
 }
 // ResolveClaudeThinkingConfig derives a Claude thinking budget from request metadata.
 // The first result is the normalized budget, or nil when thinking should not be
 // enabled; the second reports whether the metadata matched at all.
 // It returns (nil, false) when the model does not support thinking or nothing
 // matched, and (nil, true) when thoughts are explicitly excluded, no budget is
 // given, or the normalized budget is not positive.
 func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) {
 	if !ModelSupportsThinking(modelName) {
 		return nil, false
 	}
 	rawBudget, includeFlag, ok := ResolveThinkingConfigFromMetadata(modelName, metadata)
 	if !ok {
 		return nil, false
 	}
 	explicitlyExcluded := includeFlag != nil && !*includeFlag
 	if explicitlyExcluded || rawBudget == nil {
 		return nil, true
 	}
 	if effective := NormalizeThinkingBudget(modelName, *rawBudget); effective > 0 {
 		return &effective, true
 	}
 	return nil, true
 }
--- a/internal/util/gemini_schema.go
+++ b/internal/util/gemini_schema.go
@@ -19,6 +19,7 @@ func CleanJSONSchemaForAntigravity(jsonStr string) string {
 	// Phase 1: Convert and add hints
 	jsonStr = convertRefsToHints(jsonStr)
 	jsonStr = convertConstToEnum(jsonStr)
 	jsonStr = convertEnumValuesToStrings(jsonStr)
 	jsonStr = addEnumHints(jsonStr)
 	jsonStr = addAdditionalPropertiesHints(jsonStr)
 	jsonStr = moveConstraintsToDescription(jsonStr)
@@ -77,6 +78,33 @@ func convertConstToEnum(jsonStr string) string {
 	return jsonStr
 }
 // convertEnumValuesToStrings rewrites every "enum" array that contains at least
 // one non-string member so that all of its members are strings.
 // Gemini API requires enum values to be of type string, not numbers or booleans.
 // Arrays that already hold only strings are left untouched.
 func convertEnumValuesToStrings(jsonStr string) string {
 	for _, enumPath := range findPaths(jsonStr, "enum") {
 		enumNode := gjson.Get(jsonStr, enumPath)
 		if !enumNode.IsArray() {
 			continue
 		}
 		members := enumNode.Array()
 		asStrings := make([]string, 0, len(members))
 		sawNonString := false
 		for i := range members {
 			if members[i].Type != gjson.String {
 				sawNonString = true
 			}
 			asStrings = append(asStrings, members[i].String())
 		}
 		if !sawNonString {
 			// Already all strings; avoid rewriting the document.
 			continue
 		}
 		jsonStr, _ = sjson.Set(jsonStr, enumPath, asStrings)
 	}
 	return jsonStr
 }
 func addEnumHints(jsonStr string) string {
 	for _, p := range findPaths(jsonStr, "enum") {
 		arr := gjson.Get(jsonStr, p)
--- a/internal/util/gemini_schema_test.go
+++ b/internal/util/gemini_schema_test.go
@@ -818,3 +818,54 @@ func TestCleanJSONSchemaForAntigravity_MultipleFormats(t *testing.T) {
 		t.Errorf("date-time format hint should be added, got: %s", result)
 	}
 }
 // TestCleanJSONSchemaForAntigravity_NumericEnumToString checks that integer and
 // float enum members are emitted as strings while string members are kept.
 func TestCleanJSONSchemaForAntigravity_NumericEnumToString(t *testing.T) {
 	// Gemini API requires enum values to be strings, not numbers
 	const schema = `{
 		"type": "object",
 		"properties": {
 			"priority": {"type": "integer", "enum": [0, 1, 2]},
 			"level": {"type": "number", "enum": [1.5, 2.5, 3.5]},
 			"status": {"type": "string", "enum": ["active", "inactive"]}
 		}
 	}`
 	cleaned := CleanJSONSchemaForAntigravity(schema)
 	has := func(fragment string) bool { return strings.Contains(cleaned, fragment) }
 	// The raw numeric arrays must no longer appear.
 	if has(`"enum":[0,1,2]`) {
 		t.Errorf("Integer enum values should be converted to strings, got: %s", cleaned)
 	}
 	if has(`"enum":[1.5,2.5,3.5]`) {
 		t.Errorf("Float enum values should be converted to strings, got: %s", cleaned)
 	}
 	// Quoted versions of the integers must be present.
 	if !(has(`"0"`) && has(`"1"`) && has(`"2"`)) {
 		t.Errorf("Integer enum values should be converted to string format, got: %s", cleaned)
 	}
 	// Members that were strings to begin with must survive.
 	if !(has(`"active"`) && has(`"inactive"`)) {
 		t.Errorf("String enum values should remain unchanged, got: %s", cleaned)
 	}
 }
 // TestCleanJSONSchemaForAntigravity_BooleanEnumToString checks that boolean
 // enum members are emitted as the strings "true" and "false".
 func TestCleanJSONSchemaForAntigravity_BooleanEnumToString(t *testing.T) {
 	// Boolean enum values should also be converted to strings
 	const schema = `{
 		"type": "object",
 		"properties": {
 			"enabled": {"type": "boolean", "enum": [true, false]}
 		}
 	}`
 	cleaned := CleanJSONSchemaForAntigravity(schema)
 	has := func(fragment string) bool { return strings.Contains(cleaned, fragment) }
 	// The raw boolean array must no longer appear.
 	if has(`"enum":[true,false]`) {
 		t.Errorf("Boolean enum values should be converted to strings, got: %s", cleaned)
 	}
 	// Quoted "true" and "false" must both be present.
 	if !(has(`"true"`) && has(`"false"`)) {
 		t.Errorf("Boolean enum values should be converted to string format, got: %s", cleaned)
 	}
 }
--- a/internal/util/gemini_thinking.go
+++ b/internal/util/gemini_thinking.go
@@ -1,617 +0,0 @@
 package util
 import (
 	"regexp"
 	"strings"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
 // Metadata key names related to Gemini thinking settings.
 // NOTE(review): the code that reads or writes these keys is not visible
 // here — confirm the expected value type for each key against its callers.
 const (
 	GeminiThinkingBudgetMetadataKey  = "gemini_thinking_budget"
 	GeminiIncludeThoughtsMetadataKey = "gemini_include_thoughts"
 	GeminiOriginalModelMetadataKey   = "gemini_original_model"
 )
 // Gemini model family detection patterns.
 // All patterns are case-insensitive and anchored at the start of the name only,
 // so any suffix after the matched prefix is accepted. The separator between
 // "gemini" and the version is an optional '_' or '-'; the separator after the
 // version is a required '_' or '-'.
 var (
 	// Any name starting with gemini-3- (or the '_' / no-separator variants).
 	gemini3Pattern      = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]`)
 	// Gemini 3 names whose next segment starts with "pro".
 	gemini3ProPattern   = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]pro`)
 	// Gemini 3 names whose next segment starts with "flash".
 	gemini3FlashPattern = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]flash`)
 	// Any name starting with gemini-2.5- (literal dot in the version).
 	gemini25Pattern     = regexp.MustCompile(`(?i)^gemini[_-]?2\.5[_-]`)
 )
 // IsGemini3Model reports whether model names a Gemini 3 family model.
 // Gemini 3 models should use thinkingLevel (string) instead of thinkingBudget (number).
 func IsGemini3Model(model string) bool {
 	matched := gemini3Pattern.MatchString(model)
 	return matched
 }
 // IsGemini3ProModel reports whether model names a Gemini 3 Pro variant.
 // Gemini 3 Pro supports thinkingLevel: "low", "high" (default: "high")
 func IsGemini3ProModel(model string) bool {
 	matched := gemini3ProPattern.MatchString(model)
 	return matched
 }
 // IsGemini3FlashModel reports whether model names a Gemini 3 Flash variant.
 // Gemini 3 Flash supports thinkingLevel: "minimal", "low", "medium", "high" (default: "high")
 func IsGemini3FlashModel(model string) bool {
 	matched := gemini3FlashPattern.MatchString(model)
 	return matched
 }
 // IsGemini25Model reports whether model names a Gemini 2.5 family model.
 // Gemini 2.5 models should use thinkingBudget (number).
 func IsGemini25Model(model string) bool {
 	matched := gemini25Pattern.MatchString(model)
 	return matched
 }
 // Gemini3ProThinkingLevels are the valid thinkingLevel values for Gemini 3 Pro models.
 // Values are lowercase; ValidateGemini3ThinkingLevel compares against them after
 // lowercasing and trimming its input.
 var Gemini3ProThinkingLevels = []string{"low", "high"}
 // Gemini3FlashThinkingLevels are the valid thinkingLevel values for Gemini 3 Flash models.
 // ValidateGemini3ThinkingLevel also uses this list for Gemini 3 models that are
 // neither Pro nor Flash.
 var Gemini3FlashThinkingLevels = []string{"minimal", "low", "medium", "high"}
 // ApplyGeminiThinkingConfig writes a thinking budget and/or include-thoughts flag
 // under generationConfig.thinkingConfig of a Gemini request body.
 //
 // When budget is non-nil it is written to thinkingBudget. The include flag is
 // *includeThoughts when given, otherwise true if a non-zero budget was supplied.
 // It is written to include_thoughts only when neither includeThoughts nor
 // include_thoughts is already present. With both arguments nil the body is
 // returned unchanged. A failed write leaves the previous bytes in place.
 func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool) []byte {
 	const (
 		budgetPath       = "generationConfig.thinkingConfig.thinkingBudget"
 		camelIncludePath = "generationConfig.thinkingConfig.includeThoughts"
 		snakeIncludePath = "generationConfig.thinkingConfig.include_thoughts"
 	)
 	if budget == nil && includeThoughts == nil {
 		return body
 	}
 	result := body
 	if budget != nil {
 		if next, err := sjson.SetBytes(result, budgetPath, *budget); err == nil {
 			result = next
 		}
 	}
 	// Default to including thoughts when a budget override is present but no explicit include flag is provided.
 	var include bool
 	switch {
 	case includeThoughts != nil:
 		include = *includeThoughts
 	case budget != nil && *budget != 0:
 		include = true
 	default:
 		return result
 	}
 	// Never override an include flag that the body already carries.
 	if gjson.GetBytes(result, camelIncludePath).Exists() || gjson.GetBytes(result, snakeIncludePath).Exists() {
 		return result
 	}
 	if next, err := sjson.SetBytes(result, snakeIncludePath, include); err == nil {
 		result = next
 	}
 	return result
 }
 // ApplyGeminiCLIThinkingConfig writes a thinking budget and/or include-thoughts
 // flag under request.generationConfig.thinkingConfig of a request body.
 //
 // When budget is non-nil it is written to thinkingBudget. The include flag is
 // *includeThoughts when given, otherwise true if a non-zero budget was supplied.
 // It is written to include_thoughts only when neither includeThoughts nor
 // include_thoughts is already present. With both arguments nil the body is
 // returned unchanged. A failed write leaves the previous bytes in place.
 func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *bool) []byte {
 	const (
 		budgetPath       = "request.generationConfig.thinkingConfig.thinkingBudget"
 		camelIncludePath = "request.generationConfig.thinkingConfig.includeThoughts"
 		snakeIncludePath = "request.generationConfig.thinkingConfig.include_thoughts"
 	)
 	if budget == nil && includeThoughts == nil {
 		return body
 	}
 	result := body
 	if budget != nil {
 		if next, err := sjson.SetBytes(result, budgetPath, *budget); err == nil {
 			result = next
 		}
 	}
 	// Default to including thoughts when a budget override is present but no explicit include flag is provided.
 	var include bool
 	switch {
 	case includeThoughts != nil:
 		include = *includeThoughts
 	case budget != nil && *budget != 0:
 		include = true
 	default:
 		return result
 	}
 	// Never override an include flag that the body already carries.
 	if gjson.GetBytes(result, camelIncludePath).Exists() || gjson.GetBytes(result, snakeIncludePath).Exists() {
 		return result
 	}
 	if next, err := sjson.SetBytes(result, snakeIncludePath, include); err == nil {
 		result = next
 	}
 	return result
 }
 // ApplyGeminiThinkingLevel applies thinkingLevel config for Gemini 3 models.
 // For standard Gemini API format (generationConfig.thinkingConfig path).
 // Per Google's documentation, Gemini 3 models should use thinkingLevel instead of thinkingBudget.
 //
 // A non-empty level is written to thinkingLevel. includeThoughts is written
 // (as *includeThoughts, or true when only a level was given) unless the body
 // already has includeThoughts or include_thoughts. If the incoming body had a
 // thinkingBudget it is removed. With an empty level and a nil includeThoughts
 // the body is returned unchanged.
 func ApplyGeminiThinkingLevel(body []byte, level string, includeThoughts *bool) []byte {
 	const (
 		levelPath        = "generationConfig.thinkingConfig.thinkingLevel"
 		budgetPath       = "generationConfig.thinkingConfig.thinkingBudget"
 		camelIncludePath = "generationConfig.thinkingConfig.includeThoughts"
 		snakeIncludePath = "generationConfig.thinkingConfig.include_thoughts"
 	)
 	if level == "" && includeThoughts == nil {
 		return body
 	}
 	result := body
 	if level != "" {
 		if next, err := sjson.SetBytes(result, levelPath, level); err == nil {
 			result = next
 		}
 	}
 	// Past the guard above, a nil includeThoughts implies a non-empty level,
 	// which defaults to including thoughts.
 	include := true
 	if includeThoughts != nil {
 		include = *includeThoughts
 	}
 	alreadySet := gjson.GetBytes(result, camelIncludePath).Exists() || gjson.GetBytes(result, snakeIncludePath).Exists()
 	if !alreadySet {
 		if next, err := sjson.SetBytes(result, camelIncludePath, include); err == nil {
 			result = next
 		}
 	}
 	// The budget check is made against the incoming body, not the rewritten one.
 	if gjson.GetBytes(body, budgetPath).Exists() {
 		result, _ = sjson.DeleteBytes(result, budgetPath)
 	}
 	return result
 }
 // ApplyGeminiCLIThinkingLevel applies thinkingLevel config for Gemini 3 models.
 // For Gemini CLI API format (request.generationConfig.thinkingConfig path).
 // Per Google's documentation, Gemini 3 models should use thinkingLevel instead of thinkingBudget.
 //
 // A non-empty level is written to thinkingLevel. includeThoughts is written
 // (as *includeThoughts, or true when only a level was given) unless the body
 // already has includeThoughts or include_thoughts. If the incoming body had a
 // thinkingBudget it is removed. With an empty level and a nil includeThoughts
 // the body is returned unchanged.
 func ApplyGeminiCLIThinkingLevel(body []byte, level string, includeThoughts *bool) []byte {
 	const (
 		levelPath        = "request.generationConfig.thinkingConfig.thinkingLevel"
 		budgetPath       = "request.generationConfig.thinkingConfig.thinkingBudget"
 		camelIncludePath = "request.generationConfig.thinkingConfig.includeThoughts"
 		snakeIncludePath = "request.generationConfig.thinkingConfig.include_thoughts"
 	)
 	if level == "" && includeThoughts == nil {
 		return body
 	}
 	result := body
 	if level != "" {
 		if next, err := sjson.SetBytes(result, levelPath, level); err == nil {
 			result = next
 		}
 	}
 	// Past the guard above, a nil includeThoughts implies a non-empty level,
 	// which defaults to including thoughts.
 	include := true
 	if includeThoughts != nil {
 		include = *includeThoughts
 	}
 	alreadySet := gjson.GetBytes(result, camelIncludePath).Exists() || gjson.GetBytes(result, snakeIncludePath).Exists()
 	if !alreadySet {
 		if next, err := sjson.SetBytes(result, camelIncludePath, include); err == nil {
 			result = next
 		}
 	}
 	// The budget check is made against the incoming body, not the rewritten one.
 	if gjson.GetBytes(body, budgetPath).Exists() {
 		result, _ = sjson.DeleteBytes(result, budgetPath)
 	}
 	return result
 }
 // ValidateGemini3ThinkingLevel checks level against the thinkingLevel values
 // allowed for the Gemini 3 variant named by model.
 // On success it returns the level lowercased and trimmed, with true. It returns
 // ("", false) for an empty level, a non-Gemini-3 model, or a level that is not
 // in the variant's list.
 func ValidateGemini3ThinkingLevel(model, level string) (string, bool) {
 	if level == "" {
 		return "", false
 	}
 	candidate := strings.ToLower(strings.TrimSpace(level))
 	var allowed []string
 	switch {
 	case IsGemini3ProModel(model):
 		allowed = Gemini3ProThinkingLevels
 	case IsGemini3FlashModel(model), IsGemini3Model(model):
 		// Unknown Gemini 3 variants fall back to the Flash list, which allows all levels.
 		allowed = Gemini3FlashThinkingLevels
 	default:
 		return "", false
 	}
 	for i := range allowed {
 		if allowed[i] == candidate {
 			return candidate, true
 		}
 	}
 	return "", false
 }
 // ThinkingBudgetToGemini3Level translates a numeric thinkingBudget into a Gemini 3
 // thinkingLevel, for backward compatibility with callers that still send budgets.
 //
 // It returns ("", false) when model is not a Gemini 3 model. Otherwise:
 //   - -1 (dynamic)      -> "high"
 //   - 0..512            -> "minimal" on Flash, "low" elsewhere
 //   - other values up to 1024 -> "low"
 //   - 1025..8192        -> "medium" on Flash, "low" elsewhere (Pro has no medium)
 //   - above 8192        -> "high"
 func ThinkingBudgetToGemini3Level(model string, budget int) (string, bool) {
 	if !IsGemini3Model(model) {
 		return "", false
 	}
 	// Dynamic budget and large budgets both map to the API default level.
 	if budget == -1 || budget > 8192 {
 		return "high", true
 	}
 	// Zero cannot disable thinking on Gemini 3, so it shares the lowest tier
 	// with small positive budgets.
 	if budget >= 0 && budget <= 512 {
 		if IsGemini3FlashModel(model) {
 			return "minimal", true
 		}
 		return "low", true
 	}
 	if budget <= 1024 {
 		return "low", true
 	}
 	// Remaining range is 1025..8192.
 	if IsGemini3FlashModel(model) {
 		return "medium", true
 	}
 	return "low", true
 }
 // modelsWithDefaultThinking is the set of model names that should have thinking
 // enabled by default when no explicit thinkingConfig is provided. It is consulted
 // through ModelHasDefaultThinking.
 //
 // The map is currently empty. Gemini 3 models are NOT included here because per
 // Google's official documentation:
 //   - thinkingLevel defaults to "high" (dynamic thinking)
 //   - includeThoughts defaults to false
 //
 // We should not override these API defaults; let users explicitly configure if needed.
 var modelsWithDefaultThinking = map[string]bool{
 	// "gemini-3-pro-preview":       true,
 	// "gemini-3-pro-image-preview": true,
 	// "gemini-3-flash-preview":     true,
 }
 // ModelHasDefaultThinking reports whether model is registered in
 // modelsWithDefaultThinking with a true value. Unlisted models report false.
 func ModelHasDefaultThinking(model string) bool {
 	enabled, listed := modelsWithDefaultThinking[model]
 	return listed && enabled
 }
 // ApplyDefaultThinkingIfNeeded adds a default thinkingConfig to a standard Gemini
 // API request body (generationConfig.thinkingConfig path).
 //
 // The body is returned unchanged when the model has no default thinking or when
 // a thinkingConfig is already present. For Gemini 3 models only includeThoughts
 // is set, leaving the level to the API default; other models receive a dynamic
 // thinkingBudget (-1) together with include_thoughts.
 func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte {
 	const configPath = "generationConfig.thinkingConfig"
 	if !ModelHasDefaultThinking(model) {
 		return body
 	}
 	if gjson.GetBytes(body, configPath).Exists() {
 		return body
 	}
 	if IsGemini3Model(model) {
 		// Gemini 3 uses thinkingLevel; its API default ("high") is left alone.
 		withThoughts, _ := sjson.SetBytes(body, configPath+".includeThoughts", true)
 		return withThoughts
 	}
 	// Gemini 2.5 and other models are budget based.
 	withBudget, _ := sjson.SetBytes(body, configPath+".thinkingBudget", -1)
 	withThoughts, _ := sjson.SetBytes(withBudget, configPath+".include_thoughts", true)
 	return withThoughts
 }
 // ApplyGemini3ThinkingLevelFromMetadata writes a thinkingLevel derived from request
 // metadata into a standard Gemini API body (generationConfig.thinkingConfig path).
 //
 // It covers model-name suffixes that carry either an effort string (e.g.
 // model(minimal)) or a numeric budget (e.g. model(1000)), the latter being
 // converted to a level. The body is returned unchanged when neither the
 // original model from metadata nor model is a Gemini 3 model, or when no
 // usable level can be derived.
 func ApplyGemini3ThinkingLevelFromMetadata(model string, metadata map[string]any, body []byte) []byte {
 	// Prefer the original (alias) model from metadata for variant detection.
 	original := ResolveOriginalModel(model, metadata)
 	var target string
 	switch {
 	case IsGemini3Model(original):
 		target = original
 	case IsGemini3Model(model):
 		target = model
 	default:
 		return body
 	}
 	// An explicit effort string wins when it is a valid level for the variant.
 	if effort, found := ReasoningEffortFromMetadata(metadata); found && effort != "" {
 		if level, valid := ValidateGemini3ThinkingLevel(target, effort); valid {
 			return ApplyGeminiThinkingLevel(body, level, nil)
 		}
 	}
 	// Otherwise fall back to converting a numeric budget.
 	budget, _, _, matched := ThinkingFromMetadata(metadata)
 	if !matched || budget == nil {
 		return body
 	}
 	level, valid := ThinkingBudgetToGemini3Level(target, *budget)
 	if !valid {
 		return body
 	}
 	return ApplyGeminiThinkingLevel(body, level, nil)
 }
 // ApplyGemini3ThinkingLevelFromMetadataCLI writes a thinkingLevel derived from request
 // metadata into a Gemini CLI body (request.generationConfig.thinkingConfig path).
 //
 // It covers model-name suffixes that carry either an effort string (e.g.
 // model(minimal)) or a numeric budget (e.g. model(1000)), the latter being
 // converted to a level. The body is returned unchanged when neither the
 // original model from metadata nor model is a Gemini 3 model, or when no
 // usable level can be derived.
 func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string]any, body []byte) []byte {
 	// Prefer the original (alias) model from metadata for variant detection.
 	original := ResolveOriginalModel(model, metadata)
 	var target string
 	switch {
 	case IsGemini3Model(original):
 		target = original
 	case IsGemini3Model(model):
 		target = model
 	default:
 		return body
 	}
 	// An explicit effort string wins when it is a valid level for the variant.
 	if effort, found := ReasoningEffortFromMetadata(metadata); found && effort != "" {
 		if level, valid := ValidateGemini3ThinkingLevel(target, effort); valid {
 			return ApplyGeminiCLIThinkingLevel(body, level, nil)
 		}
 	}
 	// Otherwise fall back to converting a numeric budget.
 	budget, _, _, matched := ThinkingFromMetadata(metadata)
 	if !matched || budget == nil {
 		return body
 	}
 	level, valid := ThinkingBudgetToGemini3Level(target, *budget)
 	if !valid {
 		return body
 	}
 	return ApplyGeminiCLIThinkingLevel(body, level, nil)
 }
 // ApplyDefaultThinkingIfNeededCLI adds a default thinkingConfig to a Gemini CLI
 // request body (request.generationConfig.thinkingConfig path).
 //
 // Both the original model resolved from metadata and model itself are consulted.
 // The body is returned unchanged when neither has default thinking or when a
 // thinkingConfig already exists. For Gemini 3 models only includeThoughts is set;
 // other models receive a dynamic thinkingBudget (-1) together with include_thoughts.
 func ApplyDefaultThinkingIfNeededCLI(model string, metadata map[string]any, body []byte) []byte {
 	const configPath = "request.generationConfig.thinkingConfig"
 	// The alias stored in metadata, when present, drives the property lookup.
 	original := ResolveOriginalModel(model, metadata)
 	wantsDefault := ModelHasDefaultThinking(original) || ModelHasDefaultThinking(model)
 	if !wantsDefault {
 		return body
 	}
 	if gjson.GetBytes(body, configPath).Exists() {
 		return body
 	}
 	if IsGemini3Model(original) || IsGemini3Model(model) {
 		// Gemini 3 uses thinkingLevel; its API default ("high") is left alone.
 		withThoughts, _ := sjson.SetBytes(body, configPath+".includeThoughts", true)
 		return withThoughts
 	}
 	// Gemini 2.5 and other models are budget based.
 	withBudget, _ := sjson.SetBytes(body, configPath+".thinkingBudget", -1)
 	withThoughts, _ := sjson.SetBytes(withBudget, configPath+".include_thoughts", true)
 	return withThoughts
 }
 // StripThinkingConfigIfUnsupported deletes thinkingConfig from the request body when
 // ModelSupportsThinking reports that the target model has no Thinking capability.
 // Both the Gemini CLI envelope and the standard Gemini envelope are cleaned. It is
 // a final safety net for thinking config injected upstream for an unsupported model.
 // Bodies for supporting models, and empty bodies, are returned as-is.
 func StripThinkingConfigIfUnsupported(model string, body []byte) []byte {
 	if ModelSupportsThinking(model) || len(body) == 0 {
 		return body
 	}
 	cleaned := body
 	// Gemini CLI path first, then the standard Gemini path.
 	for _, path := range []string{
 		"request.generationConfig.thinkingConfig",
 		"generationConfig.thinkingConfig",
 	} {
 		cleaned, _ = sjson.DeleteBytes(cleaned, path)
 	}
 	return cleaned
 }
 // NormalizeGeminiThinkingBudget normalizes thinkingBudget in a standard Gemini
 // request body (generationConfig.thinkingConfig.thinkingBudget path).
 //
 // A body without a thinkingBudget is returned unchanged. For Gemini 3 models the
 // budget is replaced by the equivalent thinkingLevel, unless skipGemini3Check is
 // supplied and true. For every other case the budget is rewritten with the value
 // returned by NormalizeThinkingBudget.
 func NormalizeGeminiThinkingBudget(model string, body []byte, skipGemini3Check ...bool) []byte {
 	const (
 		budgetPath = "generationConfig.thinkingConfig.thinkingBudget"
 		levelPath  = "generationConfig.thinkingConfig.thinkingLevel"
 	)
 	current := gjson.GetBytes(body, budgetPath)
 	if !current.Exists() {
 		return body
 	}
 	requested := int(current.Int())
 	skipConversion := len(skipGemini3Check) > 0 && skipGemini3Check[0]
 	if !IsGemini3Model(model) || skipConversion {
 		// Gemini 2.5 and other models keep a budget, adjusted for the model.
 		out, _ := sjson.SetBytes(body, budgetPath, NormalizeThinkingBudget(model, requested))
 		return out
 	}
 	out := body
 	if level, ok := ThinkingBudgetToGemini3Level(model, requested); ok {
 		out, _ = sjson.SetBytes(out, levelPath, level)
 	}
 	// The budget is dropped whether or not a level could be derived, so the API
 	// default applies when conversion fails.
 	out, _ = sjson.DeleteBytes(out, budgetPath)
 	return out
 }
 // NormalizeGeminiCLIThinkingBudget normalizes thinkingBudget in a Gemini CLI
 // request body (request.generationConfig.thinkingConfig.thinkingBudget path).
 //
 // A body without a thinkingBudget is returned unchanged. For Gemini 3 models the
 // budget is replaced by the equivalent thinkingLevel, unless skipGemini3Check is
 // supplied and true. For every other case the budget is rewritten with the value
 // returned by NormalizeThinkingBudget.
 func NormalizeGeminiCLIThinkingBudget(model string, body []byte, skipGemini3Check ...bool) []byte {
 	const (
 		budgetPath = "request.generationConfig.thinkingConfig.thinkingBudget"
 		levelPath  = "request.generationConfig.thinkingConfig.thinkingLevel"
 	)
 	current := gjson.GetBytes(body, budgetPath)
 	if !current.Exists() {
 		return body
 	}
 	requested := int(current.Int())
 	skipConversion := len(skipGemini3Check) > 0 && skipGemini3Check[0]
 	if !IsGemini3Model(model) || skipConversion {
 		// Gemini 2.5 and other models keep a budget, adjusted for the model.
 		out, _ := sjson.SetBytes(body, budgetPath, NormalizeThinkingBudget(model, requested))
 		return out
 	}
 	out := body
 	if level, ok := ThinkingBudgetToGemini3Level(model, requested); ok {
 		out, _ = sjson.SetBytes(out, levelPath, level)
 	}
 	// The budget is dropped whether or not a level could be derived, so the API
 	// default applies when conversion fails.
 	out, _ = sjson.DeleteBytes(out, budgetPath)
 	return out
 }
 // ReasoningEffortBudgetMapping defines the thinkingBudget values for each reasoning effort level.
 // Keys are lowercase effort names; ApplyReasoningEffortToGemini and
 // ApplyReasoningEffortToGeminiCLI look up the trimmed, lowercased effort here.
 // Those two callers handle "none" before the lookup by deleting thinkingConfig,
 // and -1 (the "auto" entry) denotes a dynamic budget.
 var ReasoningEffortBudgetMapping = map[string]int{
 	"none":    0,
 	"auto":    -1,
 	"minimal": 512,
 	"low":     1024,
 	"medium":  8192,
 	"high":    24576,
 	"xhigh":   32768,
 }
 // ApplyReasoningEffortToGemini maps an OpenAI reasoning_effort onto thinkingConfig in
 // a standard Gemini API body (generationConfig.thinkingConfig path).
 //
 // The effort is trimmed and lowercased first. An empty or unknown effort leaves
 // the body unchanged; "none" removes thinkingConfig entirely; any other known
 // effort sets thinkingBudget from ReasoningEffortBudgetMapping and turns on
 // include_thoughts.
 func ApplyReasoningEffortToGemini(body []byte, effort string) []byte {
 	const configPath = "generationConfig.thinkingConfig"
 	key := strings.ToLower(strings.TrimSpace(effort))
 	switch key {
 	case "":
 		return body
 	case "none":
 		stripped, _ := sjson.DeleteBytes(body, configPath)
 		return stripped
 	}
 	tokens, known := ReasoningEffortBudgetMapping[key]
 	if !known {
 		return body
 	}
 	out, _ := sjson.SetBytes(body, configPath+".thinkingBudget", tokens)
 	out, _ = sjson.SetBytes(out, configPath+".include_thoughts", true)
 	return out
 }
 // ApplyReasoningEffortToGeminiCLI maps an OpenAI reasoning_effort onto thinkingConfig
 // in a Gemini CLI body (request.generationConfig.thinkingConfig path).
 //
 // The effort is trimmed and lowercased first. An empty or unknown effort leaves
 // the body unchanged; "none" removes thinkingConfig entirely; any other known
 // effort sets thinkingBudget from ReasoningEffortBudgetMapping and turns on
 // include_thoughts.
 func ApplyReasoningEffortToGeminiCLI(body []byte, effort string) []byte {
 	const configPath = "request.generationConfig.thinkingConfig"
 	key := strings.ToLower(strings.TrimSpace(effort))
 	switch key {
 	case "":
 		return body
 	case "none":
 		stripped, _ := sjson.DeleteBytes(body, configPath)
 		return stripped
 	}
 	tokens, known := ReasoningEffortBudgetMapping[key]
 	if !known {
 		return body
 	}
 	out, _ := sjson.SetBytes(body, configPath+".thinkingBudget", tokens)
 	out, _ = sjson.SetBytes(out, configPath+".include_thoughts", true)
 	return out
 }
 // ConvertThinkingLevelToBudget rewrites "generationConfig.thinkingConfig.thinkingLevel"
 // as a numeric "thinkingBudget" for Gemini 2.5 models.
 //
 // A body without thinkingLevel is returned unchanged. For Gemini 3 models the
 // level is preserved unless skipGemini3Check is supplied and true. Conversion
 // uses ThinkingLevelToBudget:
 //   - "high" -> 32768
 //   - "medium" -> 8192
 //   - "low" -> 1024
 //   - "minimal" -> 512
 //
 // An unrecognized level is simply removed. After a successful conversion
 // thinkingLevel is removed; if either write fails the original body is returned.
 func ConvertThinkingLevelToBudget(body []byte, model string, skipGemini3Check ...bool) []byte {
 	const (
 		levelPath  = "generationConfig.thinkingConfig.thinkingLevel"
 		budgetPath = "generationConfig.thinkingConfig.thinkingBudget"
 	)
 	level := gjson.GetBytes(body, levelPath)
 	if !level.Exists() {
 		return body
 	}
 	forceConvert := len(skipGemini3Check) > 0 && skipGemini3Check[0]
 	if IsGemini3Model(model) && !forceConvert {
 		// Gemini 3 understands thinkingLevel natively.
 		return body
 	}
 	tokens, known := ThinkingLevelToBudget(level.String())
 	if !known {
 		stripped, _ := sjson.DeleteBytes(body, levelPath)
 		return stripped
 	}
 	withBudget, err := sjson.SetBytes(body, budgetPath, tokens)
 	if err != nil {
 		return body
 	}
 	converted, err := sjson.DeleteBytes(withBudget, levelPath)
 	if err != nil {
 		return body
 	}
 	return converted
 }
 // ConvertThinkingLevelToBudgetCLI rewrites
 // "request.generationConfig.thinkingConfig.thinkingLevel" as a numeric
 // "thinkingBudget" for Gemini 2.5 models.
 //
 // A body without thinkingLevel is returned unchanged, and Gemini 3 models always
 // keep their thinkingLevel. An unrecognized level is simply removed. After a
 // successful conversion thinkingLevel is removed; if either write fails the
 // original body is returned.
 func ConvertThinkingLevelToBudgetCLI(body []byte, model string) []byte {
 	const (
 		levelPath  = "request.generationConfig.thinkingConfig.thinkingLevel"
 		budgetPath = "request.generationConfig.thinkingConfig.thinkingBudget"
 	)
 	level := gjson.GetBytes(body, levelPath)
 	if !level.Exists() {
 		return body
 	}
 	if IsGemini3Model(model) {
 		// Gemini 3 understands thinkingLevel natively.
 		return body
 	}
 	tokens, known := ThinkingLevelToBudget(level.String())
 	if !known {
 		stripped, _ := sjson.DeleteBytes(body, levelPath)
 		return stripped
 	}
 	withBudget, err := sjson.SetBytes(body, budgetPath, tokens)
 	if err != nil {
 		return body
 	}
 	converted, err := sjson.DeleteBytes(withBudget, levelPath)
 	if err != nil {
 		return body
 	}
 	return converted
 }
--- a/internal/util/thinking.go
+++ b/internal/util/thinking.go
@@ -1,245 +0,0 @@
 package util
 import (
 	"strings"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 )
 // ModelSupportsThinking reports whether model carries Thinking metadata.
 //
 // Sources are consulted in order: the global dynamic registry, the static model
 // definitions, then the Antigravity static config. The first source that knows
 // the model decides the answer; an empty or unknown model reports false.
 func ModelSupportsThinking(model string) bool {
 	if model == "" {
 		return false
 	}
 	dynamic := registry.GetGlobalRegistry().GetModelInfo(model)
 	if dynamic != nil {
 		return dynamic.Thinking != nil
 	}
 	fixed := registry.LookupStaticModelInfo(model)
 	if fixed != nil {
 		return fixed.Thinking != nil
 	}
 	antigravity := registry.GetAntigravityModelConfig()[model]
 	return antigravity != nil && antigravity.Thinking != nil
 }
 // NormalizeThinkingBudget fits a requested thinking budget into the range that
 // thinkingRangeFromRegistry reports for model.
 //
 // When the model is unknown or has no Thinking metadata, budget is returned as
 // given. Otherwise:
 //   - -1 (dynamic) stays -1 if dynamic budgets are allowed; if not, the
 //     midpoint of the range is used, falling back to 0 (when zero is allowed)
 //     or the minimum when the midpoint is not positive.
 //   - 0 stays 0 if zero is allowed, else becomes the minimum.
 //   - any other value is clamped to [min, max].
 func NormalizeThinkingBudget(model string, budget int) int {
 	known, lo, hi, allowZero, allowDynamic := thinkingRangeFromRegistry(model)
 	if !known {
 		return budget
 	}
 	switch {
 	case budget == -1:
 		if allowDynamic {
 			return -1
 		}
 		if mid := (lo + hi) / 2; mid > 0 {
 			return mid
 		}
 		if allowZero {
 			return 0
 		}
 		return lo
 	case budget == 0:
 		if allowZero {
 			return 0
 		}
 		return lo
 	case budget < lo:
 		return lo
 	case budget > hi:
 		return hi
 	default:
 		return budget
 	}
 }
 // thinkingRangeFromRegistry looks up the thinking budget range for model.
 //
 // Sources are tried in order: the global dynamic registry, the static model
 // definitions, then the Antigravity static config. The first one holding
 // Thinking metadata supplies the result. found is false, with all other results
 // zeroed, for an empty model or when no source has Thinking metadata.
 func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zeroAllowed bool, dynamicAllowed bool) {
 	if model == "" {
 		return false, 0, 0, false, false
 	}
 	if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil && info.Thinking != nil {
 		t := info.Thinking
 		return true, t.Min, t.Max, t.ZeroAllowed, t.DynamicAllowed
 	}
 	if info := registry.LookupStaticModelInfo(model); info != nil && info.Thinking != nil {
 		t := info.Thinking
 		return true, t.Min, t.Max, t.ZeroAllowed, t.DynamicAllowed
 	}
 	if cfg := registry.GetAntigravityModelConfig()[model]; cfg != nil && cfg.Thinking != nil {
 		t := cfg.Thinking
 		return true, t.Min, t.Max, t.ZeroAllowed, t.DynamicAllowed
 	}
 	return false, 0, 0, false, false
 }
 // GetModelThinkingLevels returns the discrete reasoning effort levels recorded for
 // model in the global registry. It returns nil for an empty model name, an
 // unregistered model, or a model without Thinking metadata.
 func GetModelThinkingLevels(model string) []string {
 	if model == "" {
 		return nil
 	}
 	if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil && info.Thinking != nil {
 		return info.Thinking.Levels
 	}
 	return nil
 }
 // ModelUsesThinkingLevels reports whether model has at least one discrete
 // reasoning effort level, i.e. it is level-based rather than budget-based.
 func ModelUsesThinkingLevels(model string) bool {
 	return len(GetModelThinkingLevels(model)) > 0
 }
 // NormalizeReasoningEffortLevel matches effort against the levels of model,
 // ignoring case and surrounding whitespace. On a match it returns the level as
 // spelled in the model's level list and true. It returns ("", false) when the
 // model has no levels or none of them matches.
 func NormalizeReasoningEffortLevel(model, effort string) (string, bool) {
 	supported := GetModelThinkingLevels(model)
 	if len(supported) == 0 {
 		return "", false
 	}
 	wanted := strings.ToLower(strings.TrimSpace(effort))
 	for i := range supported {
 		if strings.ToLower(supported[i]) == wanted {
 			return supported[i], true
 		}
 	}
 	return "", false
 }
 // IsOpenAICompatibilityModel reports whether the global registry lists model with
 // the type "openai-compatibility" (compared case-insensitively after trimming).
 // Such models may not advertise Thinking metadata in the registry. An empty or
 // unregistered model reports false.
 func IsOpenAICompatibilityModel(model string) bool {
 	if model == "" {
 		return false
 	}
 	info := registry.GetGlobalRegistry().GetModelInfo(model)
 	return info != nil && strings.EqualFold(strings.TrimSpace(info.Type), "openai-compatibility")
 }
 // ThinkingEffortToBudget converts a reasoning effort level into a numeric thinking
 // budget (tokens). Every level except "none" is passed through
 // NormalizeThinkingBudget so the result fits the model's supported range.
 //
 // Base values before normalization:
 //   - "none"    -> 0 (returned as-is)
 //   - "auto"    -> -1
 //   - "minimal" -> 512
 //   - "low"     -> 1024
 //   - "medium"  -> 8192
 //   - "high"    -> 24576
 //   - "xhigh"   -> 32768
 //
 // It returns (0, false) when the effort is empty or unsupported.
 func ThinkingEffortToBudget(model, effort string) (int, bool) {
 	if effort == "" {
 		return 0, false
 	}
 	// Prefer the model's own spelling of the level; otherwise use the
 	// trimmed, lowercased input.
 	key, matched := NormalizeReasoningEffortLevel(model, effort)
 	if !matched {
 		key = strings.ToLower(strings.TrimSpace(effort))
 	}
 	var requested int
 	switch key {
 	case "none":
 		return 0, true
 	case "auto":
 		requested = -1
 	case "minimal":
 		requested = 512
 	case "low":
 		requested = 1024
 	case "medium":
 		requested = 8192
 	case "high":
 		requested = 24576
 	case "xhigh":
 		requested = 32768
 	default:
 		return 0, false
 	}
 	return NormalizeThinkingBudget(model, requested), true
 }
 // ThinkingLevelToBudget converts a Gemini thinkingLevel into a numeric thinking
 // budget (tokens). The level is compared after trimming and lowercasing.
 //
 // Mappings:
 //   - "minimal" -> 512
 //   - "low"     -> 1024
 //   - "medium"  -> 8192
 //   - "high"    -> 32768
 //
 // It returns (0, false) when the level is empty or unsupported.
 func ThinkingLevelToBudget(level string) (int, bool) {
 	table := [...]struct {
 		name   string
 		tokens int
 	}{
 		{"minimal", 512},
 		{"low", 1024},
 		{"medium", 8192},
 		{"high", 32768},
 	}
 	wanted := strings.ToLower(strings.TrimSpace(level))
 	for _, entry := range table {
 		if entry.name == wanted {
 			return entry.tokens, true
 		}
 	}
 	return 0, false
 }
 // ThinkingBudgetToEffort converts a numeric thinking budget (tokens) into a
 // reasoning effort level for level-based models.
 //
 // Mappings:
 //   - -1           -> "auto"
 //   - 0            -> first level of the model's level list, or "none" if it has none
 //   - 1..1024      -> "low"
 //   - 1025..8192   -> "medium"
 //   - 8193..24576  -> "high"
 //   - 24577..      -> last level of the model's level list, or "xhigh" if it has none
 //
 // It returns ("", false) for negative budgets other than -1.
 func ThinkingBudgetToEffort(model string, budget int) (string, bool) {
 	switch {
 	case budget < -1:
 		return "", false
 	case budget == -1:
 		return "auto", true
 	case budget == 0:
 		lowest := "none"
 		if levels := GetModelThinkingLevels(model); len(levels) > 0 {
 			lowest = levels[0]
 		}
 		return lowest, true
 	case budget <= 1024:
 		return "low", true
 	case budget <= 8192:
 		return "medium", true
 	case budget <= 24576:
 		return "high", true
 	}
 	// Anything above 24576 maps to the highest level available.
 	highest := "xhigh"
 	if levels := GetModelThinkingLevels(model); len(levels) > 0 {
 		highest = levels[len(levels)-1]
 	}
 	return highest, true
 }
--- a/internal/util/thinking_suffix.go
+++ b/internal/util/thinking_suffix.go
@@ -1,296 +0,0 @@
 package util
 import (
 	"encoding/json"
 	"strconv"
 	"strings"
 )
 // Metadata keys used to carry thinking overrides alongside a request.
 //   - ThinkingBudgetMetadataKey: numeric thinking budget parsed from a model suffix.
 //   - ThinkingIncludeThoughtsMetadataKey: boolean include-thoughts override read
 //     by ThinkingFromMetadata.
 //   - ReasoningEffortMetadataKey: lowercase reasoning effort level parsed from a
 //     model suffix.
 //   - ThinkingOriginalModelMetadataKey: the full model name, suffix included, as
 //     it was passed to NormalizeThinkingModel.
 //   - ModelMappingOriginalModelMetadataKey: original model name that
 //     ResolveOriginalModel checks first.
 const (
 	ThinkingBudgetMetadataKey            = "thinking_budget"
 	ThinkingIncludeThoughtsMetadataKey   = "thinking_include_thoughts"
 	ReasoningEffortMetadataKey           = "reasoning_effort"
 	ThinkingOriginalModelMetadataKey     = "thinking_original_model"
 	ModelMappingOriginalModelMetadataKey = "model_mapping_original_model"
 )
 // NormalizeThinkingModel splits a trailing "(<value>)" thinking suffix off a model
 // name and returns the base model together with the metadata extracted from it.
 //
 // The value is classified with parseIntPrefix: if that accepts it, the value is a
 // numeric thinking budget; any other non-blank value is a reasoning effort level,
 // stored trimmed and lowercased.
 //
 // Examples:
 //   - "claude-sonnet-4-5-20250929(16384)" → budget=16384
 //   - "gpt-5.1(high)" → reasoning_effort="high"
 //   - "gemini-2.5-pro(32768)" → budget=32768
 //
 // The name is returned unchanged with nil metadata when it has no "(", does not
 // end in ")", or contains empty parentheses "()". A suffix holding only
 // whitespace is stripped from the name but produces nil metadata.
 func NormalizeThinkingModel(modelName string) (string, map[string]any) {
 	open := strings.LastIndex(modelName, "(")
 	if open == -1 || !strings.HasSuffix(modelName, ")") {
 		// No suffix, or an unterminated parenthesis: nothing to parse.
 		return modelName, nil
 	}
 	inner := modelName[open+1 : len(modelName)-1]
 	if inner == "" {
 		// Empty parentheses are not supported.
 		return modelName, nil
 	}
 	base := modelName[:open]
 	if budget, numeric := parseIntPrefix(inner); numeric {
 		return base, map[string]any{
 			ThinkingOriginalModelMetadataKey: modelName,
 			ThinkingBudgetMetadataKey:        budget,
 		}
 	}
 	effort := strings.ToLower(strings.TrimSpace(inner))
 	if effort == "" {
 		return base, nil
 	}
 	return base, map[string]any{
 		ThinkingOriginalModelMetadataKey: modelName,
 		ReasoningEffortMetadataKey:       effort,
 	}
 }
 // ThinkingFromMetadata extracts thinking overrides from metadata produced by NormalizeThinkingModel.
 // It accepts both the new generic keys and legacy Gemini-specific keys.
 func ThinkingFromMetadata(metadata map[string]any) (*int, *bool, *string, bool) {
 	if len(metadata) == 0 {
 		return nil, nil, nil, false
 	}
 	var (
 		budgetPtr  *int
 		includePtr *bool
 		effortPtr  *string
 		matched    bool
 	)
 	readBudget := func(key string) {
 		if budgetPtr != nil {
 			return
 		}
 		if raw, ok := metadata[key]; ok {
 			if v, okNumber := parseNumberToInt(raw); okNumber {
 				budget := v
 				budgetPtr = &budget
 				matched = true
 			}
 		}
 	}
 	readInclude := func(key string) {
 		if includePtr != nil {
 			return
 		}
 		if raw, ok := metadata[key]; ok {
 			switch v := raw.(type) {
 			case bool:
 				val := v
 				includePtr = &val
 				matched = true
 			case *bool:
 				if v != nil {
 					val := *v
 					includePtr = &val
 					matched = true
 				}
 			}
 		}
 	}
 	readEffort := func(key string) {
 		if effortPtr != nil {
 			return
 		}
 		if raw, ok := metadata[key]; ok {
 			if val, okStr := raw.(string); okStr && strings.TrimSpace(val) != "" {
 				normalized := strings.ToLower(strings.TrimSpace(val))
 				effortPtr = &normalized
 				matched = true
 			}
 		}
 	}
 	readBudget(ThinkingBudgetMetadataKey)
 	readBudget(GeminiThinkingBudgetMetadataKey)
 	readInclude(ThinkingIncludeThoughtsMetadataKey)
 	readInclude(GeminiIncludeThoughtsMetadataKey)
 	readEffort(ReasoningEffortMetadataKey)
 	readEffort("reasoning.effort")
 	return budgetPtr, includePtr, effortPtr, matched
 }
 // ResolveThinkingConfigFromMetadata produces the thinking budget and include-thoughts
 // overrides for model from metadata. When only a reasoning effort is present it
 // is converted to a budget via ThinkingEffortToBudget where possible.
 //
 // It returns (nil, nil, false) when metadata holds no thinking overrides or when
 // the model uses discrete thinking levels: level-based (OpenAI-style) models do
 // not accept numeric budgets in Claude/Gemini-style protocols.
 func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (*int, *bool, bool) {
 	budget, include, effort, matched := ThinkingFromMetadata(metadata)
 	if !matched || ModelUsesThinkingLevels(model) {
 		return nil, nil, false
 	}
 	if budget == nil && effort != nil {
 		if derived, ok := ThinkingEffortToBudget(model, *effort); ok {
 			budget = &derived
 		}
 	}
 	resolved := budget != nil || include != nil || effort != nil
 	return budget, include, resolved
 }
 // ReasoningEffortFromMetadata derives a reasoning effort string from metadata.
 //
 // An explicit effort is returned trimmed and lowercased. Without one, a budget of
 // -1 yields "auto", a budget of 0 yields "none", and an include-thoughts flag set
 // to false yields "none". When overrides exist but none of these apply, the
 // result is ("", true). When metadata holds no overrides it is ("", false).
 func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
 	budget, include, effort, matched := ThinkingFromMetadata(metadata)
 	switch {
 	case !matched:
 		return "", false
 	case effort != nil && *effort != "":
 		return strings.ToLower(strings.TrimSpace(*effort)), true
 	case budget != nil && *budget == -1:
 		return "auto", true
 	case budget != nil && *budget == 0:
 		return "none", true
 	case include != nil && !*include:
 		return "none", true
 	default:
 		return "", true
 	}
 }
 // ResolveOriginalModel returns the original model name recorded in metadata, with
 // any thinking suffix removed, or falls back to the provided model.
 //
 // Metadata keys are tried in order: the model-mapping original, the thinking
 // original, then the Gemini original. A key is used when it holds a non-blank
 // string whose suffix-stripped form is non-empty. If none qualifies, the
 // suffix-stripped model is returned, or model itself when that is empty.
 func ResolveOriginalModel(model string, metadata map[string]any) string {
 	stripSuffix := func(name string) string {
 		if name == "" {
 			return ""
 		}
 		if base, _ := NormalizeThinkingModel(name); base != "" {
 			return base
 		}
 		return strings.TrimSpace(name)
 	}
 	candidateKeys := []string{
 		ModelMappingOriginalModelMetadataKey,
 		ThinkingOriginalModelMetadataKey,
 		GeminiOriginalModelMetadataKey,
 	}
 	for _, key := range candidateKeys {
 		// Reading a nil map is safe and simply yields no string.
 		stored, isString := metadata[key].(string)
 		if !isString || strings.TrimSpace(stored) == "" {
 			continue
 		}
 		if base := stripSuffix(stored); base != "" {
 			return base
 		}
 	}
 	// Metadata gave nothing usable: re-normalize the model name itself.
 	if base := stripSuffix(model); base != "" {
 		return base
 	}
 	return model
 }
 // parseIntPrefix parses the integer at the start of value.
 //
 // A single optional leading '-' is honored, followed by one or more ASCII
 // digits; anything after the digits is ignored (e.g. "16k" yields 16). The sign
 // is preserved, so "-1" yields -1 rather than 1. This matters because -1 is the
 // dynamic-budget sentinel used elsewhere in this package.
 //
 // It returns (0, false) when value is empty, has no digits directly after the
 // optional sign (including repeated signs such as "--5"), or the digits
 // overflow int.
 func parseIntPrefix(value string) (int, bool) {
 	start := 0
 	if len(value) > 0 && value[0] == '-' {
 		start = 1
 	}
 	end := start
 	for end < len(value) && value[end] >= '0' && value[end] <= '9' {
 		end++
 	}
 	if end == start {
 		// No digits after the optional sign.
 		return 0, false
 	}
 	// Atoi handles the sign itself, so the full int range stays reachable.
 	parsed, err := strconv.Atoi(value[:end])
 	if err != nil {
 		return 0, false
 	}
 	return parsed, true
 }
 // parseNumberToInt converts a loosely typed metadata value to int.
 //
 // Accepted inputs are int, int32, int64, float64 (truncated by int conversion),
 // json.Number holding an integer, and strings that strconv.Atoi accepts after
 // trimming whitespace. Any other type or an unparsable value yields (0, false).
 func parseNumberToInt(raw any) (int, bool) {
 	switch n := raw.(type) {
 	case int:
 		return n, true
 	case int32:
 		return int(n), true
 	case int64:
 		return int(n), true
 	case float64:
 		return int(n), true
 	case json.Number:
 		whole, err := n.Int64()
 		if err != nil {
 			return 0, false
 		}
 		return int(whole), true
 	case string:
 		// Atoi rejects the empty string, so blank input fails here too.
 		parsed, err := strconv.Atoi(strings.TrimSpace(n))
 		if err != nil {
 			return 0, false
 		}
 		return parsed, true
 	default:
 		return 0, false
 	}
 }
--- a/internal/util/thinking_text.go
+++ b/internal/util/thinking_text.go
@@ -1,87 +0,0 @@
 package util
 import (
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
 // GetThinkingText returns the thinking text carried by a content part.
 // Lookup order:
 //  1. a string "text" field on the part itself, e.g. { "thought": true, "text": "..." }
 //  2. a string "thinking" field, e.g. { "thinking": "..." }
 //  3. when "thinking" is an object, its string "text" field and then its
 //     string "thinking" field, e.g. { "thinking": { "text": "...", "cache_control": {...} } }
 // An empty string is returned when none of these is present as a string.
 func GetThinkingText(part gjson.Result) string {
 	// stringAt fetches key from node and reports whether it holds a JSON string.
 	stringAt := func(node gjson.Result, key string) (string, bool) {
 		field := node.Get(key)
 		if !field.Exists() || field.Type != gjson.String {
 			return "", false
 		}
 		return field.String(), true
 	}
 	if direct, ok := stringAt(part, "text"); ok {
 		return direct
 	}
 	thinking := part.Get("thinking")
 	switch {
 	case !thinking.Exists():
 		return ""
 	case thinking.Type == gjson.String:
 		return thinking.String()
 	case thinking.IsObject():
 		// Wrapped form: prefer the inner "text", then the inner "thinking".
 		for _, key := range []string{"text", "thinking"} {
 			if nested, ok := stringAt(thinking, key); ok {
 				return nested
 			}
 		}
 	}
 	return ""
 }
 // GetThinkingTextFromJSON extracts thinking text from a raw JSON string.
 func GetThinkingTextFromJSON(jsonStr string) string {
 	return GetThinkingText(gjson.Parse(jsonStr))
 }
 // SanitizeThinkingPart rebuilds a thinking part in a minimal canonical shape
 // and returns it as a JSON string.
 //   - When "thought" is true the result is {"thought":true} plus "text" and
 //     "thoughtSignature" when available.
 //   - When "type" is "thinking" or a "thinking" field exists the result is
 //     {"type":"thinking"} plus "thinking" and "signature" when available.
 //   - Any other part is returned via StripCacheControl on its raw JSON.
 // Fields not listed above (such as cache_control) are not copied over.
 func SanitizeThinkingPart(part gjson.Result) string {
 	// rebuild starts from seed and copies only the thinking text (under
 	// textKey) and the string signature stored under sigKey.
 	rebuild := func(seed, textKey, sigKey string) string {
 		out := seed
 		if body := GetThinkingText(part); body != "" {
 			out, _ = sjson.Set(out, textKey, body)
 		}
 		if sig := part.Get(sigKey); sig.Exists() && sig.Type == gjson.String {
 			out, _ = sjson.Set(out, sigKey, sig.String())
 		}
 		return out
 	}
 	switch {
 	case part.Get("thought").Bool():
 		// Gemini-style: { thought: true, text, thoughtSignature }
 		return rebuild(`{"thought":true}`, "text", "thoughtSignature")
 	case part.Get("type").String() == "thinking" || part.Get("thinking").Exists():
 		// Anthropic-style: { type: "thinking", thinking, signature }
 		return rebuild(`{"type":"thinking"}`, "thinking", "signature")
 	default:
 		// Not a thinking part: keep it, minus cache_control/providerOptions.
 		return StripCacheControl(part.Raw)
 	}
 }
 // StripCacheControl deletes the top-level "cache_control" and
 // "providerOptions" keys from jsonStr and returns the resulting JSON text.
 // Errors reported by sjson.Delete are ignored.
 func StripCacheControl(jsonStr string) string {
 	cleaned := jsonStr
 	for _, field := range []string{"cache_control", "providerOptions"} {
 		cleaned, _ = sjson.Delete(cleaned, field)
 	}
 	return cleaned
 }
--- a/internal/watcher/config_reload.go
+++ b/internal/watcher/config_reload.go
@@ -127,7 +127,7 @@ func (w *Watcher) reloadConfig() bool {
 	}
 	authDirChanged := oldConfig == nil || oldConfig.AuthDir != newConfig.AuthDir
-	forceAuthRefresh := oldConfig != nil && (oldConfig.ForceModelPrefix != newConfig.ForceModelPrefix || !reflect.DeepEqual(oldConfig.OAuthModelMappings, newConfig.OAuthModelMappings))
+	forceAuthRefresh := oldConfig != nil && (oldConfig.ForceModelPrefix != newConfig.ForceModelPrefix || !reflect.DeepEqual(oldConfig.OAuthModelAlias, newConfig.OAuthModelAlias))
 	log.Infof("config successfully reloaded, triggering client reload")
 	w.reloadClients(authDirChanged, affectedOAuthProviders, forceAuthRefresh)
--- a/internal/watcher/diff/config_diff.go
+++ b/internal/watcher/diff/config_diff.go
@@ -212,7 +212,7 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
 	if entries, _ := DiffOAuthExcludedModelChanges(oldCfg.OAuthExcludedModels, newCfg.OAuthExcludedModels); len(entries) > 0 {
 		changes = append(changes, entries...)
 	}
-	if entries, _ := DiffOAuthModelMappingChanges(oldCfg.OAuthModelMappings, newCfg.OAuthModelMappings); len(entries) > 0 {
+	if entries, _ := DiffOAuthModelAliasChanges(oldCfg.OAuthModelAlias, newCfg.OAuthModelAlias); len(entries) > 0 {
 		changes = append(changes, entries...)
 	}
--- a/internal/watcher/diff/oauth_model_mappings.go
+++ b/internal/watcher/diff/oauth_model_mappings.go
@@ -10,23 +10,23 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 )
-type OAuthModelMappingsSummary struct {
+type OAuthModelAliasSummary struct {
 	hash  string
 	count int
 }
-// SummarizeOAuthModelMappings summarizes OAuth model mappings per channel.
+// SummarizeOAuthModelAlias summarizes OAuth model alias per channel.
-func SummarizeOAuthModelMappings(entries map[string][]config.ModelNameMapping) map[string]OAuthModelMappingsSummary {
+func SummarizeOAuthModelAlias(entries map[string][]config.OAuthModelAlias) map[string]OAuthModelAliasSummary {
 	if len(entries) == 0 {
 		return nil
 	}
-	out := make(map[string]OAuthModelMappingsSummary, len(entries))
+	out := make(map[string]OAuthModelAliasSummary, len(entries))
 	for k, v := range entries {
 		key := strings.ToLower(strings.TrimSpace(k))
 		if key == "" {
 			continue
 		}
-		out[key] = summarizeOAuthModelMappingList(v)
+		out[key] = summarizeOAuthModelAliasList(v)
 	}
 	if len(out) == 0 {
 		return nil
@@ -34,10 +34,10 @@ func SummarizeOAuthModelMappings(entries map[string][]config.ModelNameMapping) m
 	return out
 }
-// DiffOAuthModelMappingChanges compares OAuth model mappings maps.
+// DiffOAuthModelAliasChanges compares OAuth model alias maps.
-func DiffOAuthModelMappingChanges(oldMap, newMap map[string][]config.ModelNameMapping) ([]string, []string) {
+func DiffOAuthModelAliasChanges(oldMap, newMap map[string][]config.OAuthModelAlias) ([]string, []string) {
-	oldSummary := SummarizeOAuthModelMappings(oldMap)
+	oldSummary := SummarizeOAuthModelAlias(oldMap)
-	newSummary := SummarizeOAuthModelMappings(newMap)
+	newSummary := SummarizeOAuthModelAlias(newMap)
 	keys := make(map[string]struct{}, len(oldSummary)+len(newSummary))
 	for k := range oldSummary {
 		keys[k] = struct{}{}
@@ -52,13 +52,13 @@ func DiffOAuthModelMappingChanges(oldMap, newMap map[string][]config.ModelNameMa
 		newInfo, okNew := newSummary[key]
 		switch {
 		case okOld && !okNew:
-			changes = append(changes, fmt.Sprintf("oauth-model-mappings[%s]: removed", key))
+			changes = append(changes, fmt.Sprintf("oauth-model-alias[%s]: removed", key))
 			affected = append(affected, key)
 		case !okOld && okNew:
-			changes = append(changes, fmt.Sprintf("oauth-model-mappings[%s]: added (%d entries)", key, newInfo.count))
+			changes = append(changes, fmt.Sprintf("oauth-model-alias[%s]: added (%d entries)", key, newInfo.count))
 			affected = append(affected, key)
 		case okOld && okNew && oldInfo.hash != newInfo.hash:
-			changes = append(changes, fmt.Sprintf("oauth-model-mappings[%s]: updated (%d -> %d entries)", key, oldInfo.count, newInfo.count))
+			changes = append(changes, fmt.Sprintf("oauth-model-alias[%s]: updated (%d -> %d entries)", key, oldInfo.count, newInfo.count))
 			affected = append(affected, key)
 		}
 	}
@@ -67,20 +67,20 @@ func DiffOAuthModelMappingChanges(oldMap, newMap map[string][]config.ModelNameMa
 	return changes, affected
 }
-func summarizeOAuthModelMappingList(list []config.ModelNameMapping) OAuthModelMappingsSummary {
+func summarizeOAuthModelAliasList(list []config.OAuthModelAlias) OAuthModelAliasSummary {
 	if len(list) == 0 {
-		return OAuthModelMappingsSummary{}
+		return OAuthModelAliasSummary{}
 	}
 	seen := make(map[string]struct{}, len(list))
 	normalized := make([]string, 0, len(list))
-	for _, mapping := range list {
+	for _, alias := range list {
-		name := strings.ToLower(strings.TrimSpace(mapping.Name))
+		name := strings.ToLower(strings.TrimSpace(alias.Name))
-		alias := strings.ToLower(strings.TrimSpace(mapping.Alias))
+		aliasVal := strings.ToLower(strings.TrimSpace(alias.Alias))
-		if name == "" || alias == "" {
+		if name == "" || aliasVal == "" {
 			continue
 		}
-		key := name + "->" + alias
+		key := name + "->" + aliasVal
-		if mapping.Fork {
+		if alias.Fork {
 			key += "|fork"
 		}
 		if _, exists := seen[key]; exists {
@@ -90,11 +90,11 @@ func summarizeOAuthModelMappingList(list []config.ModelNameMapping) OAuthModelMa
 		normalized = append(normalized, key)
 	}
 	if len(normalized) == 0 {
-		return OAuthModelMappingsSummary{}
+		return OAuthModelAliasSummary{}
 	}
 	sort.Strings(normalized)
 	sum := sha256.Sum256([]byte(strings.Join(normalized, "|")))
-	return OAuthModelMappingsSummary{
+	return OAuthModelAliasSummary{
 		hash:  hex.EncodeToString(sum[:]),
 		count: len(normalized),
 	}
--- a/internal/watcher/synthesizer/config.go
+++ b/internal/watcher/synthesizer/config.go
@@ -2,6 +2,7 @@ package synthesizer
 import (
 	"fmt"
 	"strconv"
 	"strings"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/watcher/diff"
@@ -59,6 +60,9 @@ func (s *ConfigSynthesizer) synthesizeGeminiKeys(ctx *SynthesisContext) []*corea
 			"source":  fmt.Sprintf("config:gemini[%s]", token),
 			"api_key": key,
 		}
 		if entry.Priority != 0 {
 			attrs["priority"] = strconv.Itoa(entry.Priority)
 		}
 		if base != "" {
 			attrs["base_url"] = base
 		}
@@ -103,6 +107,9 @@ func (s *ConfigSynthesizer) synthesizeClaudeKeys(ctx *SynthesisContext) []*corea
 			"source":  fmt.Sprintf("config:claude[%s]", token),
 			"api_key": key,
 		}
 		if ck.Priority != 0 {
 			attrs["priority"] = strconv.Itoa(ck.Priority)
 		}
 		if base != "" {
 			attrs["base_url"] = base
 		}
@@ -147,6 +154,9 @@ func (s *ConfigSynthesizer) synthesizeCodexKeys(ctx *SynthesisContext) []*coreau
 			"source":  fmt.Sprintf("config:codex[%s]", token),
 			"api_key": key,
 		}
 		if ck.Priority != 0 {
 			attrs["priority"] = strconv.Itoa(ck.Priority)
 		}
 		if ck.BaseURL != "" {
 			attrs["base_url"] = ck.BaseURL
 		}
@@ -202,6 +212,9 @@ func (s *ConfigSynthesizer) synthesizeOpenAICompat(ctx *SynthesisContext) []*cor
 				"compat_name":  compat.Name,
 				"provider_key": providerName,
 			}
 			if compat.Priority != 0 {
 				attrs["priority"] = strconv.Itoa(compat.Priority)
 			}
 			if key != "" {
 				attrs["api_key"] = key
 			}
@@ -233,6 +246,9 @@ func (s *ConfigSynthesizer) synthesizeOpenAICompat(ctx *SynthesisContext) []*cor
 				"compat_name":  compat.Name,
 				"provider_key": providerName,
 			}
 			if compat.Priority != 0 {
 				attrs["priority"] = strconv.Itoa(compat.Priority)
 			}
 			if hash := diff.ComputeOpenAICompatModelsHash(compat.Models); hash != "" {
 				attrs["models_hash"] = hash
 			}
@@ -275,6 +291,9 @@ func (s *ConfigSynthesizer) synthesizeVertexCompat(ctx *SynthesisContext) []*cor
 			"base_url":     base,
 			"provider_key": providerName,
 		}
 		if compat.Priority != 0 {
 			attrs["priority"] = strconv.Itoa(compat.Priority)
 		}
 		if key != "" {
 			attrs["api_key"] = key
 		}
--- a/sdk/api/handlers/gemini/gemini_handlers.go
+++ b/sdk/api/handlers/gemini/gemini_handlers.go
@@ -56,8 +56,12 @@ func (h *GeminiAPIHandler) GeminiModels(c *gin.Context) {
 		for k, v := range model {
 			normalizedModel[k] = v
 		}
-		if name, ok := normalizedModel["name"].(string); ok && name != "" && !strings.HasPrefix(name, "models/") {
+		if name, ok := normalizedModel["name"].(string); ok && name != "" {
-			normalizedModel["name"] = "models/" + name
+			if !strings.HasPrefix(name, "models/") {
 				normalizedModel["name"] = "models/" + name
 			}
 			normalizedModel["displayName"] = name
 			normalizedModel["description"] = name
 		}
 		if _, ok := normalizedModel["supportedGenerationMethods"]; !ok {
 			normalizedModel["supportedGenerationMethods"] = defaultMethods
@@ -85,94 +89,35 @@ func (h *GeminiAPIHandler) GeminiGetHandler(c *gin.Context) {
 		return
 	}
 	action := strings.TrimPrefix(request.Action, "/")
-	switch action {
+
-	case "gemini-3-pro-preview":
+	// Get dynamic models from the global registry and find the matching one
-		c.JSON(http.StatusOK, gin.H{
+	availableModels := h.Models()
-			"name":             "models/gemini-3-pro-preview",
+	var targetModel map[string]any
-			"version":          "3",
+
-			"displayName":      "Gemini 3 Pro Preview",
+	for _, model := range availableModels {
-			"description":      "Gemini 3 Pro Preview",
+		name, _ := model["name"].(string)
-			"inputTokenLimit":  1048576,
+		// Match name with or without 'models/' prefix
-			"outputTokenLimit": 65536,
+		if name == action || name == "models/"+action {
-			"supportedGenerationMethods": []string{
+			targetModel = model
-				"generateContent",
+			break
-				"countTokens",
+		}
 				"createCachedContent",
 				"batchGenerateContent",
 			},
 			"temperature":    1,
 			"topP":           0.95,
 			"topK":           64,
 			"maxTemperature": 2,
 			"thinking":       true,
 		},
 		)
 	case "gemini-2.5-pro":
 		c.JSON(http.StatusOK, gin.H{
 			"name":             "models/gemini-2.5-pro",
 			"version":          "2.5",
 			"displayName":      "Gemini 2.5 Pro",
 			"description":      "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
 			"inputTokenLimit":  1048576,
 			"outputTokenLimit": 65536,
 			"supportedGenerationMethods": []string{
 				"generateContent",
 				"countTokens",
 				"createCachedContent",
 				"batchGenerateContent",
 			},
 			"temperature":    1,
 			"topP":           0.95,
 			"topK":           64,
 			"maxTemperature": 2,
 			"thinking":       true,
 		},
 		)
 	case "gemini-2.5-flash":
 		c.JSON(http.StatusOK, gin.H{
 			"name":             "models/gemini-2.5-flash",
 			"version":          "001",
 			"displayName":      "Gemini 2.5 Flash",
 			"description":      "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
 			"inputTokenLimit":  1048576,
 			"outputTokenLimit": 65536,
 			"supportedGenerationMethods": []string{
 				"generateContent",
 				"countTokens",
 				"createCachedContent",
 				"batchGenerateContent",
 			},
 			"temperature":    1,
 			"topP":           0.95,
 			"topK":           64,
 			"maxTemperature": 2,
 			"thinking":       true,
 		})
 	case "gpt-5":
 		c.JSON(http.StatusOK, gin.H{
 			"name":             "gpt-5",
 			"version":          "001",
 			"displayName":      "GPT 5",
 			"description":      "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
 			"inputTokenLimit":  400000,
 			"outputTokenLimit": 128000,
 			"supportedGenerationMethods": []string{
 				"generateContent",
 			},
 			"temperature":    1,
 			"topP":           0.95,
 			"topK":           64,
 			"maxTemperature": 2,
 			"thinking":       true,
 		})
 	default:
 		c.JSON(http.StatusNotFound, handlers.ErrorResponse{
 			Error: handlers.ErrorDetail{
 				Message: "Not Found",
 				Type:    "not_found",
 			},
 		})
 	}
 	if targetModel != nil {
 		// Ensure the name has 'models/' prefix in the output if it's a Gemini model
 		if name, ok := targetModel["name"].(string); ok && name != "" && !strings.HasPrefix(name, "models/") {
 			targetModel["name"] = "models/" + name
 		}
 		c.JSON(http.StatusOK, targetModel)
 		return
 	}
 	c.JSON(http.StatusNotFound, handlers.ErrorResponse{
 		Error: handlers.ErrorDetail{
 			Message: "Not Found",
 			Type:    "not_found",
 		},
 	})
 }
 // GeminiHandler handles POST requests for Gemini API operations.
--- a/sdk/api/handlers/handlers.go
+++ b/sdk/api/handlers/handlers.go
@@ -16,6 +16,7 @@ import (
 	"github.com/google/uuid"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	coreexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -379,7 +380,7 @@ func appendAPIResponse(c *gin.Context, data []byte) {
 // ExecuteWithAuthManager executes a non-streaming request via the core auth manager.
 // This path is the only supported execution route.
 func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) {
-	providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName)
+	providers, normalizedModel, errMsg := h.getRequestDetails(modelName)
 	if errMsg != nil {
 		return nil, errMsg
 	}
@@ -388,16 +389,13 @@ func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType
 		Model:   normalizedModel,
 		Payload: cloneBytes(rawJSON),
 	}
 	if cloned := cloneMetadata(metadata); cloned != nil {
 		req.Metadata = cloned
 	}
 	opts := coreexecutor.Options{
 		Stream:          false,
 		Alt:             alt,
 		OriginalRequest: cloneBytes(rawJSON),
 		SourceFormat:    sdktranslator.FromString(handlerType),
 	}
-	opts.Metadata = mergeMetadata(cloneMetadata(metadata), reqMeta)
+	opts.Metadata = reqMeta
 	resp, err := h.AuthManager.Execute(ctx, providers, req, opts)
 	if err != nil {
 		status := http.StatusInternalServerError
@@ -420,7 +418,7 @@ func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType
 // ExecuteCountWithAuthManager executes a non-streaming request via the core auth manager.
 // This path is the only supported execution route.
 func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) {
-	providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName)
+	providers, normalizedModel, errMsg := h.getRequestDetails(modelName)
 	if errMsg != nil {
 		return nil, errMsg
 	}
@@ -429,16 +427,13 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
 		Model:   normalizedModel,
 		Payload: cloneBytes(rawJSON),
 	}
 	if cloned := cloneMetadata(metadata); cloned != nil {
 		req.Metadata = cloned
 	}
 	opts := coreexecutor.Options{
 		Stream:          false,
 		Alt:             alt,
 		OriginalRequest: cloneBytes(rawJSON),
 		SourceFormat:    sdktranslator.FromString(handlerType),
 	}
-	opts.Metadata = mergeMetadata(cloneMetadata(metadata), reqMeta)
+	opts.Metadata = reqMeta
 	resp, err := h.AuthManager.ExecuteCount(ctx, providers, req, opts)
 	if err != nil {
 		status := http.StatusInternalServerError
@@ -461,7 +456,7 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
 // ExecuteStreamWithAuthManager executes a streaming request via the core auth manager.
 // This path is the only supported execution route.
 func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) (<-chan []byte, <-chan *interfaces.ErrorMessage) {
-	providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName)
+	providers, normalizedModel, errMsg := h.getRequestDetails(modelName)
 	if errMsg != nil {
 		errChan := make(chan *interfaces.ErrorMessage, 1)
 		errChan <- errMsg
@@ -473,16 +468,13 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
 		Model:   normalizedModel,
 		Payload: cloneBytes(rawJSON),
 	}
 	if cloned := cloneMetadata(metadata); cloned != nil {
 		req.Metadata = cloned
 	}
 	opts := coreexecutor.Options{
 		Stream:          true,
 		Alt:             alt,
 		OriginalRequest: cloneBytes(rawJSON),
 		SourceFormat:    sdktranslator.FromString(handlerType),
 	}
-	opts.Metadata = mergeMetadata(cloneMetadata(metadata), reqMeta)
+	opts.Metadata = reqMeta
 	chunks, err := h.AuthManager.ExecuteStream(ctx, providers, req, opts)
 	if err != nil {
 		errChan := make(chan *interfaces.ErrorMessage, 1)
@@ -595,38 +587,40 @@ func statusFromError(err error) int {
 	return 0
 }
-func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string, normalizedModel string, metadata map[string]any, err *interfaces.ErrorMessage) {
+func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string, normalizedModel string, err *interfaces.ErrorMessage) {
-	// Resolve "auto" model to an actual available model first
+	resolvedModelName := modelName
-	resolvedModelName := util.ResolveAutoModel(modelName)
+	initialSuffix := thinking.ParseSuffix(modelName)
-
+	if initialSuffix.ModelName == "auto" {
-	// Normalize the model name to handle dynamic thinking suffixes before determining the provider.
+		resolvedBase := util.ResolveAutoModel(initialSuffix.ModelName)
-	normalizedModel, metadata = normalizeModelMetadata(resolvedModelName)
+		if initialSuffix.HasSuffix {
-
+			resolvedModelName = fmt.Sprintf("%s(%s)", resolvedBase, initialSuffix.RawSuffix)
-	// Use the normalizedModel to get the provider name.
+		} else {
-	providers = util.GetProviderName(normalizedModel)
+			resolvedModelName = resolvedBase
 	if len(providers) == 0 && metadata != nil {
 		if originalRaw, ok := metadata[util.ThinkingOriginalModelMetadataKey]; ok {
 			if originalModel, okStr := originalRaw.(string); okStr {
 				originalModel = strings.TrimSpace(originalModel)
 				if originalModel != "" && !strings.EqualFold(originalModel, normalizedModel) {
 					if altProviders := util.GetProviderName(originalModel); len(altProviders) > 0 {
 						providers = altProviders
 						normalizedModel = originalModel
 					}
 				}
 			}
 		}
 	} else {
 		resolvedModelName = util.ResolveAutoModel(modelName)
 	}
 	parsed := thinking.ParseSuffix(resolvedModelName)
 	baseModel := strings.TrimSpace(parsed.ModelName)
 	providers = util.GetProviderName(baseModel)
 	// Fallback: if baseModel has no provider but differs from resolvedModelName,
 	// try using the full model name. This handles edge cases where custom models
 	// may be registered with their full suffixed name (e.g., "my-model(8192)").
 	// Evaluated in Story 11.8: This fallback is intentionally preserved to support
 	// custom model registrations that include thinking suffixes.
 	if len(providers) == 0 && baseModel != resolvedModelName {
 		providers = util.GetProviderName(resolvedModelName)
 	}
 	if len(providers) == 0 {
-		return nil, "", nil, &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)}
+		return nil, "", &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)}
 	}
-	// If it's a dynamic model, the normalizedModel was already set to extractedModelName.
+	// The thinking suffix is preserved in the model name itself, so no
-	// If it's a non-dynamic model, normalizedModel was set by normalizeModelMetadata.
+	// metadata-based configuration passing is needed.
-	// So, normalizedModel is already correctly set at this point.
+	return providers, resolvedModelName, nil
 	return providers, normalizedModel, metadata, nil
 }
 func cloneBytes(src []byte) []byte {
@@ -638,10 +632,6 @@ func cloneBytes(src []byte) []byte {
 	return dst
 }
 // normalizeModelMetadata is a thin wrapper that forwards modelName to
 // util.NormalizeThinkingModel and returns its two results unchanged.
 func normalizeModelMetadata(modelName string) (string, map[string]any) {
 	normalized, metadata := util.NormalizeThinkingModel(modelName)
 	return normalized, metadata
 }
 func cloneMetadata(src map[string]any) map[string]any {
 	if len(src) == 0 {
 		return nil
--- a/sdk/api/handlers/handlers_request_details_test.go
+++ b/sdk/api/handlers/handlers_request_details_test.go
@@ -0,0 +1,118 @@
 package handlers
 import (
 	"reflect"
 	"testing"
 	"time"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	sdkconfig "github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
 )
 // TestGetRequestDetails_PreservesSuffix registers a handful of models in the
 // global registry and checks that getRequestDetails returns the expected
 // providers while leaving any "(suffix)" on the model name intact, including
 // the case where the base name "auto" is resolved to a registered model.
 func TestGetRequestDetails_PreservesSuffix(t *testing.T) {
 	reg := registry.GetGlobalRegistry()
 	base := time.Now().Unix()
 	fixtures := []struct {
 		clientID string
 		provider string
 		models   []*registry.ModelInfo
 	}{
 		{
 			clientID: "test-request-details-gemini",
 			provider: "gemini",
 			models: []*registry.ModelInfo{
 				{ID: "gemini-2.5-pro", Created: base + 30},
 				{ID: "gemini-2.5-flash", Created: base + 25},
 			},
 		},
 		{
 			clientID: "test-request-details-openai",
 			provider: "openai",
 			models: []*registry.ModelInfo{
 				{ID: "gpt-5.2", Created: base + 20},
 			},
 		},
 		{
 			clientID: "test-request-details-claude",
 			provider: "claude",
 			models: []*registry.ModelInfo{
 				{ID: "claude-sonnet-4-5", Created: base + 5},
 			},
 		},
 	}
 	for _, fx := range fixtures {
 		reg.RegisterClient(fx.clientID, fx.provider, fx.models)
 	}
 	// Remove every test registration once the test finishes.
 	for _, fx := range fixtures {
 		registered := fx.clientID
 		t.Cleanup(func() {
 			reg.UnregisterClient(registered)
 		})
 	}
 	handler := NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, coreauth.NewManager(nil, nil, nil))
 	cases := []struct {
 		name          string
 		inputModel    string
 		wantProviders []string
 		wantModel     string
 		wantErr       bool
 	}{
 		{
 			name:          "numeric suffix preserved",
 			inputModel:    "gemini-2.5-pro(8192)",
 			wantProviders: []string{"gemini"},
 			wantModel:     "gemini-2.5-pro(8192)",
 		},
 		{
 			name:          "level suffix preserved",
 			inputModel:    "gpt-5.2(high)",
 			wantProviders: []string{"openai"},
 			wantModel:     "gpt-5.2(high)",
 		},
 		{
 			name:          "no suffix unchanged",
 			inputModel:    "claude-sonnet-4-5",
 			wantProviders: []string{"claude"},
 			wantModel:     "claude-sonnet-4-5",
 		},
 		{
 			name:          "unknown model with suffix",
 			inputModel:    "unknown-model(8192)",
 			wantProviders: nil,
 			wantModel:     "",
 			wantErr:       true,
 		},
 		{
 			name:          "auto suffix resolved",
 			inputModel:    "auto(high)",
 			wantProviders: []string{"gemini"},
 			wantModel:     "gemini-2.5-pro(high)",
 		},
 		{
 			name:          "special suffix none preserved",
 			inputModel:    "gemini-2.5-flash(none)",
 			wantProviders: []string{"gemini"},
 			wantModel:     "gemini-2.5-flash(none)",
 		},
 		{
 			name:          "special suffix auto preserved",
 			inputModel:    "claude-sonnet-4-5(auto)",
 			wantProviders: []string{"claude"},
 			wantModel:     "claude-sonnet-4-5(auto)",
 		},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			gotProviders, gotModel, errMsg := handler.getRequestDetails(tc.inputModel)
 			if failed := errMsg != nil; failed != tc.wantErr {
 				t.Fatalf("getRequestDetails() error = %v, wantErr %v", errMsg, tc.wantErr)
 			}
 			if errMsg != nil {
 				// Expected failure: there is nothing further to compare.
 				return
 			}
 			if !reflect.DeepEqual(gotProviders, tc.wantProviders) {
 				t.Fatalf("getRequestDetails() providers = %v, want %v", gotProviders, tc.wantProviders)
 			}
 			if gotModel != tc.wantModel {
 				t.Fatalf("getRequestDetails() model = %v, want %v", gotModel, tc.wantModel)
 			}
 		})
 	}
 }
--- a/sdk/auth/antigravity.go
+++ b/sdk/auth/antigravity.go
@@ -60,6 +60,11 @@ func (AntigravityAuthenticator) Login(ctx context.Context, cfg *config.Config, o
 		opts = &LoginOptions{}
 	}
 	callbackPort := antigravityCallbackPort
 	if opts.CallbackPort > 0 {
 		callbackPort = opts.CallbackPort
 	}
 	httpClient := util.SetProxy(&cfg.SDKConfig, &http.Client{})
 	state, err := misc.GenerateRandomState()
@@ -67,7 +72,7 @@ func (AntigravityAuthenticator) Login(ctx context.Context, cfg *config.Config, o
 		return nil, fmt.Errorf("antigravity: failed to generate state: %w", err)
 	}
-	srv, port, cbChan, errServer := startAntigravityCallbackServer()
+	srv, port, cbChan, errServer := startAntigravityCallbackServer(callbackPort)
 	if errServer != nil {
 		return nil, fmt.Errorf("antigravity: failed to start callback server: %w", errServer)
 	}
@@ -224,13 +229,16 @@ type callbackResult struct {
 	State string
 }
-func startAntigravityCallbackServer() (*http.Server, int, <-chan callbackResult, error) {
+func startAntigravityCallbackServer(port int) (*http.Server, int, <-chan callbackResult, error) {
-	addr := fmt.Sprintf(":%d", antigravityCallbackPort)
+	if port <= 0 {
 		port = antigravityCallbackPort
 	}
 	addr := fmt.Sprintf(":%d", port)
 	listener, err := net.Listen("tcp", addr)
 	if err != nil {
 		return nil, 0, nil, err
 	}
-	port := listener.Addr().(*net.TCPAddr).Port
+	port = listener.Addr().(*net.TCPAddr).Port
 	resultCh := make(chan callbackResult, 1)
 	mux := http.NewServeMux()
@@ -374,7 +382,7 @@ func fetchAntigravityProjectID(ctx context.Context, accessToken string, httpClie
 	// Call loadCodeAssist to get the project
 	loadReqBody := map[string]any{
 		"metadata": map[string]string{
-			"ideType":    "IDE_UNSPECIFIED",
+			"ideType":    "ANTIGRAVITY",
 			"platform":   "PLATFORM_UNSPECIFIED",
 			"pluginType": "GEMINI",
 		},
@@ -434,8 +442,134 @@ func fetchAntigravityProjectID(ctx context.Context, accessToken string, httpClie
 	}
 	if projectID == "" {
-		return "", fmt.Errorf("no cloudaicompanionProject in response")
+		tierID := "legacy-tier"
 		if tiers, okTiers := loadResp["allowedTiers"].([]any); okTiers {
 			for _, rawTier := range tiers {
 				tier, okTier := rawTier.(map[string]any)
 				if !okTier {
 					continue
 				}
 				if isDefault, okDefault := tier["isDefault"].(bool); okDefault && isDefault {
 					if id, okID := tier["id"].(string); okID && strings.TrimSpace(id) != "" {
 						tierID = strings.TrimSpace(id)
 						break
 					}
 				}
 			}
 		}
 		projectID, err = antigravityOnboardUser(ctx, accessToken, tierID, httpClient)
 		if err != nil {
 			return "", err
 		}
 		return projectID, nil
 	}
 	return projectID, nil
 }
 // antigravityOnboardUser attempts to fetch the project ID via onboardUser by polling for completion.
 // It returns an empty string when the operation times out or completes without a project ID.
 func antigravityOnboardUser(ctx context.Context, accessToken, tierID string, httpClient *http.Client) (string, error) {
 	if httpClient == nil {
 		httpClient = http.DefaultClient
 	}
 	fmt.Println("Antigravity: onboarding user...", tierID)
 	requestBody := map[string]any{
 		"tierId": tierID,
 		"metadata": map[string]string{
 			"ideType":    "ANTIGRAVITY",
 			"platform":   "PLATFORM_UNSPECIFIED",
 			"pluginType": "GEMINI",
 		},
 	}
 	rawBody, errMarshal := json.Marshal(requestBody)
 	if errMarshal != nil {
 		return "", fmt.Errorf("marshal request body: %w", errMarshal)
 	}
 	maxAttempts := 5
 	for attempt := 1; attempt <= maxAttempts; attempt++ {
 		log.Debugf("Polling attempt %d/%d", attempt, maxAttempts)
 		reqCtx := ctx
 		var cancel context.CancelFunc
 		if reqCtx == nil {
 			reqCtx = context.Background()
 		}
 		reqCtx, cancel = context.WithTimeout(reqCtx, 30*time.Second)
 		endpointURL := fmt.Sprintf("%s/%s:onboardUser", antigravityAPIEndpoint, antigravityAPIVersion)
 		req, errRequest := http.NewRequestWithContext(reqCtx, http.MethodPost, endpointURL, strings.NewReader(string(rawBody)))
 		if errRequest != nil {
 			cancel()
 			return "", fmt.Errorf("create request: %w", errRequest)
 		}
 		req.Header.Set("Authorization", "Bearer "+accessToken)
 		req.Header.Set("Content-Type", "application/json")
 		req.Header.Set("User-Agent", antigravityAPIUserAgent)
 		req.Header.Set("X-Goog-Api-Client", antigravityAPIClient)
 		req.Header.Set("Client-Metadata", antigravityClientMetadata)
 		resp, errDo := httpClient.Do(req)
 		if errDo != nil {
 			cancel()
 			return "", fmt.Errorf("execute request: %w", errDo)
 		}
 		bodyBytes, errRead := io.ReadAll(resp.Body)
 		if errClose := resp.Body.Close(); errClose != nil {
 			log.Errorf("close body error: %v", errClose)
 		}
 		cancel()
 		if errRead != nil {
 			return "", fmt.Errorf("read response: %w", errRead)
 		}
 		if resp.StatusCode == http.StatusOK {
 			var data map[string]any
 			if errDecode := json.Unmarshal(bodyBytes, &data); errDecode != nil {
 				return "", fmt.Errorf("decode response: %w", errDecode)
 			}
 			if done, okDone := data["done"].(bool); okDone && done {
 				projectID := ""
 				if responseData, okResp := data["response"].(map[string]any); okResp {
 					switch projectValue := responseData["cloudaicompanionProject"].(type) {
 					case map[string]any:
 						if id, okID := projectValue["id"].(string); okID {
 							projectID = strings.TrimSpace(id)
 						}
 					case string:
 						projectID = strings.TrimSpace(projectValue)
 					}
 				}
 				if projectID != "" {
 					log.Infof("Successfully fetched project_id: %s", projectID)
 					return projectID, nil
 				}
 				return "", fmt.Errorf("no project_id in response")
 			}
 			time.Sleep(2 * time.Second)
 			continue
 		}
 		responsePreview := strings.TrimSpace(string(bodyBytes))
 		if len(responsePreview) > 500 {
 			responsePreview = responsePreview[:500]
 		}
 		responseErr := responsePreview
 		if len(responseErr) > 200 {
 			responseErr = responseErr[:200]
 		}
 		return "", fmt.Errorf("http %d: %s", resp.StatusCode, responseErr)
 	}
 	return "", nil
 }
--- a/sdk/auth/claude.go
+++ b/sdk/auth/claude.go
@@ -47,6 +47,11 @@ func (a *ClaudeAuthenticator) Login(ctx context.Context, cfg *config.Config, opt
 		opts = &LoginOptions{}
 	}
 	callbackPort := a.CallbackPort
 	if opts.CallbackPort > 0 {
 		callbackPort = opts.CallbackPort
 	}
 	pkceCodes, err := claude.GeneratePKCECodes()
 	if err != nil {
 		return nil, fmt.Errorf("claude pkce generation failed: %w", err)
@@ -57,7 +62,7 @@ func (a *ClaudeAuthenticator) Login(ctx context.Context, cfg *config.Config, opt
 		return nil, fmt.Errorf("claude state generation failed: %w", err)
 	}
-	oauthServer := claude.NewOAuthServer(a.CallbackPort)
+	oauthServer := claude.NewOAuthServer(callbackPort)
 	if err = oauthServer.Start(); err != nil {
 		if strings.Contains(err.Error(), "already in use") {
 			return nil, claude.NewAuthenticationError(claude.ErrPortInUse, err)
@@ -84,15 +89,15 @@ func (a *ClaudeAuthenticator) Login(ctx context.Context, cfg *config.Config, opt
 		fmt.Println("Opening browser for Claude authentication")
 		if !browser.IsAvailable() {
 			log.Warn("No browser available; please open the URL manually")
-			util.PrintSSHTunnelInstructions(a.CallbackPort)
+			util.PrintSSHTunnelInstructions(callbackPort)
 			fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL)
 		} else if err = browser.OpenURL(authURL); err != nil {
 			log.Warnf("Failed to open browser automatically: %v", err)
-			util.PrintSSHTunnelInstructions(a.CallbackPort)
+			util.PrintSSHTunnelInstructions(callbackPort)
 			fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL)
 		}
 	} else {
-		util.PrintSSHTunnelInstructions(a.CallbackPort)
+		util.PrintSSHTunnelInstructions(callbackPort)
 		fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL)
 	}
--- a/sdk/auth/codex.go
+++ b/sdk/auth/codex.go
@@ -47,6 +47,11 @@ func (a *CodexAuthenticator) Login(ctx context.Context, cfg *config.Config, opts
 		opts = &LoginOptions{}
 	}
 	callbackPort := a.CallbackPort
 	if opts.CallbackPort > 0 {
 		callbackPort = opts.CallbackPort
 	}
 	pkceCodes, err := codex.GeneratePKCECodes()
 	if err != nil {
 		return nil, fmt.Errorf("codex pkce generation failed: %w", err)
@@ -57,7 +62,7 @@ func (a *CodexAuthenticator) Login(ctx context.Context, cfg *config.Config, opts
 		return nil, fmt.Errorf("codex state generation failed: %w", err)
 	}
-	oauthServer := codex.NewOAuthServer(a.CallbackPort)
+	oauthServer := codex.NewOAuthServer(callbackPort)
 	if err = oauthServer.Start(); err != nil {
 		if strings.Contains(err.Error(), "already in use") {
 			return nil, codex.NewAuthenticationError(codex.ErrPortInUse, err)
@@ -83,15 +88,15 @@ func (a *CodexAuthenticator) Login(ctx context.Context, cfg *config.Config, opts
 		fmt.Println("Opening browser for Codex authentication")
 		if !browser.IsAvailable() {
 			log.Warn("No browser available; please open the URL manually")
-			util.PrintSSHTunnelInstructions(a.CallbackPort)
+			util.PrintSSHTunnelInstructions(callbackPort)
 			fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL)
 		} else if err = browser.OpenURL(authURL); err != nil {
 			log.Warnf("Failed to open browser automatically: %v", err)
-			util.PrintSSHTunnelInstructions(a.CallbackPort)
+			util.PrintSSHTunnelInstructions(callbackPort)
 			fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL)
 		}
 	} else {
-		util.PrintSSHTunnelInstructions(a.CallbackPort)
+		util.PrintSSHTunnelInstructions(callbackPort)
 		fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL)
 	}
--- a/sdk/auth/filestore.go
+++ b/sdk/auth/filestore.go
@@ -5,8 +5,10 @@ import (
 	"encoding/json"
 	"fmt"
 	"io/fs"
 	"net/http"
 	"os"
 	"path/filepath"
 	"reflect"
 	"strings"
 	"sync"
 	"time"
@@ -77,15 +79,23 @@ func (s *FileTokenStore) Save(ctx context.Context, auth *cliproxyauth.Auth) (str
 			if metadataEqualIgnoringTimestamps(existing, raw) {
 				return path, nil
 			}
-		} else if errRead != nil && !os.IsNotExist(errRead) {
+			file, errOpen := os.OpenFile(path, os.O_WRONLY|os.O_TRUNC, 0o600)
 			if errOpen != nil {
 				return "", fmt.Errorf("auth filestore: open existing failed: %w", errOpen)
 			}
 			if _, errWrite := file.Write(raw); errWrite != nil {
 				_ = file.Close()
 				return "", fmt.Errorf("auth filestore: write existing failed: %w", errWrite)
 			}
 			if errClose := file.Close(); errClose != nil {
 				return "", fmt.Errorf("auth filestore: close existing failed: %w", errClose)
 			}
 			return path, nil
 		} else if !os.IsNotExist(errRead) {
 			return "", fmt.Errorf("auth filestore: read existing failed: %w", errRead)
 		}
-		tmp := path + ".tmp"
+		if errWrite := os.WriteFile(path, raw, 0o600); errWrite != nil {
-		if errWrite := os.WriteFile(tmp, raw, 0o600); errWrite != nil {
+			return "", fmt.Errorf("auth filestore: write file failed: %w", errWrite)
 			return "", fmt.Errorf("auth filestore: write temp failed: %w", errWrite)
 		}
 		if errRename := os.Rename(tmp, path); errRename != nil {
 			return "", fmt.Errorf("auth filestore: rename failed: %w", errRename)
 		}
 	default:
 		return "", fmt.Errorf("auth filestore: nothing to persist for %s", auth.ID)
@@ -178,6 +188,30 @@ func (s *FileTokenStore) readAuthFile(path, baseDir string) (*cliproxyauth.Auth,
 	if provider == "" {
 		provider = "unknown"
 	}
 	if provider == "antigravity" {
 		projectID := ""
 		if pid, ok := metadata["project_id"].(string); ok {
 			projectID = strings.TrimSpace(pid)
 		}
 		if projectID == "" {
 			accessToken := ""
 			if token, ok := metadata["access_token"].(string); ok {
 				accessToken = strings.TrimSpace(token)
 			}
 			if accessToken != "" {
 				fetchedProjectID, errFetch := FetchAntigravityProjectID(context.Background(), accessToken, http.DefaultClient)
 				if errFetch == nil && strings.TrimSpace(fetchedProjectID) != "" {
 					metadata["project_id"] = strings.TrimSpace(fetchedProjectID)
 					if raw, errMarshal := json.Marshal(metadata); errMarshal == nil {
 						if file, errOpen := os.OpenFile(path, os.O_WRONLY|os.O_TRUNC, 0o600); errOpen == nil {
 							_, _ = file.Write(raw)
 							_ = file.Close()
 						}
 					}
 				}
 			}
 		}
 	}
 	info, err := os.Stat(path)
 	if err != nil {
 		return nil, fmt.Errorf("stat file: %w", err)
@@ -266,92 +300,28 @@ func (s *FileTokenStore) baseDirSnapshot() string {
 	return s.baseDir
 }
-// DEPRECATED: Use metadataEqualIgnoringTimestamps for comparing auth metadata.
+// metadataEqualIgnoringTimestamps compares two metadata JSON blobs, ignoring volatile fields that
-// This function is kept for backward compatibility but can cause refresh loops.
+// change on every refresh but don't affect authentication logic.
 func jsonEqual(a, b []byte) bool {
 	var objA any
 	var objB any
 	if err := json.Unmarshal(a, &objA); err != nil {
 		return false
 	}
 	if err := json.Unmarshal(b, &objB); err != nil {
 		return false
 	}
 	return deepEqualJSON(objA, objB)
 }
 // metadataEqualIgnoringTimestamps compares two metadata JSON blobs,
 // ignoring fields that change on every refresh but don't affect functionality.
 // This prevents unnecessary file writes that would trigger watcher events and
 // create refresh loops.
 func metadataEqualIgnoringTimestamps(a, b []byte) bool {
-	var objA, objB map[string]any
+	var objA map[string]any
-	if err := json.Unmarshal(a, &objA); err != nil {
+	var objB map[string]any
 	if errUnmarshalA := json.Unmarshal(a, &objA); errUnmarshalA != nil {
 		return false
 	}
-	if err := json.Unmarshal(b, &objB); err != nil {
+	if errUnmarshalB := json.Unmarshal(b, &objB); errUnmarshalB != nil {
 		return false
 	}
-
+	stripVolatileMetadataFields(objA)
-	// Fields to ignore: these change on every refresh but don't affect authentication logic.
+	stripVolatileMetadataFields(objB)
-	// - timestamp, expired, expires_in, last_refresh: time-related fields that change on refresh
+	return reflect.DeepEqual(objA, objB)
 	// - access_token: Google OAuth returns a new access_token on each refresh, this is expected
 	//   and shouldn't trigger file writes (the new token will be fetched again when needed)
 	ignoredFields := []string{"timestamp", "expired", "expires_in", "last_refresh", "access_token"}
 	for _, field := range ignoredFields {
 		delete(objA, field)
 		delete(objB, field)
 	}
 	return deepEqualJSON(objA, objB)
 }
-func deepEqualJSON(a, b any) bool {
+func stripVolatileMetadataFields(metadata map[string]any) {
-	switch valA := a.(type) {
+	if metadata == nil {
-	case map[string]any:
+		return
-		valB, ok := b.(map[string]any)
+	}
-		if !ok || len(valA) != len(valB) {
+	// These fields change on refresh and would otherwise trigger watcher reload loops.
-			return false
+	for _, field := range []string{"timestamp", "expired", "expires_in", "last_refresh", "access_token"} {
-		}
+		delete(metadata, field)
 		for key, subA := range valA {
 			subB, ok1 := valB[key]
 			if !ok1 || !deepEqualJSON(subA, subB) {
 				return false
 			}
 		}
 		return true
 	case []any:
 		sliceB, ok := b.([]any)
 		if !ok || len(valA) != len(sliceB) {
 			return false
 		}
 		for i := range valA {
 			if !deepEqualJSON(valA[i], sliceB[i]) {
 				return false
 			}
 		}
 		return true
 	case float64:
 		valB, ok := b.(float64)
 		if !ok {
 			return false
 		}
 		return valA == valB
 	case string:
 		valB, ok := b.(string)
 		if !ok {
 			return false
 		}
 		return valA == valB
 	case bool:
 		valB, ok := b.(bool)
 		if !ok {
 			return false
 		}
 		return valA == valB
 	case nil:
 		return b == nil
 	default:
 		return false
 	}
 }
--- a/sdk/auth/gemini.go
+++ b/sdk/auth/gemini.go
@@ -45,8 +45,9 @@ func (a *GeminiAuthenticator) Login(ctx context.Context, cfg *config.Config, opt
 	geminiAuth := gemini.NewGeminiAuth()
 	_, err := geminiAuth.GetAuthenticatedClient(ctx, &ts, cfg, &gemini.WebLoginOptions{
-		NoBrowser: opts.NoBrowser,
+		NoBrowser:    opts.NoBrowser,
-		Prompt:    opts.Prompt,
+		CallbackPort: opts.CallbackPort,
 		Prompt:       opts.Prompt,
 	})
 	if err != nil {
 		return nil, fmt.Errorf("gemini authentication failed: %w", err)
--- a/sdk/auth/iflow.go
+++ b/sdk/auth/iflow.go
@@ -42,9 +42,14 @@ func (a *IFlowAuthenticator) Login(ctx context.Context, cfg *config.Config, opts
 		opts = &LoginOptions{}
 	}
 	callbackPort := iflow.CallbackPort
 	if opts.CallbackPort > 0 {
 		callbackPort = opts.CallbackPort
 	}
 	authSvc := iflow.NewIFlowAuth(cfg)
-	oauthServer := iflow.NewOAuthServer(iflow.CallbackPort)
+	oauthServer := iflow.NewOAuthServer(callbackPort)
 	if err := oauthServer.Start(); err != nil {
 		if strings.Contains(err.Error(), "already in use") {
 			return nil, fmt.Errorf("iflow authentication server port in use: %w", err)
@@ -64,21 +69,21 @@ func (a *IFlowAuthenticator) Login(ctx context.Context, cfg *config.Config, opts
 		return nil, fmt.Errorf("iflow auth: failed to generate state: %w", err)
 	}
-	authURL, redirectURI := authSvc.AuthorizationURL(state, iflow.CallbackPort)
+	authURL, redirectURI := authSvc.AuthorizationURL(state, callbackPort)
 	if !opts.NoBrowser {
 		fmt.Println("Opening browser for iFlow authentication")
 		if !browser.IsAvailable() {
 			log.Warn("No browser available; please open the URL manually")
-			util.PrintSSHTunnelInstructions(iflow.CallbackPort)
+			util.PrintSSHTunnelInstructions(callbackPort)
 			fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL)
 		} else if err = browser.OpenURL(authURL); err != nil {
 			log.Warnf("Failed to open browser automatically: %v", err)
-			util.PrintSSHTunnelInstructions(iflow.CallbackPort)
+			util.PrintSSHTunnelInstructions(callbackPort)
 			fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL)
 		}
 	} else {
-		util.PrintSSHTunnelInstructions(iflow.CallbackPort)
+		util.PrintSSHTunnelInstructions(callbackPort)
 		fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL)
 	}
--- a/sdk/auth/interfaces.go
+++ b/sdk/auth/interfaces.go
@@ -14,10 +14,11 @@ var ErrRefreshNotSupported = errors.New("cliproxy auth: refresh not supported")
 // LoginOptions captures generic knobs shared across authenticators.
 // Provider-specific logic can inspect Metadata for extra parameters.
 type LoginOptions struct {
-	NoBrowser bool
+	NoBrowser    bool
-	ProjectID string
+	ProjectID    string
-	Metadata  map[string]string
+	CallbackPort int
-	Prompt    func(prompt string) (string, error)
+	Metadata     map[string]string
 	Prompt       func(prompt string) (string, error)
 }
 // Authenticator manages login and optional refresh flows for a provider.
--- a/Show More
+++ b/Show More