feat(auth): add custom HTTP client with utls for Claude API authentication

Introduce a custom HTTP client utilizing utls with Firefox TLS fingerprinting to bypass Cloudflare fingerprinting on Anthropic domains. Includes support for proxy configuration and enhanced connection management for HTTP/2.
feat(ci): add cleanup step for temporary Docker tags in workflow
2026-02-02 12:30:50 +08:00 · 2026-01-30 21:29:41 +08:00 · 2026-01-30 09:15:00 +08:00 · 2026-01-30 07:26:36 +08:00 · 2026-01-30 04:17:56 +08:00 · 2026-01-30 03:27:17 +08:00
145 changed files with 14218 additions and 7399 deletions
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -10,13 +10,11 @@ env:
  DOCKERHUB_REPO: eceasy/cli-proxy-api

 jobs:
-  docker:
+  docker_amd64:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to DockerHub
@@ -29,18 +27,113 @@ jobs:
          echo VERSION=`git describe --tags --always --dirty` >> $GITHUB_ENV
          echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
          echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
-      - name: Build and push
+      - name: Build and push (amd64)
        uses: docker/build-push-action@v6
        with:
          context: .
-          platforms: |
-            linux/amd64
-            linux/arm64
+          platforms: linux/amd64
          push: true
          build-args: |
            VERSION=${{ env.VERSION }}
            COMMIT=${{ env.COMMIT }}
            BUILD_DATE=${{ env.BUILD_DATE }}
          tags: |
-            ${{ env.DOCKERHUB_REPO }}:latest
-            ${{ env.DOCKERHUB_REPO }}:${{ env.VERSION }}
+            ${{ env.DOCKERHUB_REPO }}:latest-amd64
+            ${{ env.DOCKERHUB_REPO }}:${{ env.VERSION }}-amd64
+
+  docker_arm64:
+    runs-on: ubuntu-24.04-arm  # arm64 runner label; this job has no QEMU setup step
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      - name: Generate Build Metadata  # exports VERSION, COMMIT and BUILD_DATE via GITHUB_ENV
+        run: |
+          echo VERSION=`git describe --tags --always --dirty` >> $GITHUB_ENV
+          echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
+          echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
+      - name: Build and push (arm64)  # pushes the -arm64 suffixed tags that docker_manifest references
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          platforms: linux/arm64
+          push: true
+          build-args: |
+            VERSION=${{ env.VERSION }}
+            COMMIT=${{ env.COMMIT }}
+            BUILD_DATE=${{ env.BUILD_DATE }}
+          tags: |
+            ${{ env.DOCKERHUB_REPO }}:latest-arm64
+            ${{ env.DOCKERHUB_REPO }}:${{ env.VERSION }}-arm64
+
+  docker_manifest:
+    runs-on: ubuntu-latest
+    needs:  # the manifest step references the tags pushed by both per-arch jobs
+      - docker_amd64
+      - docker_arm64
+    steps:
+      - name: Checkout  # the git commands in "Generate Build Metadata" need the repository
+        uses: actions/checkout@v4
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      - name: Generate Build Metadata  # recomputes VERSION with the same command the build jobs use
+        run: |
+          echo VERSION=`git describe --tags --always --dirty` >> $GITHUB_ENV
+          echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
+          echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
+      - name: Create and push multi-arch manifests  # :latest and :VERSION, each built from its -amd64/-arm64 tags
+        run: |
+          docker buildx imagetools create \
+            --tag "${DOCKERHUB_REPO}:latest" \
+            "${DOCKERHUB_REPO}:latest-amd64" \
+            "${DOCKERHUB_REPO}:latest-arm64"
+          docker buildx imagetools create \
+            --tag "${DOCKERHUB_REPO}:${VERSION}" \
+            "${DOCKERHUB_REPO}:${VERSION}-amd64" \
+            "${DOCKERHUB_REPO}:${VERSION}-arm64"
+      - name: Cleanup temporary tags  # issues DELETE requests for the four per-arch tags on hub.docker.com
+        continue-on-error: true  # best-effort: a failure in this step does not fail the job
+        env:
+          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
+          DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          namespace="${DOCKERHUB_REPO%%/*}"
+          repo_name="${DOCKERHUB_REPO#*/}"
+
+          token="$(
+            curl -fsSL \
+              -H 'Content-Type: application/json' \
+              -d "{\"username\":\"${DOCKERHUB_USERNAME}\",\"password\":\"${DOCKERHUB_TOKEN}\"}" \
+              'https://hub.docker.com/v2/users/login/' \
+              | python3 -c 'import json,sys; print(json.load(sys.stdin)["token"])'
+          )"
+
+          delete_tag() {
+            local tag="$1"
+            local url="https://hub.docker.com/v2/repositories/${namespace}/${repo_name}/tags/${tag}/"
+            local http_code
+            http_code="$(curl -sS -o /dev/null -w "%{http_code}" -X DELETE -H "Authorization: JWT ${token}" "${url}" || true)"
+            if [ "${http_code}" = "204" ] || [ "${http_code}" = "404" ]; then
+              echo "Docker Hub tag removed (or missing): ${DOCKERHUB_REPO}:${tag} (HTTP ${http_code})"
+              return 0
+            fi
+            echo "Docker Hub tag delete failed: ${DOCKERHUB_REPO}:${tag} (HTTP ${http_code})"
+            return 0
+          }
+
+          delete_tag "latest-amd64"
+          delete_tag "latest-arm64"
+          delete_tag "${VERSION}-amd64"
+          delete_tag "${VERSION}-arm64"
--- a/README.md
+++ b/README.md
@@ -130,6 +130,14 @@ Windows-native CLIProxyAPI fork with TUI, system tray, and multi-provider OAuth

 VSCode extension for quick switching between Claude Code models, featuring integrated CLIProxyAPI as its backend with automatic background lifecycle management.

+### [ZeroLimit](https://github.com/0xtbug/zero-limit)
+
+Windows desktop app built with Tauri + React for monitoring AI coding assistant quotas via CLIProxyAPI. Track usage across Gemini, Claude, OpenAI Codex, and Antigravity accounts with a real-time dashboard, system tray integration, and one-click proxy control — no API keys needed.
+
+### [CPA-XXX Panel](https://github.com/ferretgeek/CPA-X)
+
+A lightweight web admin panel for CLIProxyAPI with health checks, resource monitoring, real-time logs, auto-update, request statistics and pricing display. Supports one-click installation and systemd service.
+
 > [!NOTE]  
 > If you developed a project based on CLIProxyAPI, please open a PR to add it to this list.

--- a/README_CN.md
+++ b/README_CN.md
@@ -129,6 +129,14 @@ CLI 封装器，用于通过 CLIProxyAPI OAuth 即时切换多个 Claude 账户

 一款 VSCode 扩展，提供了在 VSCode 中快速切换 Claude Code 模型的功能，内置 CLIProxyAPI 作为其后端，支持后台自动启动和关闭。

+### [ZeroLimit](https://github.com/0xtbug/zero-limit)
+
+Windows 桌面应用，基于 Tauri + React 构建，用于通过 CLIProxyAPI 监控 AI 编程助手配额。支持跨 Gemini、Claude、OpenAI Codex 和 Antigravity 账户的使用量追踪，提供实时仪表盘、系统托盘集成和一键代理控制，无需 API 密钥。
+
+### [CPA-XXX Panel](https://github.com/ferretgeek/CPA-X)
+
+面向 CLIProxyAPI 的 Web 管理面板，提供健康检查、资源监控、日志查看、自动更新、请求统计与定价展示，支持一键安装与 systemd 服务。
+
 > [!NOTE]  
 > 如果你开发了基于 CLIProxyAPI 的项目，请提交一个 PR（拉取请求）将其添加到此列表中。

--- a/config.example.yaml
+++ b/config.example.yaml
@@ -85,6 +85,10 @@ nonstream-keepalive-interval: 0
 #   keepalive-seconds: 15   # Default: 0 (disabled). <= 0 disables keep-alives.
 #   bootstrap-retries: 1    # Default: 0 (disabled). Retries before first byte is sent.

+# When true, inject the official Codex instructions into Codex API requests.
+# When false (default), no injection happens (CodexInstructionsForModel returns immediately without modification).
+codex-instructions-enabled: false
+
 # Gemini API keys
 # gemini-api-key:
 #   - api-key: "AIzaSy...01"
@@ -137,6 +141,15 @@ nonstream-keepalive-interval: 0
 #       - "claude-3-*"               # wildcard matching prefix (e.g. claude-3-7-sonnet-20250219)
 #       - "*-thinking"               # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
 #       - "*haiku*"                  # wildcard matching substring (e.g. claude-3-5-haiku-20241022)
+#     cloak:                         # optional: request cloaking for non-Claude-Code clients
+#       mode: "auto"                 # "auto" (default): cloak only when client is not Claude Code
+#                                    # "always": always apply cloaking
+#                                    # "never": never apply cloaking
+#       strict-mode: false           # false (default): prepend Claude Code prompt to user system messages
+#                                    # true: strip all user system messages, keep only Claude Code prompt
+#       sensitive-words:             # optional: words to obfuscate with zero-width characters
+#         - "API"
+#         - "proxy"

 # OpenAI compatibility providers
 # openai-compatibility:
@@ -201,12 +214,27 @@ nonstream-keepalive-interval: 0
 #     - from: "claude-haiku-4-5-20251001"
 #       to: "gemini-2.5-flash"

-# Global OAuth model name mappings (per channel)
-# These mappings rename model IDs for both model listing and request routing.
+# Global OAuth model name aliases (per channel)
+# These aliases rename model IDs for both model listing and request routing.
 # Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow.
-# NOTE: Mappings do not apply to gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, or ampcode.
+# NOTE: Aliases do not apply to gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, or ampcode.
 # You can repeat the same name with different aliases to expose multiple client model names.
-# oauth-model-mappings:
+oauth-model-alias:
+  antigravity:
+    - name: "rev19-uic3-1p"
+      alias: "gemini-2.5-computer-use-preview-10-2025"
+    - name: "gemini-3-pro-image"
+      alias: "gemini-3-pro-image-preview"
+    - name: "gemini-3-pro-high"
+      alias: "gemini-3-pro-preview"
+    - name: "gemini-3-flash"
+      alias: "gemini-3-flash-preview"
+    - name: "claude-sonnet-4-5"
+      alias: "gemini-claude-sonnet-4-5"
+    - name: "claude-sonnet-4-5-thinking"
+      alias: "gemini-claude-sonnet-4-5-thinking"
+    - name: "claude-opus-4-5-thinking"
+      alias: "gemini-claude-opus-4-5-thinking"
 #   gemini-cli:
 #     - name: "gemini-2.5-pro"          # original model name under this channel
 #       alias: "g2.5p"                  # client-visible alias
@@ -217,9 +245,6 @@ nonstream-keepalive-interval: 0
 #   aistudio:
 #     - name: "gemini-2.5-pro"
 #       alias: "g2.5p"
-#   antigravity:
-#     - name: "gemini-3-pro-preview"
-#       alias: "g3p"
 #   claude:
 #     - name: "claude-sonnet-4-5-20250929"
 #       alias: "cs4.5"
@@ -263,9 +288,21 @@ nonstream-keepalive-interval: 0
 #           protocol: "gemini" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex
 #       params: # JSON path (gjson/sjson syntax) -> value
 #         "generationConfig.thinkingConfig.thinkingBudget": 32768
+#   default-raw: # Default raw rules set parameters from raw JSON only when they are missing (values must be valid JSON).
+#     - models:
+#         - name: "gemini-2.5-pro" # Supports wildcards (e.g., "gemini-*")
+#           protocol: "gemini" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex
+#       params: # JSON path (gjson/sjson syntax) -> raw JSON value (strings are used as-is, must be valid JSON)
+#         "generationConfig.responseJsonSchema": "{\"type\":\"object\",\"properties\":{\"answer\":{\"type\":\"string\"}}}"
 #   override: # Override rules always set parameters, overwriting any existing values.
 #     - models:
 #         - name: "gpt-*" # Supports wildcards (e.g., "gpt-*")
 #           protocol: "codex" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex
 #       params: # JSON path (gjson/sjson syntax) -> value
 #         "reasoning.effort": "high"
+#   override-raw: # Override raw rules always set parameters using raw JSON (must be valid JSON).
+#     - models:
+#         - name: "gpt-*" # Supports wildcards (e.g., "gpt-*")
+#           protocol: "codex" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex
+#       params: # JSON path (gjson/sjson syntax) -> raw JSON value (strings are used as-is, must be valid JSON)
+#         "response_format": "{\"type\":\"json_schema\",\"json_schema\":{\"name\":\"answer\",\"schema\":{\"type\":\"object\"}}}"
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -22,7 +22,7 @@ services:
      - "51121:51121"
      - "11451:11451"
    volumes:
-      - ./config.yaml:/CLIProxyAPI/config.yaml
-      - ./auths:/root/.cli-proxy-api
-      - ./logs:/CLIProxyAPI/logs
+      - ${CLI_PROXY_CONFIG_PATH:-./config.yaml}:/CLIProxyAPI/config.yaml
+      - ${CLI_PROXY_AUTH_PATH:-./auths}:/root/.cli-proxy-api
+      - ${CLI_PROXY_LOG_PATH:-./logs}:/CLIProxyAPI/logs
    restart: unless-stopped
--- a/go.mod
+++ b/go.mod
@@ -13,6 +13,7 @@ require (
 	github.com/joho/godotenv v1.5.1
 	github.com/klauspost/compress v1.17.4
 	github.com/minio/minio-go/v7 v7.0.66
+	github.com/refraction-networking/utls v1.8.2
 	github.com/sirupsen/logrus v1.9.3
 	github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966
 	github.com/tidwall/gjson v1.18.0
--- a/go.sum
+++ b/go.sum
@@ -118,6 +118,8 @@ github.com/pjbgf/sha1cd v0.5.0 h1:a+UkboSi1znleCDUNT3M5YxjOnN1fz2FhN48FlwCxs0=
 github.com/pjbgf/sha1cd v0.5.0/go.mod h1:lhpGlyHLpQZoxMv8HcgXvZEhcGs0PG/vsZnEJ7H0iCM=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/refraction-networking/utls v1.8.2 h1:j4Q1gJj0xngdeH+Ox/qND11aEfhpgoEvV+S9iJ2IdQo=
+github.com/refraction-networking/utls v1.8.2/go.mod h1:jkSOEkLqn+S/jtpEHPOsVv/4V4EVnelwbMQl4vCWXAM=
 github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
 github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
 github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc=
--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
@@ -3,13 +3,14 @@ package management
 import (
 	"bytes"
 	"context"
+	"crypto/sha256"
+	"encoding/hex"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
 	"net"
 	"net/http"
-	"net/url"
 	"os"
 	"path/filepath"
 	"sort"
@@ -19,6 +20,7 @@ import (
 	"time"

 	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/antigravity"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/codex"
 	geminiAuth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/gemini"
@@ -230,14 +232,6 @@ func stopForwarderInstance(port int, forwarder *callbackForwarder) {
 	log.Infof("callback forwarder on port %d stopped", port)
 }

-func sanitizeAntigravityFileName(email string) string {
-	if strings.TrimSpace(email) == "" {
-		return "antigravity.json"
-	}
-	replacer := strings.NewReplacer("@", "_", ".", "_")
-	return fmt.Sprintf("antigravity-%s.json", replacer.Replace(email))
-}
-
 func (h *Handler) managementCallbackURL(path string) (string, error) {
 	if h == nil || h.cfg == nil || h.cfg.Port <= 0 {
 		return "", fmt.Errorf("server port is not configured")
@@ -747,6 +741,72 @@ func (h *Handler) registerAuthFromFile(ctx context.Context, path string, data []
 	return err
 }

+// PatchAuthFileStatus sets (not flips) the disabled flag of one auth entry from a JSON body with "name" and "disabled", then saves it via the auth manager.
+func (h *Handler) PatchAuthFileStatus(c *gin.Context) {
+	if h.authManager == nil {
+		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "core auth manager unavailable"})
+		return
+	}
+
+	var req struct {
+		Name     string `json:"name"`
+		Disabled *bool  `json:"disabled"` // pointer so an omitted field can be told apart from false
+	}
+	if err := c.ShouldBindJSON(&req); err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
+		return
+	}
+
+	name := strings.TrimSpace(req.Name)
+	if name == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "name is required"})
+		return
+	}
+	if req.Disabled == nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "disabled is required"})
+		return
+	}
+
+	ctx := c.Request.Context()
+
+	// Resolve the target: try name as an auth ID first, otherwise take the first listed auth whose FileName equals name
+	var targetAuth *coreauth.Auth
+	if auth, ok := h.authManager.GetByID(name); ok {
+		targetAuth = auth
+	} else {
+		auths := h.authManager.List()
+		for _, auth := range auths {
+			if auth.FileName == name {
+				targetAuth = auth
+				break
+			}
+		}
+	}
+
+	if targetAuth == nil {
+		c.JSON(http.StatusNotFound, gin.H{"error": "auth file not found"})
+		return
+	}
+
+	// Apply the requested state and keep Status and StatusMessage consistent with it
+	targetAuth.Disabled = *req.Disabled
+	if *req.Disabled {
+		targetAuth.Status = coreauth.StatusDisabled
+		targetAuth.StatusMessage = "disabled via management API"
+	} else {
+		targetAuth.Status = coreauth.StatusActive
+		targetAuth.StatusMessage = ""
+	}
+	targetAuth.UpdatedAt = time.Now()
+
+	if _, err := h.authManager.Update(ctx, targetAuth); err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to update auth: %v", err)})
+		return
+	}
+
+	c.JSON(http.StatusOK, gin.H{"status": "ok", "disabled": *req.Disabled})
+}
+
 func (h *Handler) disableAuth(ctx context.Context, id string) {
 	if h == nil || h.authManager == nil {
 		return
@@ -913,67 +973,14 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {
 		rawCode := resultMap["code"]
 		code := strings.Split(rawCode, "#")[0]

-		// Exchange code for tokens (replicate logic using updated redirect_uri)
-		// Extract client_id from the modified auth URL
-		clientID := ""
-		if u2, errP := url.Parse(authURL); errP == nil {
-			clientID = u2.Query().Get("client_id")
-		}
-		// Build request
-		bodyMap := map[string]any{
-			"code":          code,
-			"state":         state,
-			"grant_type":    "authorization_code",
-			"client_id":     clientID,
-			"redirect_uri":  "http://localhost:54545/callback",
-			"code_verifier": pkceCodes.CodeVerifier,
-		}
-		bodyJSON, _ := json.Marshal(bodyMap)
-
-		httpClient := util.SetProxy(&h.cfg.SDKConfig, &http.Client{})
-		req, _ := http.NewRequestWithContext(ctx, "POST", "https://console.anthropic.com/v1/oauth/token", strings.NewReader(string(bodyJSON)))
-		req.Header.Set("Content-Type", "application/json")
-		req.Header.Set("Accept", "application/json")
-		resp, errDo := httpClient.Do(req)
-		if errDo != nil {
-			authErr := claude.NewAuthenticationError(claude.ErrCodeExchangeFailed, errDo)
+		// Exchange code for tokens using internal auth service
+		bundle, errExchange := anthropicAuth.ExchangeCodeForTokens(ctx, code, state, pkceCodes)
+		if errExchange != nil {
+			authErr := claude.NewAuthenticationError(claude.ErrCodeExchangeFailed, errExchange)
 			log.Errorf("Failed to exchange authorization code for tokens: %v", authErr)
 			SetOAuthSessionError(state, "Failed to exchange authorization code for tokens")
 			return
 		}
-		defer func() {
-			if errClose := resp.Body.Close(); errClose != nil {
-				log.Errorf("failed to close response body: %v", errClose)
-			}
-		}()
-		respBody, _ := io.ReadAll(resp.Body)
-		if resp.StatusCode != http.StatusOK {
-			log.Errorf("token exchange failed with status %d: %s", resp.StatusCode, string(respBody))
-			SetOAuthSessionError(state, fmt.Sprintf("token exchange failed with status %d", resp.StatusCode))
-			return
-		}
-		var tResp struct {
-			AccessToken  string `json:"access_token"`
-			RefreshToken string `json:"refresh_token"`
-			ExpiresIn    int    `json:"expires_in"`
-			Account      struct {
-				EmailAddress string `json:"email_address"`
-			} `json:"account"`
-		}
-		if errU := json.Unmarshal(respBody, &tResp); errU != nil {
-			log.Errorf("failed to parse token response: %v", errU)
-			SetOAuthSessionError(state, "Failed to parse token response")
-			return
-		}
-		bundle := &claude.ClaudeAuthBundle{
-			TokenData: claude.ClaudeTokenData{
-				AccessToken:  tResp.AccessToken,
-				RefreshToken: tResp.RefreshToken,
-				Email:        tResp.Account.EmailAddress,
-				Expire:       time.Now().Add(time.Duration(tResp.ExpiresIn) * time.Second).Format(time.RFC3339),
-			},
-			LastRefresh: time.Now().Format(time.RFC3339),
-		}

 		// Create token storage
 		tokenStorage := anthropicAuth.CreateTokenStorage(bundle)
@@ -1013,17 +1020,13 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {

 	fmt.Println("Initializing Google authentication...")

-	// OAuth2 configuration (mirrors internal/auth/gemini)
+	// OAuth2 configuration using exported constants from internal/auth/gemini
 	conf := &oauth2.Config{
-		ClientID:     "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com",
-		ClientSecret: "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",
-		RedirectURL:  "http://localhost:8085/oauth2callback",
-		Scopes: []string{
-			"https://www.googleapis.com/auth/cloud-platform",
-			"https://www.googleapis.com/auth/userinfo.email",
-			"https://www.googleapis.com/auth/userinfo.profile",
-		},
-		Endpoint: google.Endpoint,
+		ClientID:     geminiAuth.ClientID,
+		ClientSecret: geminiAuth.ClientSecret,
+		RedirectURL:  fmt.Sprintf("http://localhost:%d/oauth2callback", geminiAuth.DefaultCallbackPort),
+		Scopes:       geminiAuth.Scopes,
+		Endpoint:     google.Endpoint,
 	}

 	// Build authorization URL and return it immediately
@@ -1145,13 +1148,9 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
 		}

 		ifToken["token_uri"] = "https://oauth2.googleapis.com/token"
-		ifToken["client_id"] = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
-		ifToken["client_secret"] = "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"
-		ifToken["scopes"] = []string{
-			"https://www.googleapis.com/auth/cloud-platform",
-			"https://www.googleapis.com/auth/userinfo.email",
-			"https://www.googleapis.com/auth/userinfo.profile",
-		}
+		ifToken["client_id"] = geminiAuth.ClientID
+		ifToken["client_secret"] = geminiAuth.ClientSecret
+		ifToken["scopes"] = geminiAuth.Scopes
 		ifToken["universe_domain"] = "googleapis.com"

 		ts := geminiAuth.GeminiTokenStorage{
@@ -1338,74 +1337,34 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
 		}

 		log.Debug("Authorization code received, exchanging for tokens...")
-		// Extract client_id from authURL
-		clientID := ""
-		if u2, errP := url.Parse(authURL); errP == nil {
-			clientID = u2.Query().Get("client_id")
-		}
-		// Exchange code for tokens with redirect equal to mgmtRedirect
-		form := url.Values{
-			"grant_type":    {"authorization_code"},
-			"client_id":     {clientID},
-			"code":          {code},
-			"redirect_uri":  {"http://localhost:1455/auth/callback"},
-			"code_verifier": {pkceCodes.CodeVerifier},
-		}
-		httpClient := util.SetProxy(&h.cfg.SDKConfig, &http.Client{})
-		req, _ := http.NewRequestWithContext(ctx, "POST", "https://auth.openai.com/oauth/token", strings.NewReader(form.Encode()))
-		req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
-		req.Header.Set("Accept", "application/json")
-		resp, errDo := httpClient.Do(req)
-		if errDo != nil {
-			authErr := codex.NewAuthenticationError(codex.ErrCodeExchangeFailed, errDo)
+		// Exchange code for tokens using internal auth service
+		bundle, errExchange := openaiAuth.ExchangeCodeForTokens(ctx, code, pkceCodes)
+		if errExchange != nil {
+			authErr := codex.NewAuthenticationError(codex.ErrCodeExchangeFailed, errExchange)
 			SetOAuthSessionError(state, "Failed to exchange authorization code for tokens")
 			log.Errorf("Failed to exchange authorization code for tokens: %v", authErr)
 			return
 		}
-		defer func() { _ = resp.Body.Close() }()
-		respBody, _ := io.ReadAll(resp.Body)
-		if resp.StatusCode != http.StatusOK {
-			SetOAuthSessionError(state, fmt.Sprintf("Token exchange failed with status %d", resp.StatusCode))
-			log.Errorf("token exchange failed with status %d: %s", resp.StatusCode, string(respBody))
-			return
-		}
-		var tokenResp struct {
-			AccessToken  string `json:"access_token"`
-			RefreshToken string `json:"refresh_token"`
-			IDToken      string `json:"id_token"`
-			ExpiresIn    int    `json:"expires_in"`
-		}
-		if errU := json.Unmarshal(respBody, &tokenResp); errU != nil {
-			SetOAuthSessionError(state, "Failed to parse token response")
-			log.Errorf("failed to parse token response: %v", errU)
-			return
-		}
-		claims, _ := codex.ParseJWTToken(tokenResp.IDToken)
-		email := ""
-		accountID := ""
+
+		// Extract additional info for filename generation
+		claims, _ := codex.ParseJWTToken(bundle.TokenData.IDToken)
+		planType := ""
+		hashAccountID := ""
 		if claims != nil {
-			email = claims.GetUserEmail()
-			accountID = claims.GetAccountID()
-		}
-		// Build bundle compatible with existing storage
-		bundle := &codex.CodexAuthBundle{
-			TokenData: codex.CodexTokenData{
-				IDToken:      tokenResp.IDToken,
-				AccessToken:  tokenResp.AccessToken,
-				RefreshToken: tokenResp.RefreshToken,
-				AccountID:    accountID,
-				Email:        email,
-				Expire:       time.Now().Add(time.Duration(tokenResp.ExpiresIn) * time.Second).Format(time.RFC3339),
-			},
-			LastRefresh: time.Now().Format(time.RFC3339),
+			planType = strings.TrimSpace(claims.CodexAuthInfo.ChatgptPlanType)
+			if accountID := claims.GetAccountID(); accountID != "" {
+				digest := sha256.Sum256([]byte(accountID))
+				hashAccountID = hex.EncodeToString(digest[:])[:8]
+			}
 		}

 		// Create token storage and persist
 		tokenStorage := openaiAuth.CreateTokenStorage(bundle)
+		fileName := codex.CredentialFileName(tokenStorage.Email, planType, hashAccountID, true)
 		record := &coreauth.Auth{
-			ID:       fmt.Sprintf("codex-%s.json", tokenStorage.Email),
+			ID:       fileName,
 			Provider: "codex",
-			FileName: fmt.Sprintf("codex-%s.json", tokenStorage.Email),
+			FileName: fileName,
 			Storage:  tokenStorage,
 			Metadata: map[string]any{
 				"email":      tokenStorage.Email,
@@ -1431,23 +1390,12 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
 }

 func (h *Handler) RequestAntigravityToken(c *gin.Context) {
-	const (
-		antigravityCallbackPort = 51121
-		antigravityClientID     = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
-		antigravityClientSecret = "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"
-	)
-	var antigravityScopes = []string{
-		"https://www.googleapis.com/auth/cloud-platform",
-		"https://www.googleapis.com/auth/userinfo.email",
-		"https://www.googleapis.com/auth/userinfo.profile",
-		"https://www.googleapis.com/auth/cclog",
-		"https://www.googleapis.com/auth/experimentsandconfigs",
-	}
-
 	ctx := context.Background()

 	fmt.Println("Initializing Antigravity authentication...")

+	authSvc := antigravity.NewAntigravityAuth(h.cfg, nil)
+
 	state, errState := misc.GenerateRandomState()
 	if errState != nil {
 		log.Errorf("Failed to generate state parameter: %v", errState)
@@ -1455,17 +1403,8 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 		return
 	}

-	redirectURI := fmt.Sprintf("http://localhost:%d/oauth-callback", antigravityCallbackPort)
-
-	params := url.Values{}
-	params.Set("access_type", "offline")
-	params.Set("client_id", antigravityClientID)
-	params.Set("prompt", "consent")
-	params.Set("redirect_uri", redirectURI)
-	params.Set("response_type", "code")
-	params.Set("scope", strings.Join(antigravityScopes, " "))
-	params.Set("state", state)
-	authURL := "https://accounts.google.com/o/oauth2/v2/auth?" + params.Encode()
+	redirectURI := fmt.Sprintf("http://localhost:%d/oauth-callback", antigravity.CallbackPort)
+	authURL := authSvc.BuildAuthURL(state, redirectURI)

 	RegisterOAuthSession(state, "antigravity")

@@ -1479,7 +1418,7 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 			return
 		}
 		var errStart error
-		if forwarder, errStart = startCallbackForwarder(antigravityCallbackPort, "antigravity", targetURL); errStart != nil {
+		if forwarder, errStart = startCallbackForwarder(antigravity.CallbackPort, "antigravity", targetURL); errStart != nil {
 			log.WithError(errStart).Error("failed to start antigravity callback forwarder")
 			c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to start callback server"})
 			return
@@ -1488,7 +1427,7 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {

 	go func() {
 		if isWebUI {
-			defer stopCallbackForwarderInstance(antigravityCallbackPort, forwarder)
+			defer stopCallbackForwarderInstance(antigravity.CallbackPort, forwarder)
 		}

 		waitFile := filepath.Join(h.cfg.AuthDir, fmt.Sprintf(".oauth-antigravity-%s.oauth", state))
@@ -1528,93 +1467,36 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 			time.Sleep(500 * time.Millisecond)
 		}

-		httpClient := util.SetProxy(&h.cfg.SDKConfig, &http.Client{})
-		form := url.Values{}
-		form.Set("code", authCode)
-		form.Set("client_id", antigravityClientID)
-		form.Set("client_secret", antigravityClientSecret)
-		form.Set("redirect_uri", redirectURI)
-		form.Set("grant_type", "authorization_code")
-
-		req, errNewRequest := http.NewRequestWithContext(ctx, http.MethodPost, "https://oauth2.googleapis.com/token", strings.NewReader(form.Encode()))
-		if errNewRequest != nil {
-			log.Errorf("Failed to build token request: %v", errNewRequest)
-			SetOAuthSessionError(state, "Failed to build token request")
-			return
-		}
-		req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
-
-		resp, errDo := httpClient.Do(req)
-		if errDo != nil {
-			log.Errorf("Failed to execute token request: %v", errDo)
+		tokenResp, errToken := authSvc.ExchangeCodeForTokens(ctx, authCode, redirectURI)
+		if errToken != nil {
+			log.Errorf("Failed to exchange token: %v", errToken)
 			SetOAuthSessionError(state, "Failed to exchange token")
 			return
 		}
-		defer func() {
-			if errClose := resp.Body.Close(); errClose != nil {
-				log.Errorf("antigravity token exchange close error: %v", errClose)
-			}
-		}()

-		if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
-			bodyBytes, _ := io.ReadAll(resp.Body)
-			log.Errorf("Antigravity token exchange failed with status %d: %s", resp.StatusCode, string(bodyBytes))
-			SetOAuthSessionError(state, fmt.Sprintf("Token exchange failed: %d", resp.StatusCode))
+		accessToken := strings.TrimSpace(tokenResp.AccessToken)
+		if accessToken == "" {
+			log.Error("antigravity: token exchange returned empty access token")
+			SetOAuthSessionError(state, "Failed to exchange token")
 			return
 		}

-		var tokenResp struct {
-			AccessToken  string `json:"access_token"`
-			RefreshToken string `json:"refresh_token"`
-			ExpiresIn    int64  `json:"expires_in"`
-			TokenType    string `json:"token_type"`
-		}
-		if errDecode := json.NewDecoder(resp.Body).Decode(&tokenResp); errDecode != nil {
-			log.Errorf("Failed to parse token response: %v", errDecode)
-			SetOAuthSessionError(state, "Failed to parse token response")
+		email, errInfo := authSvc.FetchUserInfo(ctx, accessToken)
+		if errInfo != nil {
+			log.Errorf("Failed to fetch user info: %v", errInfo)
+			SetOAuthSessionError(state, "Failed to fetch user info")
 			return
 		}
-
-		email := ""
-		if strings.TrimSpace(tokenResp.AccessToken) != "" {
-			infoReq, errInfoReq := http.NewRequestWithContext(ctx, http.MethodGet, "https://www.googleapis.com/oauth2/v1/userinfo?alt=json", nil)
-			if errInfoReq != nil {
-				log.Errorf("Failed to build user info request: %v", errInfoReq)
-				SetOAuthSessionError(state, "Failed to build user info request")
-				return
-			}
-			infoReq.Header.Set("Authorization", "Bearer "+tokenResp.AccessToken)
-
-			infoResp, errInfo := httpClient.Do(infoReq)
-			if errInfo != nil {
-				log.Errorf("Failed to execute user info request: %v", errInfo)
-				SetOAuthSessionError(state, "Failed to execute user info request")
-				return
-			}
-			defer func() {
-				if errClose := infoResp.Body.Close(); errClose != nil {
-					log.Errorf("antigravity user info close error: %v", errClose)
-				}
-			}()
-
-			if infoResp.StatusCode >= http.StatusOK && infoResp.StatusCode < http.StatusMultipleChoices {
-				var infoPayload struct {
-					Email string `json:"email"`
-				}
-				if errDecodeInfo := json.NewDecoder(infoResp.Body).Decode(&infoPayload); errDecodeInfo == nil {
-					email = strings.TrimSpace(infoPayload.Email)
-				}
-			} else {
-				bodyBytes, _ := io.ReadAll(infoResp.Body)
-				log.Errorf("User info request failed with status %d: %s", infoResp.StatusCode, string(bodyBytes))
-				SetOAuthSessionError(state, fmt.Sprintf("User info request failed: %d", infoResp.StatusCode))
-				return
-			}
+		email = strings.TrimSpace(email)
+		if email == "" {
+			log.Error("antigravity: user info returned empty email")
+			SetOAuthSessionError(state, "Failed to fetch user info")
+			return
 		}

 		projectID := ""
-		if strings.TrimSpace(tokenResp.AccessToken) != "" {
-			fetchedProjectID, errProject := sdkAuth.FetchAntigravityProjectID(ctx, tokenResp.AccessToken, httpClient)
+		if accessToken != "" {
+			fetchedProjectID, errProject := authSvc.FetchProjectID(ctx, accessToken)
 			if errProject != nil {
 				log.Warnf("antigravity: failed to fetch project ID: %v", errProject)
 			} else {
@@ -1639,7 +1521,7 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 			metadata["project_id"] = projectID
 		}

-		fileName := sanitizeAntigravityFileName(email)
+		fileName := antigravity.CredentialFileName(email)
 		label := strings.TrimSpace(email)
 		if label == "" {
 			label = "antigravity"
@@ -1703,7 +1585,7 @@ func (h *Handler) RequestQwenToken(c *gin.Context) {
 		// Create token storage
 		tokenStorage := qwenAuth.CreateTokenStorage(tokenData)

-		tokenStorage.Email = fmt.Sprintf("qwen-%d", time.Now().UnixMilli())
+		tokenStorage.Email = fmt.Sprintf("%d", time.Now().UnixMilli())
 		record := &coreauth.Auth{
 			ID:       fmt.Sprintf("qwen-%s.json", tokenStorage.Email),
 			Provider: "qwen",
@@ -1808,7 +1690,7 @@ func (h *Handler) RequestIFlowToken(c *gin.Context) {
 		tokenStorage := authSvc.CreateTokenStorage(tokenData)
 		identifier := strings.TrimSpace(tokenStorage.Email)
 		if identifier == "" {
-			identifier = fmt.Sprintf("iflow-%d", time.Now().UnixMilli())
+			identifier = fmt.Sprintf("%d", time.Now().UnixMilli())
 			tokenStorage.Email = identifier
 		}
 		record := &coreauth.Auth{
@@ -1893,15 +1775,17 @@ func (h *Handler) RequestIFlowCookieToken(c *gin.Context) {
 	fileName := iflowauth.SanitizeIFlowFileName(email)
 	if fileName == "" {
 		fileName = fmt.Sprintf("iflow-%d", time.Now().UnixMilli())
+	} else {
+		fileName = fmt.Sprintf("iflow-%s", fileName)
 	}

 	tokenStorage.Email = email
 	timestamp := time.Now().Unix()

 	record := &coreauth.Auth{
-		ID:       fmt.Sprintf("iflow-%s-%d.json", fileName, timestamp),
+		ID:       fmt.Sprintf("%s-%d.json", fileName, timestamp),
 		Provider: "iflow",
-		FileName: fmt.Sprintf("iflow-%s-%d.json", fileName, timestamp),
+		FileName: fmt.Sprintf("%s-%d.json", fileName, timestamp),
 		Storage:  tokenStorage,
 		Metadata: map[string]any{
 			"email":        email,
@@ -2108,7 +1992,20 @@ func performGeminiCLISetup(ctx context.Context, httpClient *http.Client, storage
 			finalProjectID := projectID
 			if responseProjectID != "" {
 				if explicitProject && !strings.EqualFold(responseProjectID, projectID) {
-					log.Warnf("Gemini onboarding returned project %s instead of requested %s; keeping requested project ID.", responseProjectID, projectID)
+					// Check if this is a free user (gen-lang-client projects or free/legacy tier)
+					isFreeUser := strings.HasPrefix(projectID, "gen-lang-client-") ||
+						strings.EqualFold(tierID, "FREE") ||
+						strings.EqualFold(tierID, "LEGACY")
+
+					if isFreeUser {
+						// For free users, use backend project ID for preview model access
+						log.Infof("Gemini onboarding: frontend project %s maps to backend project %s", projectID, responseProjectID)
+						log.Infof("Using backend project ID: %s (recommended for preview model access)", responseProjectID)
+						finalProjectID = responseProjectID
+					} else {
+						// Pro users: keep requested project ID (original behavior)
+						log.Warnf("Gemini onboarding returned project %s instead of requested %s; keeping requested project ID.", responseProjectID, projectID)
+					}
 				} else {
 					finalProjectID = responseProjectID
 				}
--- a/internal/api/handlers/management/config_lists.go
+++ b/internal/api/handlers/management/config_lists.go
@@ -703,21 +703,21 @@ func (h *Handler) DeleteOAuthExcludedModels(c *gin.Context) {
 	h.persist(c)
 }

-// oauth-model-mappings: map[string][]ModelNameMapping
-func (h *Handler) GetOAuthModelMappings(c *gin.Context) {
-	c.JSON(200, gin.H{"oauth-model-mappings": sanitizedOAuthModelMappings(h.cfg.OAuthModelMappings)})
+// oauth-model-alias: map[string][]OAuthModelAlias
+func (h *Handler) GetOAuthModelAlias(c *gin.Context) {
+	c.JSON(200, gin.H{"oauth-model-alias": sanitizedOAuthModelAlias(h.cfg.OAuthModelAlias)})
 }

-func (h *Handler) PutOAuthModelMappings(c *gin.Context) {
+func (h *Handler) PutOAuthModelAlias(c *gin.Context) {
 	data, err := c.GetRawData()
 	if err != nil {
 		c.JSON(400, gin.H{"error": "failed to read body"})
 		return
 	}
-	var entries map[string][]config.ModelNameMapping
+	var entries map[string][]config.OAuthModelAlias
 	if err = json.Unmarshal(data, &entries); err != nil {
 		var wrapper struct {
-			Items map[string][]config.ModelNameMapping `json:"items"`
+			Items map[string][]config.OAuthModelAlias `json:"items"`
 		}
 		if err2 := json.Unmarshal(data, &wrapper); err2 != nil {
 			c.JSON(400, gin.H{"error": "invalid body"})
@@ -725,15 +725,15 @@ func (h *Handler) PutOAuthModelMappings(c *gin.Context) {
 		}
 		entries = wrapper.Items
 	}
-	h.cfg.OAuthModelMappings = sanitizedOAuthModelMappings(entries)
+	h.cfg.OAuthModelAlias = sanitizedOAuthModelAlias(entries)
 	h.persist(c)
 }

-func (h *Handler) PatchOAuthModelMappings(c *gin.Context) {
+func (h *Handler) PatchOAuthModelAlias(c *gin.Context) {
 	var body struct {
-		Provider *string                   `json:"provider"`
-		Channel  *string                   `json:"channel"`
-		Mappings []config.ModelNameMapping `json:"mappings"`
+		Provider *string                  `json:"provider"`
+		Channel  *string                  `json:"channel"`
+		Aliases  []config.OAuthModelAlias `json:"aliases"`
 	}
 	if errBindJSON := c.ShouldBindJSON(&body); errBindJSON != nil {
 		c.JSON(400, gin.H{"error": "invalid body"})
@@ -751,32 +751,32 @@ func (h *Handler) PatchOAuthModelMappings(c *gin.Context) {
 		return
 	}

-	normalizedMap := sanitizedOAuthModelMappings(map[string][]config.ModelNameMapping{channel: body.Mappings})
+	normalizedMap := sanitizedOAuthModelAlias(map[string][]config.OAuthModelAlias{channel: body.Aliases})
 	normalized := normalizedMap[channel]
 	if len(normalized) == 0 {
-		if h.cfg.OAuthModelMappings == nil {
+		if h.cfg.OAuthModelAlias == nil {
 			c.JSON(404, gin.H{"error": "channel not found"})
 			return
 		}
-		if _, ok := h.cfg.OAuthModelMappings[channel]; !ok {
+		if _, ok := h.cfg.OAuthModelAlias[channel]; !ok {
 			c.JSON(404, gin.H{"error": "channel not found"})
 			return
 		}
-		delete(h.cfg.OAuthModelMappings, channel)
-		if len(h.cfg.OAuthModelMappings) == 0 {
-			h.cfg.OAuthModelMappings = nil
+		delete(h.cfg.OAuthModelAlias, channel)
+		if len(h.cfg.OAuthModelAlias) == 0 {
+			h.cfg.OAuthModelAlias = nil
 		}
 		h.persist(c)
 		return
 	}
-	if h.cfg.OAuthModelMappings == nil {
-		h.cfg.OAuthModelMappings = make(map[string][]config.ModelNameMapping)
+	if h.cfg.OAuthModelAlias == nil {
+		h.cfg.OAuthModelAlias = make(map[string][]config.OAuthModelAlias)
 	}
-	h.cfg.OAuthModelMappings[channel] = normalized
+	h.cfg.OAuthModelAlias[channel] = normalized
 	h.persist(c)
 }

-func (h *Handler) DeleteOAuthModelMappings(c *gin.Context) {
+func (h *Handler) DeleteOAuthModelAlias(c *gin.Context) {
 	channel := strings.ToLower(strings.TrimSpace(c.Query("channel")))
 	if channel == "" {
 		channel = strings.ToLower(strings.TrimSpace(c.Query("provider")))
@@ -785,17 +785,17 @@ func (h *Handler) DeleteOAuthModelMappings(c *gin.Context) {
 		c.JSON(400, gin.H{"error": "missing channel"})
 		return
 	}
-	if h.cfg.OAuthModelMappings == nil {
+	if h.cfg.OAuthModelAlias == nil {
 		c.JSON(404, gin.H{"error": "channel not found"})
 		return
 	}
-	if _, ok := h.cfg.OAuthModelMappings[channel]; !ok {
+	if _, ok := h.cfg.OAuthModelAlias[channel]; !ok {
 		c.JSON(404, gin.H{"error": "channel not found"})
 		return
 	}
-	delete(h.cfg.OAuthModelMappings, channel)
-	if len(h.cfg.OAuthModelMappings) == 0 {
-		h.cfg.OAuthModelMappings = nil
+	delete(h.cfg.OAuthModelAlias, channel)
+	if len(h.cfg.OAuthModelAlias) == 0 {
+		h.cfg.OAuthModelAlias = nil
 	}
 	h.persist(c)
 }
@@ -1042,26 +1042,26 @@ func normalizeVertexCompatKey(entry *config.VertexCompatKey) {
 	entry.Models = normalized
 }

-func sanitizedOAuthModelMappings(entries map[string][]config.ModelNameMapping) map[string][]config.ModelNameMapping {
+func sanitizedOAuthModelAlias(entries map[string][]config.OAuthModelAlias) map[string][]config.OAuthModelAlias {
 	if len(entries) == 0 {
 		return nil
 	}
-	copied := make(map[string][]config.ModelNameMapping, len(entries))
-	for channel, mappings := range entries {
-		if len(mappings) == 0 {
+	copied := make(map[string][]config.OAuthModelAlias, len(entries))
+	for channel, aliases := range entries {
+		if len(aliases) == 0 {
 			continue
 		}
-		copied[channel] = append([]config.ModelNameMapping(nil), mappings...)
+		copied[channel] = append([]config.OAuthModelAlias(nil), aliases...)
 	}
 	if len(copied) == 0 {
 		return nil
 	}
-	cfg := config.Config{OAuthModelMappings: copied}
-	cfg.SanitizeOAuthModelMappings()
-	if len(cfg.OAuthModelMappings) == 0 {
+	cfg := config.Config{OAuthModelAlias: copied}
+	cfg.SanitizeOAuthModelAlias()
+	if len(cfg.OAuthModelAlias) == 0 {
 		return nil
 	}
-	return cfg.OAuthModelMappings
+	return cfg.OAuthModelAlias
 }

 // GetAmpCode returns the complete ampcode configuration.
--- a/internal/api/handlers/management/logs.go
+++ b/internal/api/handlers/management/logs.go
@@ -13,7 +13,7 @@ import (
 	"time"

 	"github.com/gin-gonic/gin"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 )

 const (
@@ -360,16 +360,7 @@ func (h *Handler) logDirectory() string {
 	if h.logDir != "" {
 		return h.logDir
 	}
-	if base := util.WritablePath(); base != "" {
-		return filepath.Join(base, "logs")
-	}
-	if h.configFilePath != "" {
-		dir := filepath.Dir(h.configFilePath)
-		if dir != "" && dir != "." {
-			return filepath.Join(dir, "logs")
-		}
-	}
-	return "logs"
+	return logging.ResolveLogDirectory(h.cfg)
 }

 func (h *Handler) collectLogFiles(dir string) ([]string, error) {
--- a/internal/api/handlers/management/model_definitions.go
+++ b/internal/api/handlers/management/model_definitions.go
@@ -0,0 +1,33 @@
+package management
+
+import (
+	"net/http"
+	"strings"
+
+	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+)
+
+// GetStaticModelDefinitions responds with the static model metadata registered for a channel.
+// The channel is read from the :channel path param, falling back to the ?channel= query param.
+func (h *Handler) GetStaticModelDefinitions(c *gin.Context) {
+	name := strings.TrimSpace(c.Param("channel"))
+	if name == "" {
+		name = strings.TrimSpace(c.Query("channel"))
+	}
+	if name == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "channel is required"})
+		return
+	}
+
+	// A nil lookup result is reported as an unknown channel, echoing the trimmed name back.
+	defs := registry.GetStaticModelDefinitionsByChannel(name)
+	if defs == nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "unknown channel", "channel": name})
+		return
+	}
+
+	// name is already trimmed above, so only case folding is left for the echoed channel.
+	normalized := strings.ToLower(name)
+	c.JSON(http.StatusOK, gin.H{"channel": normalized, "models": defs})
+}
--- a/internal/api/middleware/request_logging.go
+++ b/internal/api/middleware/request_logging.go
@@ -8,6 +8,7 @@ import (
 	"io"
 	"net/http"
 	"strings"
+	"time"

 	"github.com/gin-gonic/gin"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
@@ -103,6 +104,7 @@ func captureRequestInfo(c *gin.Context) (*RequestInfo, error) {
 		Headers:   headers,
 		Body:      body,
 		RequestID: logging.GetGinRequestID(c),
+		Timestamp: time.Now(),
 	}, nil
 }

--- a/internal/api/middleware/response_writer.go
+++ b/internal/api/middleware/response_writer.go
@@ -7,6 +7,7 @@ import (
 	"bytes"
 	"net/http"
 	"strings"
+	"time"

 	"github.com/gin-gonic/gin"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
@@ -20,22 +21,24 @@ type RequestInfo struct {
 	Headers   map[string][]string // Headers contains the request headers.
 	Body      []byte              // Body is the raw request body.
 	RequestID string              // RequestID is the unique identifier for the request.
+	Timestamp time.Time           // Timestamp is when the request was received.
 }

 // ResponseWriterWrapper wraps the standard gin.ResponseWriter to intercept and log response data.
 // It is designed to handle both standard and streaming responses, ensuring that logging operations do not block the client response.
 type ResponseWriterWrapper struct {
 	gin.ResponseWriter
-	body           *bytes.Buffer              // body is a buffer to store the response body for non-streaming responses.
-	isStreaming    bool                       // isStreaming indicates whether the response is a streaming type (e.g., text/event-stream).
-	streamWriter   logging.StreamingLogWriter // streamWriter is a writer for handling streaming log entries.
-	chunkChannel   chan []byte                // chunkChannel is a channel for asynchronously passing response chunks to the logger.
-	streamDone     chan struct{}              // streamDone signals when the streaming goroutine completes.
-	logger         logging.RequestLogger      // logger is the instance of the request logger service.
-	requestInfo    *RequestInfo               // requestInfo holds the details of the original request.
-	statusCode     int                        // statusCode stores the HTTP status code of the response.
-	headers        map[string][]string        // headers stores the response headers.
-	logOnErrorOnly bool                       // logOnErrorOnly enables logging only when an error response is detected.
+	body                *bytes.Buffer              // body is a buffer to store the response body for non-streaming responses.
+	isStreaming         bool                       // isStreaming indicates whether the response is a streaming type (e.g., text/event-stream).
+	streamWriter        logging.StreamingLogWriter // streamWriter is a writer for handling streaming log entries.
+	chunkChannel        chan []byte                // chunkChannel is a channel for asynchronously passing response chunks to the logger.
+	streamDone          chan struct{}              // streamDone signals when the streaming goroutine completes.
+	logger              logging.RequestLogger      // logger is the instance of the request logger service.
+	requestInfo         *RequestInfo               // requestInfo holds the details of the original request.
+	statusCode          int                        // statusCode stores the HTTP status code of the response.
+	headers             map[string][]string        // headers stores the response headers.
+	logOnErrorOnly      bool                       // logOnErrorOnly enables logging only when an error response is detected.
+	firstChunkTimestamp time.Time                  // firstChunkTimestamp captures TTFB for streaming responses.
 }

 // NewResponseWriterWrapper creates and initializes a new ResponseWriterWrapper.
@@ -73,6 +76,10 @@ func (w *ResponseWriterWrapper) Write(data []byte) (int, error) {

 	// THEN: Handle logging based on response type
 	if w.isStreaming && w.chunkChannel != nil {
+		// Capture TTFB on first chunk (synchronous, before async channel send)
+		if w.firstChunkTimestamp.IsZero() {
+			w.firstChunkTimestamp = time.Now()
+		}
 		// For streaming responses: Send to async logging channel (non-blocking)
 		select {
 		case w.chunkChannel <- append([]byte(nil), data...): // Non-blocking send with copy
@@ -117,6 +124,10 @@ func (w *ResponseWriterWrapper) WriteString(data string) (int, error) {

 	// THEN: Capture for logging
 	if w.isStreaming && w.chunkChannel != nil {
+		// Capture TTFB on first chunk (synchronous, before async channel send)
+		if w.firstChunkTimestamp.IsZero() {
+			w.firstChunkTimestamp = time.Now()
+		}
 		select {
 		case w.chunkChannel <- []byte(data):
 		default:
@@ -280,6 +291,8 @@ func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
 			w.streamDone = nil
 		}

+		w.streamWriter.SetFirstChunkTimestamp(w.firstChunkTimestamp)
+
 		// Write API Request and Response to the streaming log before closing
 		apiRequest := w.extractAPIRequest(c)
 		if len(apiRequest) > 0 {
@@ -297,7 +310,7 @@ func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
 		return nil
 	}

-	return w.logRequest(finalStatusCode, w.cloneHeaders(), w.body.Bytes(), w.extractAPIRequest(c), w.extractAPIResponse(c), slicesAPIResponseError, forceLog)
+	return w.logRequest(finalStatusCode, w.cloneHeaders(), w.body.Bytes(), w.extractAPIRequest(c), w.extractAPIResponse(c), w.extractAPIResponseTimestamp(c), slicesAPIResponseError, forceLog)
 }

 func (w *ResponseWriterWrapper) cloneHeaders() map[string][]string {
@@ -337,7 +350,18 @@ func (w *ResponseWriterWrapper) extractAPIResponse(c *gin.Context) []byte {
 	return data
 }

-func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]string, body []byte, apiRequestBody, apiResponseBody []byte, apiResponseErrors []*interfaces.ErrorMessage, forceLog bool) error {
+// extractAPIResponseTimestamp reads the "API_RESPONSE_TIMESTAMP" value from the gin context.
+// It returns the zero time.Time when the key is absent or does not hold a time.Time.
+func (w *ResponseWriterWrapper) extractAPIResponseTimestamp(c *gin.Context) time.Time {
+	if raw, found := c.Get("API_RESPONSE_TIMESTAMP"); found {
+		if stamp, isTime := raw.(time.Time); isTime {
+			return stamp
+		}
+	}
+	return time.Time{}
+}
+
+func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]string, body []byte, apiRequestBody, apiResponseBody []byte, apiResponseTimestamp time.Time, apiResponseErrors []*interfaces.ErrorMessage, forceLog bool) error {
 	if w.requestInfo == nil {
 		return nil
 	}
@@ -348,7 +372,7 @@ func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]
 	}

 	if loggerWithOptions, ok := w.logger.(interface {
-		LogRequestWithOptions(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []*interfaces.ErrorMessage, bool, string) error
+		LogRequestWithOptions(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []*interfaces.ErrorMessage, bool, string, time.Time, time.Time) error
 	}); ok {
 		return loggerWithOptions.LogRequestWithOptions(
 			w.requestInfo.URL,
@@ -363,6 +387,8 @@ func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]
 			apiResponseErrors,
 			forceLog,
 			w.requestInfo.RequestID,
+			w.requestInfo.Timestamp,
+			apiResponseTimestamp,
 		)
 	}

@@ -378,5 +404,7 @@ func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]
 		apiResponseBody,
 		apiResponseErrors,
 		w.requestInfo.RequestID,
+		w.requestInfo.Timestamp,
+		apiResponseTimestamp,
 	)
 }
--- a/internal/api/modules/amp/fallback_handlers.go
+++ b/internal/api/modules/amp/fallback_handlers.go
@@ -8,6 +8,7 @@ import (
 	"time"

 	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
@@ -134,10 +135,11 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
 		}

 		// Normalize model (handles dynamic thinking suffixes)
-		normalizedModel, thinkingMetadata := util.NormalizeThinkingModel(modelName)
+		suffixResult := thinking.ParseSuffix(modelName)
+		normalizedModel := suffixResult.ModelName
 		thinkingSuffix := ""
-		if thinkingMetadata != nil && strings.HasPrefix(modelName, normalizedModel) {
-			thinkingSuffix = modelName[len(normalizedModel):]
+		if suffixResult.HasSuffix && suffixResult.RawSuffix != "" { // ignore an empty "()" so a mapped model never becomes "model()"
+			thinkingSuffix = "(" + suffixResult.RawSuffix + ")"
 		}

 		resolveMappedModel := func() (string, []string) {
@@ -157,13 +159,13 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
 			// Preserve dynamic thinking suffix (e.g. "(xhigh)") when mapping applies, unless the target
 			// already specifies its own thinking suffix.
 			if thinkingSuffix != "" {
-				_, mappedThinkingMetadata := util.NormalizeThinkingModel(mappedModel)
-				if mappedThinkingMetadata == nil {
+				mappedSuffixResult := thinking.ParseSuffix(mappedModel)
+				if !mappedSuffixResult.HasSuffix {
 					mappedModel += thinkingSuffix
 				}
 			}

-			mappedBaseModel, _ := util.NormalizeThinkingModel(mappedModel)
+			mappedBaseModel := thinking.ParseSuffix(mappedModel).ModelName
 			mappedProviders := util.GetProviderName(mappedBaseModel)
 			if len(mappedProviders) == 0 {
 				return "", nil
--- a/internal/api/modules/amp/model_mapping.go
+++ b/internal/api/modules/amp/model_mapping.go
@@ -8,6 +8,7 @@ import (
 	"sync"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	log "github.com/sirupsen/logrus"
 )
@@ -44,6 +45,11 @@ func NewModelMapper(mappings []config.AmpModelMapping) *DefaultModelMapper {
 // MapModel checks if a mapping exists for the requested model and if the
 // target model has available local providers. Returns the mapped model name
 // or empty string if no valid mapping exists.
+//
+// If the requested model contains a thinking suffix (e.g., "g25p(8192)"),
+// the suffix is preserved in the returned model name (e.g., "gemini-2.5-pro(8192)").
+// However, if the mapping target already contains a suffix, the config suffix
+// takes priority over the user's suffix.
 func (m *DefaultModelMapper) MapModel(requestedModel string) string {
 	if requestedModel == "" {
 		return ""
@@ -52,16 +58,20 @@ func (m *DefaultModelMapper) MapModel(requestedModel string) string {
 	m.mu.RLock()
 	defer m.mu.RUnlock()

-	// Normalize the requested model for lookup
-	normalizedRequest := strings.ToLower(strings.TrimSpace(requestedModel))
+	// Extract thinking suffix from requested model using ParseSuffix
+	requestResult := thinking.ParseSuffix(requestedModel)
+	baseModel := requestResult.ModelName

-	// Check for direct mapping
-	targetModel, exists := m.mappings[normalizedRequest]
+	// Normalize the base model for lookup (case-insensitive)
+	normalizedBase := strings.ToLower(strings.TrimSpace(baseModel))
+
+	// Check for direct mapping using base model name
+	targetModel, exists := m.mappings[normalizedBase]
 	if !exists {
-		// Try regex mappings in order
-		base, _ := util.NormalizeThinkingModel(requestedModel)
+		// Try regex mappings in order using base model only
+		// (suffix is handled separately via ParseSuffix)
 		for _, rm := range m.regexps {
-			if rm.re.MatchString(requestedModel) || (base != "" && rm.re.MatchString(base)) {
+			if rm.re.MatchString(baseModel) {
 				targetModel = rm.to
 				exists = true
 				break
@@ -72,14 +82,28 @@ func (m *DefaultModelMapper) MapModel(requestedModel string) string {
 		}
 	}

-	// Verify target model has available providers
-	normalizedTarget, _ := util.NormalizeThinkingModel(targetModel)
-	providers := util.GetProviderName(normalizedTarget)
+	// Check if target model already has a thinking suffix (config priority)
+	targetResult := thinking.ParseSuffix(targetModel)
+
+	// Verify target model has available providers (use base model for lookup)
+	providers := util.GetProviderName(targetResult.ModelName)
 	if len(providers) == 0 {
 		log.Debugf("amp model mapping: target model %s has no available providers, skipping mapping", targetModel)
 		return ""
 	}

+	// Suffix handling: config suffix takes priority, otherwise preserve user suffix
+	if targetResult.HasSuffix {
+		// Config's "to" already contains a suffix - use it as-is (config priority)
+		return targetModel
+	}
+
+	// Preserve user's thinking suffix on the mapped model
+	// (skip empty suffixes to avoid returning "model()")
+	if requestResult.HasSuffix && requestResult.RawSuffix != "" {
+		return targetModel + "(" + requestResult.RawSuffix + ")"
+	}
+
 	// Note: Detailed routing log is handled by logAmpRouting in fallback_handlers.go
 	return targetModel
 }
--- a/internal/api/modules/amp/model_mapping_test.go
+++ b/internal/api/modules/amp/model_mapping_test.go
@@ -217,10 +217,10 @@ func TestModelMapper_Regex_MatchBaseWithoutParens(t *testing.T) {

 	mapper := NewModelMapper(mappings)

-	// Incoming model has reasoning suffix but should match base via regex
+	// Incoming model has reasoning suffix, regex matches base, suffix is preserved
 	result := mapper.MapModel("gpt-5(high)")
-	if result != "gemini-2.5-pro" {
-		t.Errorf("Expected gemini-2.5-pro, got %s", result)
+	if result != "gemini-2.5-pro(high)" {
+		t.Errorf("Expected gemini-2.5-pro(high), got %s", result)
 	}
 }

@@ -281,3 +281,95 @@ func TestModelMapper_Regex_CaseInsensitive(t *testing.T) {
 		t.Errorf("Expected claude-sonnet-4, got %s", result)
 	}
 }
+
+func TestModelMapper_SuffixPreservation(t *testing.T) {
+	reg := registry.GetGlobalRegistry()
+
+	// Register test models in the global registry; gemini-2.5-pro is the mapping target of every case below
+	reg.RegisterClient("test-client-suffix", "gemini", []*registry.ModelInfo{
+		{ID: "gemini-2.5-pro", OwnedBy: "google", Type: "gemini"},
+	})
+	reg.RegisterClient("test-client-suffix-2", "claude", []*registry.ModelInfo{
+		{ID: "claude-sonnet-4", OwnedBy: "anthropic", Type: "claude"},
+	})
+	defer reg.UnregisterClient("test-client-suffix") // remove the test client when the test returns
+	defer reg.UnregisterClient("test-client-suffix-2") // remove the test client when the test returns
+
+	tests := []struct {
+		name     string // subtest name passed to t.Run
+		mappings []config.AmpModelMapping // mapping config handed to NewModelMapper
+		input    string // requested model name passed to MapModel
+		want     string // exact string MapModel is expected to return
+	}{
+		{
+			name:     "numeric suffix preserved",
+			mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
+			input:    "g25p(8192)",
+			want:     "gemini-2.5-pro(8192)",
+		},
+		{
+			name:     "level suffix preserved",
+			mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
+			input:    "g25p(high)",
+			want:     "gemini-2.5-pro(high)",
+		},
+		{
+			name:     "no suffix unchanged",
+			mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
+			input:    "g25p",
+			want:     "gemini-2.5-pro",
+		},
+		{
+			name:     "config suffix takes priority",
+			mappings: []config.AmpModelMapping{{From: "alias", To: "gemini-2.5-pro(medium)"}},
+			input:    "alias(high)",
+			want:     "gemini-2.5-pro(medium)",
+		},
+		{
+			name:     "regex with suffix preserved",
+			mappings: []config.AmpModelMapping{{From: "^g25.*", To: "gemini-2.5-pro", Regex: true}},
+			input:    "g25p(8192)",
+			want:     "gemini-2.5-pro(8192)",
+		},
+		{
+			name:     "auto suffix preserved",
+			mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
+			input:    "g25p(auto)",
+			want:     "gemini-2.5-pro(auto)",
+		},
+		{
+			name:     "none suffix preserved",
+			mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
+			input:    "g25p(none)",
+			want:     "gemini-2.5-pro(none)",
+		},
+		{
+			name:     "case insensitive base lookup with suffix",
+			mappings: []config.AmpModelMapping{{From: "G25P", To: "gemini-2.5-pro"}},
+			input:    "g25p(high)",
+			want:     "gemini-2.5-pro(high)",
+		},
+		{
+			name:     "empty suffix filtered out",
+			mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}},
+			input:    "g25p()",
+			want:     "gemini-2.5-pro",
+		},
+		{
+			name:     "incomplete suffix treated as no suffix",
+			mappings: []config.AmpModelMapping{{From: "g25p(high", To: "gemini-2.5-pro"}},
+			input:    "g25p(high",
+			want:     "gemini-2.5-pro",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mapper := NewModelMapper(tt.mappings) // each case builds its own mapper from its own mappings
+			got := mapper.MapModel(tt.input)
+			if got != tt.want {
+				t.Errorf("MapModel(%q) = %q, want %q", tt.input, got, tt.want)
+			}
+		})
+	}
+}
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -12,6 +12,7 @@ import (
 	"net/http"
 	"os"
 	"path/filepath"
+	"reflect"
 	"strings"
 	"sync"
 	"sync/atomic"
@@ -26,6 +27,7 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/managementasset"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/usage"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	sdkaccess "github.com/router-for-me/CLIProxyAPI/v6/sdk/access"
@@ -254,15 +256,13 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk
 	}
 	managementasset.SetCurrentConfig(cfg)
 	auth.SetQuotaCooldownDisabled(cfg.DisableCooling)
+	misc.SetCodexInstructionsEnabled(cfg.CodexInstructionsEnabled)
 	// Initialize management handler
 	s.mgmt = managementHandlers.NewHandler(cfg, configFilePath, authManager)
 	if optionState.localPassword != "" {
 		s.mgmt.SetLocalPassword(optionState.localPassword)
 	}
-	logDir := filepath.Join(s.currentPath, "logs")
-	if base := util.WritablePath(); base != "" {
-		logDir = filepath.Join(base, "logs")
-	}
+	logDir := logging.ResolveLogDirectory(cfg)
 	s.mgmt.SetLogDirectory(logDir)
 	s.localPassword = optionState.localPassword

@@ -601,16 +601,18 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.PATCH("/oauth-excluded-models", s.mgmt.PatchOAuthExcludedModels)
 		mgmt.DELETE("/oauth-excluded-models", s.mgmt.DeleteOAuthExcludedModels)

-		mgmt.GET("/oauth-model-mappings", s.mgmt.GetOAuthModelMappings)
-		mgmt.PUT("/oauth-model-mappings", s.mgmt.PutOAuthModelMappings)
-		mgmt.PATCH("/oauth-model-mappings", s.mgmt.PatchOAuthModelMappings)
-		mgmt.DELETE("/oauth-model-mappings", s.mgmt.DeleteOAuthModelMappings)
+		mgmt.GET("/oauth-model-alias", s.mgmt.GetOAuthModelAlias)
+		mgmt.PUT("/oauth-model-alias", s.mgmt.PutOAuthModelAlias)
+		mgmt.PATCH("/oauth-model-alias", s.mgmt.PatchOAuthModelAlias)
+		mgmt.DELETE("/oauth-model-alias", s.mgmt.DeleteOAuthModelAlias)

 		mgmt.GET("/auth-files", s.mgmt.ListAuthFiles)
 		mgmt.GET("/auth-files/models", s.mgmt.GetAuthFileModels)
+		mgmt.GET("/model-definitions/:channel", s.mgmt.GetStaticModelDefinitions)
 		mgmt.GET("/auth-files/download", s.mgmt.DownloadAuthFile)
 		mgmt.POST("/auth-files", s.mgmt.UploadAuthFile)
 		mgmt.DELETE("/auth-files", s.mgmt.DeleteAuthFile)
+		mgmt.PATCH("/auth-files/status", s.mgmt.PatchAuthFileStatus)
 		mgmt.POST("/vertex/import", s.mgmt.ImportVertexCredential)

 		mgmt.GET("/anthropic-auth-url", s.mgmt.RequestAnthropicToken)
@@ -912,6 +914,16 @@ func (s *Server) UpdateClients(cfg *config.Config) {
 			log.Debugf("disable_cooling toggled to %t", cfg.DisableCooling)
 		}
 	}
+
+	if oldCfg == nil || oldCfg.CodexInstructionsEnabled != cfg.CodexInstructionsEnabled {
+		misc.SetCodexInstructionsEnabled(cfg.CodexInstructionsEnabled)
+		if oldCfg != nil {
+			log.Debugf("codex_instructions_enabled updated from %t to %t", oldCfg.CodexInstructionsEnabled, cfg.CodexInstructionsEnabled)
+		} else {
+			log.Debugf("codex_instructions_enabled toggled to %t", cfg.CodexInstructionsEnabled)
+		}
+	}
+
 	if s.handlers != nil && s.handlers.AuthManager != nil {
 		s.handlers.AuthManager.SetRetryConfig(cfg.RequestRetry, time.Duration(cfg.MaxRetryInterval)*time.Second)
 	}
@@ -979,14 +991,17 @@ func (s *Server) UpdateClients(cfg *config.Config) {
 		s.mgmt.SetAuthManager(s.handlers.AuthManager)
 	}

-	// Notify Amp module of config changes (for model mapping hot-reload)
-	if s.ampModule != nil {
-		log.Debugf("triggering amp module config update")
-		if err := s.ampModule.OnConfigUpdated(cfg); err != nil {
-			log.Errorf("failed to update Amp module config: %v", err)
+	// Notify Amp module only when Amp config has changed.
+	ampConfigChanged := oldCfg == nil || !reflect.DeepEqual(oldCfg.AmpCode, cfg.AmpCode)
+	if ampConfigChanged {
+		if s.ampModule != nil {
+			log.Debugf("triggering amp module config update")
+			if err := s.ampModule.OnConfigUpdated(cfg); err != nil {
+				log.Errorf("failed to update Amp module config: %v", err)
+			}
+		} else {
+			log.Warnf("amp module is nil, skipping config update")
 		}
-	} else {
-		log.Warnf("amp module is nil, skipping config update")
 	}

 	// Count client sources from configuration and auth store.
--- a/internal/auth/antigravity/auth.go
+++ b/internal/auth/antigravity/auth.go
@@ -0,0 +1,344 @@
+// Package antigravity provides OAuth2 authentication functionality for the Antigravity provider.
+package antigravity
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	log "github.com/sirupsen/logrus"
+)
+
+// TokenResponse is the JSON shape ExchangeCodeForTokens decodes from the TokenEndpoint reply.
+type TokenResponse struct {
+	AccessToken  string `json:"access_token"`
+	RefreshToken string `json:"refresh_token"`
+	ExpiresIn    int64  `json:"expires_in"` // presumably seconds until expiry — confirm
+	TokenType    string `json:"token_type"`
+}
+
+// userInfo is the subset of the UserInfoEndpoint reply that FetchUserInfo decodes.
+type userInfo struct {
+	Email string `json:"email"` // FetchUserInfo rejects a reply where this is blank
+}
+
+// AntigravityAuth performs the Antigravity OAuth and onboarding HTTP calls.
+type AntigravityAuth struct {
+	httpClient *http.Client // used for every outbound request issued by the methods
+}
+
+// NewAntigravityAuth builds an AntigravityAuth. A non-nil httpClient is used
+// as-is; otherwise a client is created through util.SetProxy from cfg, where a
+// nil cfg is treated as an empty configuration.
+func NewAntigravityAuth(cfg *config.Config, httpClient *http.Client) *AntigravityAuth {
+	client := httpClient
+	if client == nil {
+		if cfg == nil {
+			cfg = &config.Config{}
+		}
+		client = util.SetProxy(&cfg.SDKConfig, &http.Client{})
+	}
+	return &AntigravityAuth{httpClient: client}
+}
+
+// BuildAuthURL returns AuthEndpoint with the encoded OAuth query; a blank redirectURI defaults to the localhost callback.
+func (o *AntigravityAuth) BuildAuthURL(state, redirectURI string) string {
+	if strings.TrimSpace(redirectURI) == "" {
+		redirectURI = fmt.Sprintf("http://localhost:%d/oauth-callback", CallbackPort)
+	}
+	return AuthEndpoint + "?" + url.Values{
+		"access_type":   {"offline"},
+		"client_id":     {ClientID},
+		"prompt":        {"consent"},
+		"redirect_uri":  {redirectURI},
+		"response_type": {"code"},
+		"scope":         {strings.Join(Scopes, " ")},
+		"state":         {state},
+	}.Encode()
+}
+
+// ExchangeCodeForTokens posts the authorization code to TokenEndpoint as a form
+// request and decodes the JSON reply; non-2xx replies become errors (body capped at 8 KiB).
+func (o *AntigravityAuth) ExchangeCodeForTokens(ctx context.Context, code, redirectURI string) (*TokenResponse, error) {
+	form := url.Values{
+		"code":          {code},
+		"client_id":     {ClientID},
+		"client_secret": {ClientSecret},
+		"redirect_uri":  {redirectURI},
+		"grant_type":    {"authorization_code"},
+	}
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, TokenEndpoint, strings.NewReader(form.Encode()))
+	if err != nil {
+		return nil, fmt.Errorf("antigravity token exchange: create request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+
+	resp, errDo := o.httpClient.Do(req)
+	if errDo != nil {
+		return nil, fmt.Errorf("antigravity token exchange: execute request: %w", errDo)
+	}
+	defer func() {
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("antigravity token exchange: close body error: %v", errClose)
+		}
+	}()
+
+	if succeeded := resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices; !succeeded {
+		raw, errRead := io.ReadAll(io.LimitReader(resp.Body, 8<<10))
+		if errRead != nil {
+			return nil, fmt.Errorf("antigravity token exchange: read response: %w", errRead)
+		}
+		if detail := strings.TrimSpace(string(raw)); detail != "" {
+			return nil, fmt.Errorf("antigravity token exchange: request failed: status %d: %s", resp.StatusCode, detail)
+		}
+		return nil, fmt.Errorf("antigravity token exchange: request failed: status %d", resp.StatusCode)
+	}
+
+	token := new(TokenResponse)
+	if errDecode := json.NewDecoder(resp.Body).Decode(token); errDecode != nil {
+		return nil, fmt.Errorf("antigravity token exchange: decode response: %w", errDecode)
+	}
+	return token, nil
+}
+
+// FetchUserInfo calls UserInfoEndpoint with the bearer token and returns the
+// trimmed email; blank tokens, non-2xx replies and missing emails are errors.
+func (o *AntigravityAuth) FetchUserInfo(ctx context.Context, accessToken string) (string, error) {
+	token := strings.TrimSpace(accessToken)
+	if token == "" {
+		return "", fmt.Errorf("antigravity userinfo: missing access token")
+	}
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, UserInfoEndpoint, nil)
+	if err != nil {
+		return "", fmt.Errorf("antigravity userinfo: create request: %w", err)
+	}
+	req.Header.Set("Authorization", "Bearer "+token)
+
+	resp, errDo := o.httpClient.Do(req)
+	if errDo != nil {
+		return "", fmt.Errorf("antigravity userinfo: execute request: %w", errDo)
+	}
+	defer func() {
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("antigravity userinfo: close body error: %v", errClose)
+		}
+	}()
+
+	if succeeded := resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices; !succeeded {
+		raw, errRead := io.ReadAll(io.LimitReader(resp.Body, 8<<10))
+		if errRead != nil {
+			return "", fmt.Errorf("antigravity userinfo: read response: %w", errRead)
+		}
+		if detail := strings.TrimSpace(string(raw)); detail != "" {
+			return "", fmt.Errorf("antigravity userinfo: request failed: status %d: %s", resp.StatusCode, detail)
+		}
+		return "", fmt.Errorf("antigravity userinfo: request failed: status %d", resp.StatusCode)
+	}
+
+	var profile userInfo
+	if errDecode := json.NewDecoder(resp.Body).Decode(&profile); errDecode != nil {
+		return "", fmt.Errorf("antigravity userinfo: decode response: %w", errDecode)
+	}
+	if email := strings.TrimSpace(profile.Email); email != "" {
+		return email, nil
+	}
+	return "", fmt.Errorf("antigravity userinfo: response missing email")
+}
+
+// FetchProjectID calls loadCodeAssist with the bearer token and returns the
+// cloudaicompanionProject ID from the reply, which may be a plain string or an
+// object carrying "id". When the reply has no project, the user is onboarded via
+// OnboardUser using the first default tier in allowedTiers that has an ID, or
+// "legacy-tier" when there is none.
+func (o *AntigravityAuth) FetchProjectID(ctx context.Context, accessToken string) (string, error) {
+	payload, errMarshal := json.Marshal(map[string]any{
+		"metadata": map[string]string{
+			"ideType":    "ANTIGRAVITY",
+			"platform":   "PLATFORM_UNSPECIFIED",
+			"pluginType": "GEMINI",
+		},
+	})
+	if errMarshal != nil {
+		return "", fmt.Errorf("marshal request body: %w", errMarshal)
+	}
+
+	target := fmt.Sprintf("%s/%s:loadCodeAssist", APIEndpoint, APIVersion)
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, target, strings.NewReader(string(payload)))
+	if err != nil {
+		return "", fmt.Errorf("create request: %w", err)
+	}
+	req.Header.Set("Authorization", "Bearer "+accessToken)
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("User-Agent", APIUserAgent)
+	req.Header.Set("X-Goog-Api-Client", APIClient)
+	req.Header.Set("Client-Metadata", ClientMetadata)
+
+	resp, errDo := o.httpClient.Do(req)
+	if errDo != nil {
+		return "", fmt.Errorf("execute request: %w", errDo)
+	}
+	defer func() {
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("antigravity loadCodeAssist: close body error: %v", errClose)
+		}
+	}()
+
+	raw, errRead := io.ReadAll(resp.Body)
+	if errRead != nil {
+		return "", fmt.Errorf("read response: %w", errRead)
+	}
+
+	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
+		return "", fmt.Errorf("request failed with status %d: %s", resp.StatusCode, strings.TrimSpace(string(raw)))
+	}
+
+	var loadResp map[string]any
+	if errDecode := json.Unmarshal(raw, &loadResp); errDecode != nil {
+		return "", fmt.Errorf("decode response: %w", errDecode)
+	}
+
+	// The project may arrive as a bare string or as an object carrying "id".
+	projectID := ""
+	switch project := loadResp["cloudaicompanionProject"].(type) {
+	case string:
+		projectID = strings.TrimSpace(project)
+	case map[string]any:
+		if id, okID := project["id"].(string); okID {
+			projectID = strings.TrimSpace(id)
+		}
+	}
+	if projectID != "" {
+		return projectID, nil
+	}
+
+	// No project yet: onboard with the first default tier that has a usable ID.
+	tierID := "legacy-tier"
+	tiers, _ := loadResp["allowedTiers"].([]any)
+	for _, rawTier := range tiers {
+		tier, okTier := rawTier.(map[string]any)
+		if !okTier {
+			continue
+		}
+		if isDefault, _ := tier["isDefault"].(bool); !isDefault {
+			continue
+		}
+		if id, _ := tier["id"].(string); strings.TrimSpace(id) != "" {
+			tierID = strings.TrimSpace(id)
+			break
+		}
+	}
+
+	projectID, err = o.OnboardUser(ctx, accessToken, tierID)
+	if err != nil {
+		return "", err
+	}
+	return projectID, nil
+}
+
+// OnboardUser calls onboardUser for tierID, polling up to five times (two seconds apart)
+// until the operation is done, and returns its cloudaicompanionProject ID. Non-200 replies,
+// a done reply without a project ID, cancellation, or exhausting the attempts are errors.
+func (o *AntigravityAuth) OnboardUser(ctx context.Context, accessToken, tierID string) (string, error) {
+	log.Infof("Antigravity: onboarding user with tier: %s", tierID)
+	if ctx == nil {
+		ctx = context.Background() // nil contexts were already tolerated per request
+	}
+	requestBody := map[string]any{
+		"tierId": tierID,
+		"metadata": map[string]string{
+			"ideType":    "ANTIGRAVITY",
+			"platform":   "PLATFORM_UNSPECIFIED",
+			"pluginType": "GEMINI",
+		},
+	}
+
+	rawBody, errMarshal := json.Marshal(requestBody)
+	if errMarshal != nil {
+		return "", fmt.Errorf("marshal request body: %w", errMarshal)
+	}
+
+	const maxAttempts, pollInterval = 5, 2 * time.Second
+	endpointURL := fmt.Sprintf("%s/%s:onboardUser", APIEndpoint, APIVersion)
+	for attempt := 1; attempt <= maxAttempts; attempt++ {
+		log.Debugf("Polling attempt %d/%d", attempt, maxAttempts)
+		// Each attempt gets its own 30s deadline, released once the body has been read.
+		reqCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
+		req, errRequest := http.NewRequestWithContext(reqCtx, http.MethodPost, endpointURL, strings.NewReader(string(rawBody)))
+		if errRequest != nil {
+			cancel()
+			return "", fmt.Errorf("create request: %w", errRequest)
+		}
+		req.Header.Set("Authorization", "Bearer "+accessToken)
+		req.Header.Set("Content-Type", "application/json")
+		req.Header.Set("User-Agent", APIUserAgent)
+		req.Header.Set("X-Goog-Api-Client", APIClient)
+		req.Header.Set("Client-Metadata", ClientMetadata)
+
+		resp, errDo := o.httpClient.Do(req)
+		if errDo != nil {
+			cancel()
+			return "", fmt.Errorf("execute request: %w", errDo)
+		}
+		bodyBytes, errRead := io.ReadAll(resp.Body)
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("close body error: %v", errClose)
+		}
+		cancel()
+		if errRead != nil {
+			return "", fmt.Errorf("read response: %w", errRead)
+		}
+
+		if resp.StatusCode != http.StatusOK {
+			// Report at most 200 bytes of the body to keep the error readable.
+			responseErr := strings.TrimSpace(string(bodyBytes))
+			if len(responseErr) > 200 {
+				responseErr = responseErr[:200]
+			}
+			return "", fmt.Errorf("http %d: %s", resp.StatusCode, responseErr)
+		}
+
+		var data map[string]any
+		if errDecode := json.Unmarshal(bodyBytes, &data); errDecode != nil {
+			return "", fmt.Errorf("decode response: %w", errDecode)
+		}
+
+		if done, okDone := data["done"].(bool); okDone && done {
+			projectID := ""
+			if responseData, okResp := data["response"].(map[string]any); okResp {
+				switch projectValue := responseData["cloudaicompanionProject"].(type) {
+				case map[string]any:
+					if id, okID := projectValue["id"].(string); okID {
+						projectID = strings.TrimSpace(id)
+					}
+				case string:
+					projectID = strings.TrimSpace(projectValue)
+				}
+			}
+
+			if projectID != "" {
+				log.Infof("Successfully fetched project_id: %s", projectID)
+				return projectID, nil
+			}
+
+			return "", fmt.Errorf("no project_id in response")
+		}
+
+		if attempt == maxAttempts {
+			break // nothing left to poll for, so do not wait again
+		}
+		// Wait before the next poll, returning early if the caller cancels ctx.
+		select {
+		case <-ctx.Done():
+			return "", fmt.Errorf("onboard user: %w", ctx.Err())
+		case <-time.After(pollInterval):
+		}
+	}
+
+	return "", fmt.Errorf("onboard user: operation not completed after %d attempts", maxAttempts)
+}
--- a/internal/auth/antigravity/constants.go
+++ b/internal/auth/antigravity/constants.go
@@ -0,0 +1,34 @@
+// Package antigravity provides OAuth2 authentication functionality for the Antigravity provider.
+package antigravity
+
+// OAuth client credentials, plus the local port BuildAuthURL uses for its default redirect URI.
+const (
+	ClientID     = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
+	ClientSecret = "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" // NOTE(review): secret committed in source — confirm this is acceptable for this client type
+	CallbackPort = 51121
+)
+
+// Scopes lists the OAuth scopes requested; BuildAuthURL joins them with spaces into the "scope" parameter.
+var Scopes = []string{
+	"https://www.googleapis.com/auth/cloud-platform",
+	"https://www.googleapis.com/auth/userinfo.email",
+	"https://www.googleapis.com/auth/userinfo.profile",
+	"https://www.googleapis.com/auth/cclog",
+	"https://www.googleapis.com/auth/experimentsandconfigs",
+}
+
+// Google OAuth2 endpoints used by ExchangeCodeForTokens, BuildAuthURL and FetchUserInfo respectively.
+const (
+	TokenEndpoint    = "https://oauth2.googleapis.com/token"
+	AuthEndpoint     = "https://accounts.google.com/o/oauth2/v2/auth"
+	UserInfoEndpoint = "https://www.googleapis.com/oauth2/v1/userinfo?alt=json"
+)
+
+// Antigravity API configuration: base URL and version for "<endpoint>/<version>:<method>" calls, plus fixed header values.
+const (
+	APIEndpoint    = "https://cloudcode-pa.googleapis.com"
+	APIVersion     = "v1internal"
+	APIUserAgent   = "google-api-nodejs-client/9.15.1"
+	APIClient      = "google-cloud-sdk vscode_cloudshelleditor/0.1"
+	ClientMetadata = `{"ideType":"IDE_UNSPECIFIED","platform":"PLATFORM_UNSPECIFIED","pluginType":"GEMINI"}` // NOTE(review): ideType differs from the "ANTIGRAVITY" sent in request bodies — confirm
+)
--- a/internal/auth/antigravity/filename.go
+++ b/internal/auth/antigravity/filename.go
@@ -0,0 +1,16 @@
+package antigravity
+
+import (
+	"fmt"
+	"strings"
+)
+
+// CredentialFileName returns the file name under which Antigravity credentials are
+// persisted: "antigravity-<email>.json", or "antigravity.json" for a blank email.
+func CredentialFileName(email string) string {
+	name := "antigravity.json"
+	if trimmed := strings.TrimSpace(email); trimmed != "" {
+		name = fmt.Sprintf("antigravity-%s.json", trimmed)
+	}
+	return name
+}
--- a/internal/auth/claude/anthropic_auth.go
+++ b/internal/auth/claude/anthropic_auth.go
@@ -14,15 +14,15 @@ import (
 	"time"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	log "github.com/sirupsen/logrus"
 )

+// OAuth configuration constants for Claude/Anthropic
 const (
-	anthropicAuthURL  = "https://claude.ai/oauth/authorize"
-	anthropicTokenURL = "https://console.anthropic.com/v1/oauth/token"
-	anthropicClientID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
-	redirectURI       = "http://localhost:54545/callback"
+	AuthURL     = "https://claude.ai/oauth/authorize"
+	TokenURL    = "https://console.anthropic.com/v1/oauth/token"
+	ClientID    = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
+	RedirectURI = "http://localhost:54545/callback"
 )

 // tokenResponse represents the response structure from Anthropic's OAuth token endpoint.
@@ -50,7 +50,8 @@ type ClaudeAuth struct {
 }

 // NewClaudeAuth creates a new Anthropic authentication service.
-// It initializes the HTTP client with proxy settings from the configuration.
+// It initializes the HTTP client with a custom TLS transport that uses Firefox
+// fingerprint to bypass Cloudflare's TLS fingerprinting on Anthropic domains.
 //
 // Parameters:
 //   - cfg: The application configuration containing proxy settings
@@ -58,8 +59,10 @@ type ClaudeAuth struct {
 // Returns:
 //   - *ClaudeAuth: A new Claude authentication service instance
 func NewClaudeAuth(cfg *config.Config) *ClaudeAuth {
+	// Use custom HTTP client with Firefox TLS fingerprint to bypass
+	// Cloudflare's bot detection on Anthropic domains
 	return &ClaudeAuth{
-		httpClient: util.SetProxy(&cfg.SDKConfig, &http.Client{}),
+		httpClient: NewAnthropicHttpClient(&cfg.SDKConfig),
 	}
 }

@@ -82,16 +85,16 @@ func (o *ClaudeAuth) GenerateAuthURL(state string, pkceCodes *PKCECodes) (string

 	params := url.Values{
 		"code":                  {"true"},
-		"client_id":             {anthropicClientID},
+		"client_id":             {ClientID},
 		"response_type":         {"code"},
-		"redirect_uri":          {redirectURI},
+		"redirect_uri":          {RedirectURI},
 		"scope":                 {"org:create_api_key user:profile user:inference"},
 		"code_challenge":        {pkceCodes.CodeChallenge},
 		"code_challenge_method": {"S256"},
 		"state":                 {state},
 	}

-	authURL := fmt.Sprintf("%s?%s", anthropicAuthURL, params.Encode())
+	authURL := fmt.Sprintf("%s?%s", AuthURL, params.Encode())
 	return authURL, state, nil
 }

@@ -137,8 +140,8 @@ func (o *ClaudeAuth) ExchangeCodeForTokens(ctx context.Context, code, state stri
 		"code":          newCode,
 		"state":         state,
 		"grant_type":    "authorization_code",
-		"client_id":     anthropicClientID,
-		"redirect_uri":  redirectURI,
+		"client_id":     ClientID,
+		"redirect_uri":  RedirectURI,
 		"code_verifier": pkceCodes.CodeVerifier,
 	}

@@ -154,7 +157,7 @@ func (o *ClaudeAuth) ExchangeCodeForTokens(ctx context.Context, code, state stri

 	// log.Debugf("Token exchange request: %s", string(jsonBody))

-	req, err := http.NewRequestWithContext(ctx, "POST", anthropicTokenURL, strings.NewReader(string(jsonBody)))
+	req, err := http.NewRequestWithContext(ctx, "POST", TokenURL, strings.NewReader(string(jsonBody)))
 	if err != nil {
 		return nil, fmt.Errorf("failed to create token request: %w", err)
 	}
@@ -221,7 +224,7 @@ func (o *ClaudeAuth) RefreshTokens(ctx context.Context, refreshToken string) (*C
 	}

 	reqBody := map[string]interface{}{
-		"client_id":     anthropicClientID,
+		"client_id":     ClientID,
 		"grant_type":    "refresh_token",
 		"refresh_token": refreshToken,
 	}
@@ -231,7 +234,7 @@ func (o *ClaudeAuth) RefreshTokens(ctx context.Context, refreshToken string) (*C
 		return nil, fmt.Errorf("failed to marshal request body: %w", err)
 	}

-	req, err := http.NewRequestWithContext(ctx, "POST", anthropicTokenURL, strings.NewReader(string(jsonBody)))
+	req, err := http.NewRequestWithContext(ctx, "POST", TokenURL, strings.NewReader(string(jsonBody)))
 	if err != nil {
 		return nil, fmt.Errorf("failed to create refresh request: %w", err)
 	}
--- a/internal/auth/claude/utls_transport.go
+++ b/internal/auth/claude/utls_transport.go
@@ -0,0 +1,165 @@
+// Package claude provides authentication functionality for Anthropic's Claude API.
+// This file implements a custom HTTP transport using utls to bypass TLS fingerprinting.
+package claude
+
+import (
+	"net/http"
+	"net/url"
+	"strings"
+	"sync"
+
+	tls "github.com/refraction-networking/utls"
+	"github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
+	log "github.com/sirupsen/logrus"
+	"golang.org/x/net/http2"
+	"golang.org/x/net/proxy"
+)
+
+// utlsRoundTripper implements http.RoundTripper over HTTP/2 connections whose TLS
+// handshake uses utls with a Firefox ClientHello (tls.HelloFirefox_Auto).
+type utlsRoundTripper struct {
+	// mu guards connections and pending; it is also the Locker of every pending Cond
+	mu sync.Mutex
+	// connections caches one HTTP/2 client connection per hostname (port excluded)
+	connections map[string]*http2.ClientConn
+	// pending holds a Cond for each host that some goroutine is currently dialing
+	pending map[string]*sync.Cond
+	// dialer opens the underlying TCP connection (direct, or via the configured proxy)
+	dialer proxy.Dialer
+}
+
+// newUtlsRoundTripper builds a round tripper whose dialer honours cfg.ProxyURL when
+// set; on a parse or dialer error it logs and keeps the direct dialer.
+func newUtlsRoundTripper(cfg *config.SDKConfig) *utlsRoundTripper {
+	rt := &utlsRoundTripper{
+		connections: make(map[string]*http2.ClientConn),
+		pending:     make(map[string]*sync.Cond),
+		dialer:      proxy.Direct,
+	}
+	if cfg == nil || cfg.ProxyURL == "" {
+		return rt
+	}
+	proxyURL, err := url.Parse(cfg.ProxyURL)
+	if err != nil {
+		log.Errorf("failed to parse proxy URL %q: %v", cfg.ProxyURL, err)
+		return rt
+	}
+	if pDialer, errDialer := proxy.FromURL(proxyURL, proxy.Direct); errDialer != nil {
+		log.Errorf("failed to create proxy dialer for %q: %v", cfg.ProxyURL, errDialer)
+	} else {
+		rt.dialer = pDialer
+	}
+	return rt
+}
+
+// getOrCreateConnection returns a cached HTTP/2 connection for host that can still
+// take new requests, or dials a fresh one with createConnection(host, addr).
+// Only one goroutine dials a given host at a time. Callers that find a dial in
+// progress wait and then re-check both the cache and the pending marker in a loop,
+// so after a failed dial exactly one waiter takes over instead of every waiter
+// dialing at once and overwriting each other's pending entry.
+// Errors from createConnection are returned to the goroutine that dialed.
+func (t *utlsRoundTripper) getOrCreateConnection(host, addr string) (*http2.ClientConn, error) {
+	t.mu.Lock()
+	for {
+		// Check if connection exists and is usable
+		if h2Conn, ok := t.connections[host]; ok && h2Conn.CanTakeNewRequest() {
+			t.mu.Unlock()
+			return h2Conn, nil
+		}
+		// No usable connection: become the dialer unless a dial is already in flight.
+		inFlight, dialing := t.pending[host]
+		if !dialing {
+			break
+		}
+		// Another goroutine is dialing: Wait releases t.mu until it broadcasts.
+		inFlight.Wait()
+	}
+
+	// Mark this host as pending (t.mu is still held from the loop above)
+	cond := sync.NewCond(&t.mu)
+	t.pending[host] = cond
+	t.mu.Unlock()
+
+	// Create connection outside the lock
+	h2Conn, err := t.createConnection(host, addr)
+
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	// Remove pending marker and wake up waiting goroutines
+	delete(t.pending, host)
+	cond.Broadcast()
+
+	if err != nil {
+		return nil, err
+	}
+
+	// Store the new connection
+	t.connections[host] = h2Conn
+	return h2Conn, nil
+}
+
+// createConnection dials addr through the configured dialer, completes a TLS
+// handshake using the utls HelloFirefox_Auto ClientHello with host as ServerName,
+// and wraps the result in an HTTP/2 client connection.
+func (t *utlsRoundTripper) createConnection(host, addr string) (*http2.ClientConn, error) {
+	rawConn, errDial := t.dialer.Dial("tcp", addr)
+	if errDial != nil {
+		return nil, errDial
+	}
+
+	uConn := tls.UClient(rawConn, &tls.Config{ServerName: host}, tls.HelloFirefox_Auto)
+	if errHandshake := uConn.Handshake(); errHandshake != nil {
+		rawConn.Close()
+		return nil, errHandshake
+	}
+
+	// A throwaway http2.Transport is only used to wrap the established TLS connection.
+	h2Conn, errH2 := (&http2.Transport{}).NewClientConn(uConn)
+	if errH2 != nil {
+		uConn.Close()
+		return nil, errH2
+	}
+
+	return h2Conn, nil
+}
+
+// RoundTrip implements http.RoundTripper: it sends req over the cached (or newly
+// dialed) HTTP/2 connection for the URL's hostname and evicts it from the cache on error.
+func (t *utlsRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
+	hostname := req.URL.Hostname() // cache key and TLS ServerName (no port, no brackets)
+
+	// Default to port 443 only when the URL really has no port. Checking URL.Port
+	// (not for ":") keeps bracketed IPv6 literals and a dangling "host:" dialable.
+	addr := req.URL.Host
+	if req.URL.Port() == "" {
+		addr = strings.TrimSuffix(addr, ":") + ":443"
+	}
+
+	h2Conn, err := t.getOrCreateConnection(hostname, addr)
+	if err != nil {
+		return nil, err
+	}
+
+	resp, err := h2Conn.RoundTrip(req)
+	if err != nil {
+		// Connection failed, remove it from cache unless it was already replaced
+		t.mu.Lock()
+		if cached, ok := t.connections[hostname]; ok && cached == h2Conn {
+			delete(t.connections, hostname)
+		}
+		t.mu.Unlock()
+		return nil, err
+	}
+	return resp, nil
+}
+
+// NewAnthropicHttpClient returns an http.Client whose Transport is the utls round
+// tripper (Firefox ClientHello) built by newUtlsRoundTripper.
+// cfg may be nil; when it carries a ProxyURL the transport dials through that proxy.
+func NewAnthropicHttpClient(cfg *config.SDKConfig) *http.Client {
+	client := new(http.Client)
+	client.Transport = newUtlsRoundTripper(cfg)
+	return client
+}
--- a/internal/auth/codex/filename.go
+++ b/internal/auth/codex/filename.go
@@ -0,0 +1,46 @@
+package codex
+
+import (
+	"fmt"
+	"strings"
+	"unicode"
+)
+
+// CredentialFileName returns the filename used to persist Codex OAuth credentials:
+// "<prefix>-<email>[-<plan>].json", with hashAccountID inserted before the email
+// for the "team" plan. prefix is "codex" or, without includeProviderPrefix, empty.
+func CredentialFileName(email, planType, hashAccountID string, includeProviderPrefix bool) string {
+	var prefix string
+	if includeProviderPrefix {
+		prefix = "codex"
+	}
+	account := strings.TrimSpace(email)
+
+	switch plan := normalizePlanTypeForFilename(planType); plan {
+	case "":
+		return fmt.Sprintf("%s-%s.json", prefix, account)
+	case "team":
+		return fmt.Sprintf("%s-%s-%s-%s.json", prefix, hashAccountID, account, plan)
+	default:
+		return fmt.Sprintf("%s-%s-%s.json", prefix, account, plan)
+	}
+}
+
+// normalizePlanTypeForFilename lower-cases planType and joins its runs of letters
+// and digits with "-"; it returns "" when planType contains no letter or digit.
+func normalizePlanTypeForFilename(planType string) string {
+	// Every rune that is neither a letter nor a digit acts as a separator.
+	isSeparator := func(r rune) bool {
+		return !(unicode.IsLetter(r) || unicode.IsDigit(r))
+	}
+	words := strings.FieldsFunc(strings.TrimSpace(planType), isSeparator)
+	if len(words) == 0 {
+		return ""
+	}
+
+	for idx := range words {
+		words[idx] = strings.ToLower(words[idx])
+	}
+
+	return strings.Join(words, "-")
+}
--- a/internal/auth/codex/openai_auth.go
+++ b/internal/auth/codex/openai_auth.go
@@ -19,11 +19,12 @@ import (
 	log "github.com/sirupsen/logrus"
 )

+// OAuth configuration constants for OpenAI Codex
 const (
-	openaiAuthURL  = "https://auth.openai.com/oauth/authorize"
-	openaiTokenURL = "https://auth.openai.com/oauth/token"
-	openaiClientID = "app_EMoamEEZ73f0CkXaXp7hrann"
-	redirectURI    = "http://localhost:1455/auth/callback"
+	AuthURL     = "https://auth.openai.com/oauth/authorize"
+	TokenURL    = "https://auth.openai.com/oauth/token"
+	ClientID    = "app_EMoamEEZ73f0CkXaXp7hrann"
+	RedirectURI = "http://localhost:1455/auth/callback"
 )

 // CodexAuth handles the OpenAI OAuth2 authentication flow.
@@ -50,9 +51,9 @@ func (o *CodexAuth) GenerateAuthURL(state string, pkceCodes *PKCECodes) (string,
 	}

 	params := url.Values{
-		"client_id":                  {openaiClientID},
+		"client_id":                  {ClientID},
 		"response_type":              {"code"},
-		"redirect_uri":               {redirectURI},
+		"redirect_uri":               {RedirectURI},
 		"scope":                      {"openid email profile offline_access"},
 		"state":                      {state},
 		"code_challenge":             {pkceCodes.CodeChallenge},
@@ -62,7 +63,7 @@ func (o *CodexAuth) GenerateAuthURL(state string, pkceCodes *PKCECodes) (string,
 		"codex_cli_simplified_flow":  {"true"},
 	}

-	authURL := fmt.Sprintf("%s?%s", openaiAuthURL, params.Encode())
+	authURL := fmt.Sprintf("%s?%s", AuthURL, params.Encode())
 	return authURL, nil
 }

@@ -77,13 +78,13 @@ func (o *CodexAuth) ExchangeCodeForTokens(ctx context.Context, code string, pkce
 	// Prepare token exchange request
 	data := url.Values{
 		"grant_type":    {"authorization_code"},
-		"client_id":     {openaiClientID},
+		"client_id":     {ClientID},
 		"code":          {code},
-		"redirect_uri":  {redirectURI},
+		"redirect_uri":  {RedirectURI},
 		"code_verifier": {pkceCodes.CodeVerifier},
 	}

-	req, err := http.NewRequestWithContext(ctx, "POST", openaiTokenURL, strings.NewReader(data.Encode()))
+	req, err := http.NewRequestWithContext(ctx, "POST", TokenURL, strings.NewReader(data.Encode()))
 	if err != nil {
 		return nil, fmt.Errorf("failed to create token request: %w", err)
 	}
@@ -163,13 +164,13 @@ func (o *CodexAuth) RefreshTokens(ctx context.Context, refreshToken string) (*Co
 	}

 	data := url.Values{
-		"client_id":     {openaiClientID},
+		"client_id":     {ClientID},
 		"grant_type":    {"refresh_token"},
 		"refresh_token": {refreshToken},
 		"scope":         {"openid profile email"},
 	}

-	req, err := http.NewRequestWithContext(ctx, "POST", openaiTokenURL, strings.NewReader(data.Encode()))
+	req, err := http.NewRequestWithContext(ctx, "POST", TokenURL, strings.NewReader(data.Encode()))
 	if err != nil {
 		return nil, fmt.Errorf("failed to create refresh request: %w", err)
 	}
--- a/internal/auth/gemini/gemini_auth.go
+++ b/internal/auth/gemini/gemini_auth.go
@@ -28,19 +28,19 @@ import (
 	"golang.org/x/oauth2/google"
 )

+// OAuth configuration constants for Gemini
 const (
-	geminiOauthClientID       = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
-	geminiOauthClientSecret   = "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"
-	geminiDefaultCallbackPort = 8085
+	ClientID            = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
+	ClientSecret        = "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"
+	DefaultCallbackPort = 8085
 )

-var (
-	geminiOauthScopes = []string{
-		"https://www.googleapis.com/auth/cloud-platform",
-		"https://www.googleapis.com/auth/userinfo.email",
-		"https://www.googleapis.com/auth/userinfo.profile",
-	}
-)
+// OAuth scopes for Gemini authentication
+var Scopes = []string{
+	"https://www.googleapis.com/auth/cloud-platform",
+	"https://www.googleapis.com/auth/userinfo.email",
+	"https://www.googleapis.com/auth/userinfo.profile",
+}

 // GeminiAuth provides methods for handling the Gemini OAuth2 authentication flow.
 // It encapsulates the logic for obtaining, storing, and refreshing authentication tokens
@@ -74,7 +74,7 @@ func NewGeminiAuth() *GeminiAuth {
 //   - *http.Client: An HTTP client configured with authentication
 //   - error: An error if the client configuration fails, nil otherwise
 func (g *GeminiAuth) GetAuthenticatedClient(ctx context.Context, ts *GeminiTokenStorage, cfg *config.Config, opts *WebLoginOptions) (*http.Client, error) {
-	callbackPort := geminiDefaultCallbackPort
+	callbackPort := DefaultCallbackPort
 	if opts != nil && opts.CallbackPort > 0 {
 		callbackPort = opts.CallbackPort
 	}
@@ -112,10 +112,10 @@ func (g *GeminiAuth) GetAuthenticatedClient(ctx context.Context, ts *GeminiToken

 	// Configure the OAuth2 client.
 	conf := &oauth2.Config{
-		ClientID:     geminiOauthClientID,
-		ClientSecret: geminiOauthClientSecret,
+		ClientID:     ClientID,
+		ClientSecret: ClientSecret,
 		RedirectURL:  callbackURL, // This will be used by the local server.
-		Scopes:       geminiOauthScopes,
+		Scopes:       Scopes,
 		Endpoint:     google.Endpoint,
 	}

@@ -198,9 +198,9 @@ func (g *GeminiAuth) createTokenStorage(ctx context.Context, config *oauth2.Conf
 	}

 	ifToken["token_uri"] = "https://oauth2.googleapis.com/token"
-	ifToken["client_id"] = geminiOauthClientID
-	ifToken["client_secret"] = geminiOauthClientSecret
-	ifToken["scopes"] = geminiOauthScopes
+	ifToken["client_id"] = ClientID
+	ifToken["client_secret"] = ClientSecret
+	ifToken["scopes"] = Scopes
 	ifToken["universe_domain"] = "googleapis.com"

 	ts := GeminiTokenStorage{
@@ -226,7 +226,7 @@ func (g *GeminiAuth) createTokenStorage(ctx context.Context, config *oauth2.Conf
 //   - *oauth2.Token: The OAuth2 token obtained from the authorization flow
 //   - error: An error if the token acquisition fails, nil otherwise
 func (g *GeminiAuth) getTokenFromWeb(ctx context.Context, config *oauth2.Config, opts *WebLoginOptions) (*oauth2.Token, error) {
-	callbackPort := geminiDefaultCallbackPort
+	callbackPort := DefaultCallbackPort
 	if opts != nil && opts.CallbackPort > 0 {
 		callbackPort = opts.CallbackPort
 	}
--- a/internal/cache/signature_cache.go
+++ b/internal/cache/signature_cache.go
@@ -3,6 +3,7 @@ package cache
 import (
 	"crypto/sha256"
 	"encoding/hex"
+	"strings"
 	"sync"
 	"time"
 )
@@ -23,18 +24,18 @@ const (
 	// MinValidSignatureLen is the minimum length for a signature to be considered valid
 	MinValidSignatureLen = 50

-	// SessionCleanupInterval controls how often stale sessions are purged
-	SessionCleanupInterval = 10 * time.Minute
+	// CacheCleanupInterval controls how often stale entries are purged
+	CacheCleanupInterval = 10 * time.Minute
 )

-// signatureCache stores signatures by sessionId -> textHash -> SignatureEntry
+// signatureCache stores signatures by model group -> textHash -> SignatureEntry
 var signatureCache sync.Map

-// sessionCleanupOnce ensures the background cleanup goroutine starts only once
-var sessionCleanupOnce sync.Once
+// cacheCleanupOnce ensures the background cleanup goroutine starts only once
+var cacheCleanupOnce sync.Once

-// sessionCache is the inner map type
-type sessionCache struct {
+// groupCache is the inner map type
+type groupCache struct {
 	mu      sync.RWMutex
 	entries map[string]SignatureEntry
 }
@@ -45,36 +46,36 @@ func hashText(text string) string {
 	return hex.EncodeToString(h[:])[:SignatureTextHashLen]
 }

-// getOrCreateSession gets or creates a session cache
-func getOrCreateSession(sessionID string) *sessionCache {
+// getOrCreateGroupCache gets or creates a cache bucket for a model group
+func getOrCreateGroupCache(groupKey string) *groupCache {
 	// Start background cleanup on first access
-	sessionCleanupOnce.Do(startSessionCleanup)
+	cacheCleanupOnce.Do(startCacheCleanup)

-	if val, ok := signatureCache.Load(sessionID); ok {
-		return val.(*sessionCache)
+	if val, ok := signatureCache.Load(groupKey); ok {
+		return val.(*groupCache)
 	}
-	sc := &sessionCache{entries: make(map[string]SignatureEntry)}
-	actual, _ := signatureCache.LoadOrStore(sessionID, sc)
-	return actual.(*sessionCache)
+	sc := &groupCache{entries: make(map[string]SignatureEntry)}
+	actual, _ := signatureCache.LoadOrStore(groupKey, sc)
+	return actual.(*groupCache)
 }

-// startSessionCleanup launches a background goroutine that periodically
-// removes sessions where all entries have expired.
-func startSessionCleanup() {
+// startCacheCleanup launches a background goroutine that periodically
+// removes caches where all entries have expired.
+func startCacheCleanup() {
 	go func() {
-		ticker := time.NewTicker(SessionCleanupInterval)
+		ticker := time.NewTicker(CacheCleanupInterval)
 		defer ticker.Stop()
 		for range ticker.C {
-			purgeExpiredSessions()
+			purgeExpiredCaches()
 		}
 	}()
 }

-// purgeExpiredSessions removes sessions with no valid (non-expired) entries.
-func purgeExpiredSessions() {
+// purgeExpiredCaches removes caches with no valid (non-expired) entries.
+func purgeExpiredCaches() {
 	now := time.Now()
 	signatureCache.Range(func(key, value any) bool {
-		sc := value.(*sessionCache)
+		sc := value.(*groupCache)
 		sc.mu.Lock()
 		// Remove expired entries
 		for k, entry := range sc.entries {
@@ -84,7 +85,7 @@ func purgeExpiredSessions() {
 		}
 		isEmpty := len(sc.entries) == 0
 		sc.mu.Unlock()
-		// Remove session if empty
+		// Remove cache bucket if empty
 		if isEmpty {
 			signatureCache.Delete(key)
 		}
@@ -92,19 +93,19 @@ func purgeExpiredSessions() {
 	})
 }

-// CacheSignature stores a thinking signature for a given session and text.
+// CacheSignature stores a thinking signature for a given model group and text.
 // Used for Claude models that require signed thinking blocks in multi-turn conversations.
-func CacheSignature(sessionID, text, signature string) {
-	if sessionID == "" || text == "" || signature == "" {
+func CacheSignature(modelName, text, signature string) {
+	if text == "" || signature == "" {
 		return
 	}
 	if len(signature) < MinValidSignatureLen {
 		return
 	}

-	sc := getOrCreateSession(sessionID)
+	groupKey := GetModelGroup(modelName)
 	textHash := hashText(text)
-
+	sc := getOrCreateGroupCache(groupKey)
 	sc.mu.Lock()
 	defer sc.mu.Unlock()

@@ -114,18 +115,25 @@ func CacheSignature(sessionID, text, signature string) {
 	}
 }

-// GetCachedSignature retrieves a cached signature for a given session and text.
+// GetCachedSignature retrieves a cached signature for a given model group and text.
 // Returns empty string if not found or expired.
-func GetCachedSignature(sessionID, text string) string {
-	if sessionID == "" || text == "" {
-		return ""
-	}
+func GetCachedSignature(modelName, text string) string {
+	groupKey := GetModelGroup(modelName)

-	val, ok := signatureCache.Load(sessionID)
-	if !ok {
+	if text == "" {
+		if groupKey == "gemini" {
+			return "skip_thought_signature_validator"
+		}
 		return ""
 	}
-	sc := val.(*sessionCache)
+	val, ok := signatureCache.Load(groupKey)
+	if !ok {
+		if groupKey == "gemini" {
+			return "skip_thought_signature_validator"
+		}
+		return ""
+	}
+	sc := val.(*groupCache)

 	textHash := hashText(text)

@@ -135,11 +143,17 @@ func GetCachedSignature(sessionID, text string) string {
 	entry, exists := sc.entries[textHash]
 	if !exists {
 		sc.mu.Unlock()
+		if groupKey == "gemini" {
+			return "skip_thought_signature_validator"
+		}
 		return ""
 	}
 	if now.Sub(entry.Timestamp) > SignatureCacheTTL {
 		delete(sc.entries, textHash)
 		sc.mu.Unlock()
+		if groupKey == "gemini" {
+			return "skip_thought_signature_validator"
+		}
 		return ""
 	}

@@ -151,19 +165,31 @@ func GetCachedSignature(sessionID, text string) string {
 	return entry.Signature
 }

-// ClearSignatureCache clears signature cache for a specific session or all sessions.
-func ClearSignatureCache(sessionID string) {
-	if sessionID != "" {
-		signatureCache.Delete(sessionID)
-	} else {
+// ClearSignatureCache clears signature cache for a specific model group or all groups.
+func ClearSignatureCache(modelName string) {
+	if modelName == "" {
 		signatureCache.Range(func(key, _ any) bool {
 			signatureCache.Delete(key)
 			return true
 		})
+		return
 	}
+	groupKey := GetModelGroup(modelName)
+	signatureCache.Delete(groupKey)
 }

 // HasValidSignature checks if a signature is valid (non-empty and long enough)
-func HasValidSignature(signature string) bool {
-	return signature != "" && len(signature) >= MinValidSignatureLen
+func HasValidSignature(modelName, signature string) bool {
+	return (signature != "" && len(signature) >= MinValidSignatureLen) || (signature == "skip_thought_signature_validator" && GetModelGroup(modelName) == "gemini")
+}
+
+// GetModelGroup maps a model name to its cache group: the first of "gpt",
+// "claude", "gemini" contained in the name, otherwise the name itself.
+func GetModelGroup(modelName string) string {
+	for _, group := range [...]string{"gpt", "claude", "gemini"} {
+		if strings.Contains(modelName, group) {
+			return group
+		}
+	}
+	return modelName
 }
--- a/internal/cache/signature_cache_test.go
+++ b/internal/cache/signature_cache_test.go
@@ -5,38 +5,40 @@ import (
 	"time"
 )

+const testModelName = "claude-sonnet-4-5"
+
 func TestCacheSignature_BasicStorageAndRetrieval(t *testing.T) {
 	ClearSignatureCache("")

-	sessionID := "test-session-1"
 	text := "This is some thinking text content"
 	signature := "abc123validSignature1234567890123456789012345678901234567890"

 	// Store signature
-	CacheSignature(sessionID, text, signature)
+	CacheSignature(testModelName, text, signature)

 	// Retrieve signature
-	retrieved := GetCachedSignature(sessionID, text)
+	retrieved := GetCachedSignature(testModelName, text)
 	if retrieved != signature {
 		t.Errorf("Expected signature '%s', got '%s'", signature, retrieved)
 	}
 }

-func TestCacheSignature_DifferentSessions(t *testing.T) {
+func TestCacheSignature_DifferentModelGroups(t *testing.T) {
 	ClearSignatureCache("")

-	text := "Same text in different sessions"
+	text := "Same text across models"
 	sig1 := "signature1_1234567890123456789012345678901234567890123456"
 	sig2 := "signature2_1234567890123456789012345678901234567890123456"

-	CacheSignature("session-a", text, sig1)
-	CacheSignature("session-b", text, sig2)
+	geminiModel := "gemini-3-pro-preview"
+	CacheSignature(testModelName, text, sig1)
+	CacheSignature(geminiModel, text, sig2)

-	if GetCachedSignature("session-a", text) != sig1 {
-		t.Error("Session-a signature mismatch")
+	if GetCachedSignature(testModelName, text) != sig1 {
+		t.Error("Claude signature mismatch")
 	}
-	if GetCachedSignature("session-b", text) != sig2 {
-		t.Error("Session-b signature mismatch")
+	if GetCachedSignature(geminiModel, text) != sig2 {
+		t.Error("Gemini signature mismatch")
 	}
 }

@@ -44,13 +46,13 @@ func TestCacheSignature_NotFound(t *testing.T) {
 	ClearSignatureCache("")

-	// Non-existent session
+	// Model group with no cached entries
-	if got := GetCachedSignature("nonexistent", "some text"); got != "" {
+	if got := GetCachedSignature(testModelName, "some text"); got != "" {
 		t.Errorf("Expected empty string for nonexistent session, got '%s'", got)
 	}

-	// Existing session but different text
+	// Existing model group cache but different text
-	CacheSignature("session-x", "text-a", "sigA12345678901234567890123456789012345678901234567890")
-	if got := GetCachedSignature("session-x", "text-b"); got != "" {
+	CacheSignature(testModelName, "text-a", "sigA12345678901234567890123456789012345678901234567890")
+	if got := GetCachedSignature(testModelName, "text-b"); got != "" {
 		t.Errorf("Expected empty string for different text, got '%s'", got)
 	}
 }
@@ -59,12 +61,11 @@ func TestCacheSignature_EmptyInputs(t *testing.T) {
 	ClearSignatureCache("")

 	// All empty/invalid inputs should be no-ops
-	CacheSignature("", "text", "sig12345678901234567890123456789012345678901234567890")
-	CacheSignature("session", "", "sig12345678901234567890123456789012345678901234567890")
-	CacheSignature("session", "text", "")
-	CacheSignature("session", "text", "short") // Too short
+	CacheSignature(testModelName, "", "sig12345678901234567890123456789012345678901234567890")
+	CacheSignature(testModelName, "text", "")
+	CacheSignature(testModelName, "text", "short") // Too short

-	if got := GetCachedSignature("session", "text"); got != "" {
+	if got := GetCachedSignature(testModelName, "text"); got != "" {
 		t.Errorf("Expected empty after invalid cache attempts, got '%s'", got)
 	}
 }
@@ -72,31 +73,27 @@ func TestCacheSignature_EmptyInputs(t *testing.T) {
 func TestCacheSignature_ShortSignatureRejected(t *testing.T) {
 	ClearSignatureCache("")

-	sessionID := "test-short-sig"
 	text := "Some text"
 	shortSig := "abc123" // Less than 50 chars

-	CacheSignature(sessionID, text, shortSig)
+	CacheSignature(testModelName, text, shortSig)

-	if got := GetCachedSignature(sessionID, text); got != "" {
+	if got := GetCachedSignature(testModelName, text); got != "" {
 		t.Errorf("Short signature should be rejected, got '%s'", got)
 	}
 }

-func TestClearSignatureCache_SpecificSession(t *testing.T) {
+func TestClearSignatureCache_ModelGroup(t *testing.T) {
 	ClearSignatureCache("")

 	sig := "validSig1234567890123456789012345678901234567890123456"
-	CacheSignature("session-1", "text", sig)
-	CacheSignature("session-2", "text", sig)
+	CacheSignature(testModelName, "text", sig)
+	CacheSignature(testModelName, "text-2", sig)

 	ClearSignatureCache("session-1")

-	if got := GetCachedSignature("session-1", "text"); got != "" {
-		t.Error("session-1 should be cleared")
-	}
-	if got := GetCachedSignature("session-2", "text"); got != sig {
-		t.Error("session-2 should still exist")
+	if got := GetCachedSignature(testModelName, "text"); got != sig {
+		t.Error("signature should remain when clearing unknown session")
 	}
 }

@@ -104,35 +101,37 @@ func TestClearSignatureCache_AllSessions(t *testing.T) {
 	ClearSignatureCache("")

 	sig := "validSig1234567890123456789012345678901234567890123456"
-	CacheSignature("session-1", "text", sig)
-	CacheSignature("session-2", "text", sig)
+	CacheSignature(testModelName, "text", sig)
+	CacheSignature(testModelName, "text-2", sig)

 	ClearSignatureCache("")

-	if got := GetCachedSignature("session-1", "text"); got != "" {
-		t.Error("session-1 should be cleared")
+	if got := GetCachedSignature(testModelName, "text"); got != "" {
+		t.Error("text should be cleared")
 	}
-	if got := GetCachedSignature("session-2", "text"); got != "" {
-		t.Error("session-2 should be cleared")
+	if got := GetCachedSignature(testModelName, "text-2"); got != "" {
+		t.Error("text-2 should be cleared")
 	}
 }

 func TestHasValidSignature(t *testing.T) {
 	tests := []struct {
 		name      string
+		modelName string
 		signature string
 		expected  bool
 	}{
-		{"valid long signature", "abc123validSignature1234567890123456789012345678901234567890", true},
-		{"exactly 50 chars", "12345678901234567890123456789012345678901234567890", true},
-		{"49 chars - invalid", "1234567890123456789012345678901234567890123456789", false},
-		{"empty string", "", false},
-		{"short signature", "abc", false},
+		{"valid long signature", testModelName, "abc123validSignature1234567890123456789012345678901234567890", true},
+		{"exactly 50 chars", testModelName, "12345678901234567890123456789012345678901234567890", true},
+		{"49 chars - invalid", testModelName, "1234567890123456789012345678901234567890123456789", false},
+		{"empty string", testModelName, "", false},
+		{"short signature", testModelName, "abc", false},
+		{"gemini sentinel", "gemini-3-pro-preview", "skip_thought_signature_validator", true},
 	}

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			result := HasValidSignature(tt.signature)
+			result := HasValidSignature(tt.modelName, tt.signature)
 			if result != tt.expected {
 				t.Errorf("HasValidSignature(%q) = %v, expected %v", tt.signature, result, tt.expected)
 			}
@@ -143,21 +142,19 @@ func TestHasValidSignature(t *testing.T) {
 func TestCacheSignature_TextHashCollisionResistance(t *testing.T) {
 	ClearSignatureCache("")

-	sessionID := "hash-test-session"
-
 	// Different texts should produce different hashes
 	text1 := "First thinking text"
 	text2 := "Second thinking text"
 	sig1 := "signature1_1234567890123456789012345678901234567890123456"
 	sig2 := "signature2_1234567890123456789012345678901234567890123456"

-	CacheSignature(sessionID, text1, sig1)
-	CacheSignature(sessionID, text2, sig2)
+	CacheSignature(testModelName, text1, sig1)
+	CacheSignature(testModelName, text2, sig2)

-	if GetCachedSignature(sessionID, text1) != sig1 {
+	if GetCachedSignature(testModelName, text1) != sig1 {
 		t.Error("text1 signature mismatch")
 	}
-	if GetCachedSignature(sessionID, text2) != sig2 {
+	if GetCachedSignature(testModelName, text2) != sig2 {
 		t.Error("text2 signature mismatch")
 	}
 }
@@ -165,13 +162,12 @@ func TestCacheSignature_TextHashCollisionResistance(t *testing.T) {
 func TestCacheSignature_UnicodeText(t *testing.T) {
 	ClearSignatureCache("")

-	sessionID := "unicode-session"
 	text := "한글 텍스트와 이모지 🎉 그리고 特殊文字"
 	sig := "unicodeSig123456789012345678901234567890123456789012345"

-	CacheSignature(sessionID, text, sig)
+	CacheSignature(testModelName, text, sig)

-	if got := GetCachedSignature(sessionID, text); got != sig {
+	if got := GetCachedSignature(testModelName, text); got != sig {
 		t.Errorf("Unicode text signature retrieval failed, got '%s'", got)
 	}
 }
@@ -179,15 +175,14 @@ func TestCacheSignature_UnicodeText(t *testing.T) {
 func TestCacheSignature_Overwrite(t *testing.T) {
 	ClearSignatureCache("")

-	sessionID := "overwrite-session"
 	text := "Same text"
 	sig1 := "firstSignature12345678901234567890123456789012345678901"
 	sig2 := "secondSignature1234567890123456789012345678901234567890"

-	CacheSignature(sessionID, text, sig1)
-	CacheSignature(sessionID, text, sig2) // Overwrite
+	CacheSignature(testModelName, text, sig1)
+	CacheSignature(testModelName, text, sig2) // Overwrite

-	if got := GetCachedSignature(sessionID, text); got != sig2 {
+	if got := GetCachedSignature(testModelName, text); got != sig2 {
 		t.Errorf("Expected overwritten signature '%s', got '%s'", sig2, got)
 	}
 }
@@ -199,14 +194,13 @@ func TestCacheSignature_ExpirationLogic(t *testing.T) {

 	// This test verifies the expiration check exists
 	// In a real scenario, we'd mock time.Now()
-	sessionID := "expiration-test"
 	text := "text"
 	sig := "validSig1234567890123456789012345678901234567890123456"

-	CacheSignature(sessionID, text, sig)
+	CacheSignature(testModelName, text, sig)

 	// Fresh entry should be retrievable
-	if got := GetCachedSignature(sessionID, text); got != sig {
+	if got := GetCachedSignature(testModelName, text); got != sig {
 		t.Errorf("Fresh entry should be retrievable, got '%s'", got)
 	}

--- a/internal/cmd/login.go
+++ b/internal/cmd/login.go
@@ -118,6 +118,7 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
 	}

 	activatedProjects := make([]string, 0, len(projectSelections))
+	seenProjects := make(map[string]bool)
 	for _, candidateID := range projectSelections {
 		log.Infof("Activating project %s", candidateID)
 		if errSetup := performGeminiCLISetup(ctx, httpClient, storage, candidateID); errSetup != nil {
@@ -134,6 +135,13 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
 		if finalID == "" {
 			finalID = candidateID
 		}
+
+		// Skip duplicates
+		if seenProjects[finalID] {
+			log.Infof("Project %s already activated, skipping", finalID)
+			continue
+		}
+		seenProjects[finalID] = true
 		activatedProjects = append(activatedProjects, finalID)
 	}

@@ -261,7 +269,39 @@ func performGeminiCLISetup(ctx context.Context, httpClient *http.Client, storage
 			finalProjectID := projectID
 			if responseProjectID != "" {
 				if explicitProject && !strings.EqualFold(responseProjectID, projectID) {
-					log.Warnf("Gemini onboarding returned project %s instead of requested %s; keeping requested project ID.", responseProjectID, projectID)
+					// Check if this is a free user (gen-lang-client projects or free/legacy tier)
+					isFreeUser := strings.HasPrefix(projectID, "gen-lang-client-") ||
+						strings.EqualFold(tierID, "FREE") ||
+						strings.EqualFold(tierID, "LEGACY")
+
+					if isFreeUser {
+						// Interactive prompt for free users
+						fmt.Printf("\nGoogle returned a different project ID:\n")
+						fmt.Printf("  Requested (frontend): %s\n", projectID)
+						fmt.Printf("  Returned (backend):   %s\n\n", responseProjectID)
+						fmt.Printf("  Backend project IDs have access to preview models (gemini-3-*).\n")
+						fmt.Printf("  This is normal for free tier users.\n\n")
+						fmt.Printf("Which project ID would you like to use?\n")
+						fmt.Printf("  [1] Backend (recommended): %s\n", responseProjectID)
+						fmt.Printf("  [2] Frontend: %s\n\n", projectID)
+						fmt.Printf("Enter choice [1]: ")
+
+						reader := bufio.NewReader(os.Stdin)
+						choice, _ := reader.ReadString('\n')
+						choice = strings.TrimSpace(choice)
+
+						if choice == "2" {
+							log.Infof("Using frontend project ID: %s", projectID)
+							fmt.Println(". Warning: Frontend project IDs may not have access to preview models.")
+							finalProjectID = projectID
+						} else {
+							log.Infof("Using backend project ID: %s (recommended)", responseProjectID)
+							finalProjectID = responseProjectID
+						}
+					} else {
+						// Pro users: keep requested project ID (original behavior)
+						log.Warnf("Gemini onboarding returned project %s instead of requested %s; keeping requested project ID.", responseProjectID, projectID)
+					}
 				} else {
 					finalProjectID = responseProjectID
 				}
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -6,12 +6,14 @@ package config

 import (
 	"bytes"
+	"encoding/json"
 	"errors"
 	"fmt"
 	"os"
 	"strings"
 	"syscall"

+	log "github.com/sirupsen/logrus"
 	"golang.org/x/crypto/bcrypt"
 	"gopkg.in/yaml.v3"
 )
@@ -69,6 +71,11 @@ type Config struct {
 	// WebsocketAuth enables or disables authentication for the WebSocket API.
 	WebsocketAuth bool `yaml:"ws-auth" json:"ws-auth"`

+	// CodexInstructionsEnabled controls whether official Codex instructions are injected.
+	// When false (default), CodexInstructionsForModel returns immediately without modification.
+	// When true, the original instruction injection logic is used.
+	CodexInstructionsEnabled bool `yaml:"codex-instructions-enabled" json:"codex-instructions-enabled"`
+
 	// GeminiKey defines Gemini API key configurations with optional routing overrides.
 	GeminiKey []GeminiKey `yaml:"gemini-api-key" json:"gemini-api-key"`

@@ -91,13 +98,13 @@ type Config struct {
 	// OAuthExcludedModels defines per-provider global model exclusions applied to OAuth/file-backed auth entries.
 	OAuthExcludedModels map[string][]string `yaml:"oauth-excluded-models,omitempty" json:"oauth-excluded-models,omitempty"`

-	// OAuthModelMappings defines global model name mappings for OAuth/file-backed auth channels.
-	// These mappings affect both model listing and model routing for supported channels:
+	// OAuthModelAlias defines global model name aliases for OAuth/file-backed auth channels.
+	// These aliases affect both model listing and model routing for supported channels:
 	// gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow.
 	//
 	// NOTE: This does not apply to existing per-credential model alias features under:
 	// gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, and ampcode.
-	OAuthModelMappings map[string][]ModelNameMapping `yaml:"oauth-model-mappings,omitempty" json:"oauth-model-mappings,omitempty"`
+	OAuthModelAlias map[string][]OAuthModelAlias `yaml:"oauth-model-alias,omitempty" json:"oauth-model-alias,omitempty"`

 	// Payload defines default and override rules for provider payload parameters.
 	Payload PayloadConfig `yaml:"payload" json:"payload"`
@@ -145,11 +152,11 @@ type RoutingConfig struct {
 	Strategy string `yaml:"strategy,omitempty" json:"strategy,omitempty"`
 }

-// ModelNameMapping defines a model ID mapping for a specific channel.
+// OAuthModelAlias defines a model ID alias for a specific channel.
 // It maps the upstream model name (Name) to the client-visible alias (Alias).
 // When Fork is true, the alias is added as an additional model in listings while
 // keeping the original model ID available.
-type ModelNameMapping struct {
+type OAuthModelAlias struct {
 	Name  string `yaml:"name" json:"name"`
 	Alias string `yaml:"alias" json:"alias"`
 	Fork  bool   `yaml:"fork,omitempty" json:"fork,omitempty"`
@@ -216,8 +223,12 @@ type AmpUpstreamAPIKeyEntry struct {
 type PayloadConfig struct {
 	// Default defines rules that only set parameters when they are missing in the payload.
 	Default []PayloadRule `yaml:"default" json:"default"`
+	// DefaultRaw defines rules that set raw JSON values only when they are missing.
+	DefaultRaw []PayloadRule `yaml:"default-raw" json:"default-raw"`
 	// Override defines rules that always set parameters, overwriting any existing values.
 	Override []PayloadRule `yaml:"override" json:"override"`
+	// OverrideRaw defines rules that always set raw JSON values, overwriting any existing values.
+	OverrideRaw []PayloadRule `yaml:"override-raw" json:"override-raw"`
 }

 // PayloadRule describes a single rule targeting a list of models with parameter updates.
@@ -225,6 +236,7 @@ type PayloadRule struct {
 	// Models lists model entries with name pattern and protocol constraint.
 	Models []PayloadModelRule `yaml:"models" json:"models"`
 	// Params maps JSON paths (gjson/sjson syntax) to values written into the payload.
+	// For *-raw rules, values are treated as raw JSON fragments (strings are used as-is).
 	Params map[string]any `yaml:"params" json:"params"`
 }

@@ -236,6 +248,25 @@ type PayloadModelRule struct {
 	Protocol string `yaml:"protocol" json:"protocol"`
 }

+// CloakConfig configures request cloaking for non-Claude-Code clients.
+// Cloaking disguises API requests to appear as originating from the official Claude Code CLI.
+type CloakConfig struct {
+	// Mode controls cloaking behavior: "auto" (default), "always", or "never".
+	// - "auto": cloak only when client is not Claude Code (based on User-Agent)
+	// - "always": always apply cloaking regardless of client
+	// - "never": never apply cloaking
+	Mode string `yaml:"mode,omitempty" json:"mode,omitempty"`
+
+	// StrictMode controls how system prompts are handled when cloaking.
+	// - false (default): prepend Claude Code prompt to user system messages
+	// - true: strip all user system messages, keep only Claude Code prompt
+	StrictMode bool `yaml:"strict-mode,omitempty" json:"strict-mode,omitempty"`
+
+	// SensitiveWords is a list of words to obfuscate with zero-width characters.
+	// This can help bypass certain content filters.
+	SensitiveWords []string `yaml:"sensitive-words,omitempty" json:"sensitive-words,omitempty"`
+}
+
 // ClaudeKey represents the configuration for a Claude API key,
 // including the API key itself and an optional base URL for the API endpoint.
 type ClaudeKey struct {
@@ -264,8 +295,14 @@ type ClaudeKey struct {

 	// ExcludedModels lists model IDs that should be excluded for this provider.
 	ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"`
+
+	// Cloak configures request cloaking for non-Claude-Code clients.
+	Cloak *CloakConfig `yaml:"cloak,omitempty" json:"cloak,omitempty"`
 }

+func (k ClaudeKey) GetAPIKey() string  { return k.APIKey }
+func (k ClaudeKey) GetBaseURL() string { return k.BaseURL }
+
 // ClaudeModel describes a mapping between an alias and the actual upstream model name.
 type ClaudeModel struct {
 	// Name is the upstream model identifier used when issuing requests.
@@ -308,6 +345,9 @@ type CodexKey struct {
 	ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"`
 }

+func (k CodexKey) GetAPIKey() string  { return k.APIKey }
+func (k CodexKey) GetBaseURL() string { return k.BaseURL }
+
 // CodexModel describes a mapping between an alias and the actual upstream model name.
 type CodexModel struct {
 	// Name is the upstream model identifier used when issuing requests.
@@ -349,6 +389,9 @@ type GeminiKey struct {
 	ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"`
 }

+func (k GeminiKey) GetAPIKey() string  { return k.APIKey }
+func (k GeminiKey) GetBaseURL() string { return k.BaseURL }
+
 // GeminiModel describes a mapping between an alias and the actual upstream model name.
 type GeminiModel struct {
 	// Name is the upstream model identifier used when issuing requests.
@@ -406,6 +449,9 @@ type OpenAICompatibilityModel struct {
 	Alias string `yaml:"alias" json:"alias"`
 }

+func (m OpenAICompatibilityModel) GetName() string  { return m.Name }
+func (m OpenAICompatibilityModel) GetAlias() string { return m.Alias }
+
 // LoadConfig reads a YAML configuration file from the given path,
 // unmarshals it into a Config struct, applies environment variable overrides,
 // and returns it.
@@ -424,6 +470,15 @@ func LoadConfig(configFile string) (*Config, error) {
 // If optional is true and the file is missing, it returns an empty Config.
 // If optional is true and the file is empty or invalid, it returns an empty Config.
 func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
+	// Perform oauth-model-alias migration before loading config.
+	// This migrates oauth-model-mappings to oauth-model-alias if needed.
+	if migrated, err := MigrateOAuthModelAlias(configFile); err != nil {
+		// Log warning but don't fail - config loading should still work
+		fmt.Printf("Warning: oauth-model-alias migration failed: %v\n", err)
+	} else if migrated {
+		fmt.Println("Migrated oauth-model-mappings to oauth-model-alias")
+	}
+
 	// Read the entire configuration file into memory.
 	data, err := os.ReadFile(configFile)
 	if err != nil {
@@ -516,8 +571,11 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 	// Normalize OAuth provider model exclusion map.
 	cfg.OAuthExcludedModels = NormalizeOAuthExcludedModels(cfg.OAuthExcludedModels)

-	// Normalize global OAuth model name mappings.
-	cfg.SanitizeOAuthModelMappings()
+	// Normalize global OAuth model name aliases.
+	cfg.SanitizeOAuthModelAlias()
+
+	// Validate raw payload rules and drop invalid entries.
+	cfg.SanitizePayloadRules()

 	if cfg.legacyMigrationPending {
 		fmt.Println("Detected legacy configuration keys, attempting to persist the normalized config...")
@@ -535,24 +593,79 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 	return &cfg, nil
 }

-// SanitizeOAuthModelMappings normalizes and deduplicates global OAuth model name mappings.
-// It trims whitespace, normalizes channel keys to lower-case, drops empty entries,
-// allows multiple aliases per upstream name, and ensures aliases are unique within each channel.
-func (cfg *Config) SanitizeOAuthModelMappings() {
-	if cfg == nil || len(cfg.OAuthModelMappings) == 0 {
+// SanitizePayloadRules validates raw JSON payload rule params and drops invalid rules.
+func (cfg *Config) SanitizePayloadRules() {
+	if cfg == nil {
 		return
 	}
-	out := make(map[string][]ModelNameMapping, len(cfg.OAuthModelMappings))
-	for rawChannel, mappings := range cfg.OAuthModelMappings {
-		channel := strings.ToLower(strings.TrimSpace(rawChannel))
-		if channel == "" || len(mappings) == 0 {
+	cfg.Payload.DefaultRaw = sanitizePayloadRawRules(cfg.Payload.DefaultRaw, "default-raw")
+	cfg.Payload.OverrideRaw = sanitizePayloadRawRules(cfg.Payload.OverrideRaw, "override-raw")
+}
+
+// sanitizePayloadRawRules keeps the rules of one *-raw section whose string or
+// []byte params all hold valid JSON; a warning is logged for each rule dropped
+// over an invalid value. Rules that have no params are dropped without logging.
+func sanitizePayloadRawRules(rules []PayloadRule, section string) []PayloadRule {
+	if len(rules) == 0 {
+		return rules
+	}
+	kept := make([]PayloadRule, 0, len(rules))
+nextRule:
+	for idx, candidate := range rules {
+		if len(candidate.Params) == 0 {
 			continue
 		}
-		seenAlias := make(map[string]struct{}, len(mappings))
-		clean := make([]ModelNameMapping, 0, len(mappings))
-		for _, mapping := range mappings {
-			name := strings.TrimSpace(mapping.Name)
-			alias := strings.TrimSpace(mapping.Alias)
+		for paramPath, paramValue := range candidate.Params {
+			rawJSON, isRaw := payloadRawString(paramValue)
+			if !isRaw {
+				// Values that are not string/[]byte are not validated here.
+				continue
+			}
+			// Surrounding whitespace is ignored; an empty fragment is invalid.
+			if body := bytes.TrimSpace(rawJSON); len(body) == 0 || !json.Valid(body) {
+				log.WithFields(log.Fields{
+					"section":    section,
+					"rule_index": idx + 1,
+					"param":      paramPath,
+				}).Warn("payload rule dropped: invalid raw JSON")
+				// One bad param rejects the whole rule.
+				continue nextRule
+			}
+		}
+		kept = append(kept, candidate)
+	}
+	return kept
+}
+
+// payloadRawString returns value's bytes and true for a string or []byte, else (nil, false).
+func payloadRawString(value any) ([]byte, bool) {
+	if text, isString := value.(string); isString {
+		return []byte(text), true
+	}
+	if blob, isBytes := value.([]byte); isBytes {
+		return blob, true
+	}
+	return nil, false
+}
+
+// SanitizeOAuthModelAlias normalizes and deduplicates global OAuth model name aliases.
+// It trims whitespace, normalizes channel keys to lower-case, drops empty entries,
+// allows multiple aliases per upstream name, and ensures aliases are unique within each channel.
+func (cfg *Config) SanitizeOAuthModelAlias() {
+	if cfg == nil || len(cfg.OAuthModelAlias) == 0 {
+		return
+	}
+	out := make(map[string][]OAuthModelAlias, len(cfg.OAuthModelAlias))
+	for rawChannel, aliases := range cfg.OAuthModelAlias {
+		channel := strings.ToLower(strings.TrimSpace(rawChannel))
+		if channel == "" || len(aliases) == 0 {
+			continue
+		}
+		seenAlias := make(map[string]struct{}, len(aliases))
+		clean := make([]OAuthModelAlias, 0, len(aliases))
+		for _, entry := range aliases {
+			name := strings.TrimSpace(entry.Name)
+			alias := strings.TrimSpace(entry.Alias)
 			if name == "" || alias == "" {
 				continue
 			}
@@ -564,13 +677,13 @@ func (cfg *Config) SanitizeOAuthModelMappings() {
 				continue
 			}
 			seenAlias[aliasKey] = struct{}{}
-			clean = append(clean, ModelNameMapping{Name: name, Alias: alias, Fork: mapping.Fork})
+			clean = append(clean, OAuthModelAlias{Name: name, Alias: alias, Fork: entry.Fork})
 		}
 		if len(clean) > 0 {
 			out[channel] = clean
 		}
 	}
-	cfg.OAuthModelMappings = out
+	cfg.OAuthModelAlias = out
 }

 // SanitizeOpenAICompatibility removes OpenAI-compatibility provider entries that are
@@ -810,6 +923,7 @@ func SaveConfigPreserveComments(configFile string, cfg *Config) error {
 	removeLegacyGenerativeLanguageKeys(original.Content[0])

 	pruneMappingToGeneratedKeys(original.Content[0], generated.Content[0], "oauth-excluded-models")
+	pruneMappingToGeneratedKeys(original.Content[0], generated.Content[0], "oauth-model-alias")

 	// Merge generated into original in-place, preserving comments/order of existing nodes.
 	mergeMappingPreserve(original.Content[0], generated.Content[0])
@@ -1300,6 +1414,16 @@ func pruneMappingToGeneratedKeys(dstRoot, srcRoot *yaml.Node, key string) {
 	}
 	srcIdx := findMapKeyIndex(srcRoot, key)
 	if srcIdx < 0 {
+		// Keep an explicit empty mapping for oauth-model-alias when it was previously present.
+		//
+		// Rationale: LoadConfig runs MigrateOAuthModelAlias before unmarshalling. If the
+		// oauth-model-alias key is missing, migration will add the default antigravity aliases.
+		// When users delete the last channel from oauth-model-alias via the management API,
+		// we want that deletion to persist across hot reloads and restarts.
+		if key == "oauth-model-alias" {
+			dstRoot.Content[dstIdx+1] = &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"}
+			return
+		}
 		removeMapKey(dstRoot, key)
 		return
 	}
--- a/internal/config/oauth_model_alias_migration.go
+++ b/internal/config/oauth_model_alias_migration.go
@@ -0,0 +1,275 @@
+package config
+
+import (
+	"os"
+	"strings"
+
+	"gopkg.in/yaml.v3"
+)
+
+// antigravityModelConversionTable maps old built-in aliases to actual model names for the
+// antigravity channel; migration looks entries up by exact (case-sensitive) Name.
+var antigravityModelConversionTable = map[string]string{
+	"gemini-2.5-computer-use-preview-10-2025": "rev19-uic3-1p",
+	"gemini-3-pro-image-preview":              "gemini-3-pro-image",
+	"gemini-3-pro-preview":                    "gemini-3-pro-high",
+	"gemini-3-flash-preview":                  "gemini-3-flash",
+	"gemini-claude-sonnet-4-5":                "claude-sonnet-4-5",
+	"gemini-claude-sonnet-4-5-thinking":       "claude-sonnet-4-5-thinking",
+	"gemini-claude-opus-4-5-thinking":         "claude-opus-4-5-thinking",
+}
+
+// defaultAntigravityAliases returns the built-in antigravity aliases: written as-is when the
+// config has neither field, and used to fill in upstream names a migrated config lacks.
+func defaultAntigravityAliases() []OAuthModelAlias {
+	return []OAuthModelAlias{
+		{Name: "rev19-uic3-1p", Alias: "gemini-2.5-computer-use-preview-10-2025"},
+		{Name: "gemini-3-pro-image", Alias: "gemini-3-pro-image-preview"},
+		{Name: "gemini-3-pro-high", Alias: "gemini-3-pro-preview"},
+		{Name: "gemini-3-flash", Alias: "gemini-3-flash-preview"},
+		{Name: "claude-sonnet-4-5", Alias: "gemini-claude-sonnet-4-5"},
+		{Name: "claude-sonnet-4-5-thinking", Alias: "gemini-claude-sonnet-4-5-thinking"},
+		{Name: "claude-opus-4-5-thinking", Alias: "gemini-claude-opus-4-5-thinking"},
+	}
+}
+
+// MigrateOAuthModelAlias rewrites configFile so it uses oauth-model-alias instead of the
+// legacy oauth-model-mappings key. It returns true only when the file was rewritten; a
+// missing, empty, unparseable or non-mapping file yields (false, nil).
+//
+// Migration flow:
+//  1. oauth-model-alias exists -> nothing to do
+//  2. oauth-model-mappings exists -> convert it (antigravity names go through
+//     antigravityModelConversionTable)
+//  3. Neither exists -> add the default antigravity aliases
+func MigrateOAuthModelAlias(configFile string) (bool, error) {
+	data, err := os.ReadFile(configFile)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return false, nil
+		}
+		return false, err
+	}
+	if len(data) == 0 {
+		return false, nil
+	}
+
+	// Parse into a node tree to preserve structure; a parse failure is skipped, not reported
+	var root yaml.Node
+	if err := yaml.Unmarshal(data, &root); err != nil {
+		return false, nil
+	}
+	if root.Kind != yaml.DocumentNode || len(root.Content) == 0 {
+		return false, nil
+	}
+	rootMap := root.Content[0]
+	if rootMap == nil || rootMap.Kind != yaml.MappingNode {
+		return false, nil
+	}
+
+	// The new key wins whenever it is present (even if empty): leave the file alone
+	if findMapKeyIndex(rootMap, "oauth-model-alias") >= 0 {
+		return false, nil
+	}
+
+	// Check if oauth-model-mappings exists
+	oldIdx := findMapKeyIndex(rootMap, "oauth-model-mappings")
+	if oldIdx >= 0 {
+		// Migrate from old field
+		return migrateFromOldField(configFile, &root, rootMap, oldIdx)
+	}
+
+	// Neither field exists - add default antigravity config
+	return addDefaultAntigravityConfig(configFile, &root, rootMap)
+}
+
+// migrateFromOldField turns the oauth-model-mappings pair at oldIdx into oauth-model-alias and saves the file.
+func migrateFromOldField(configFile string, root *yaml.Node, rootMap *yaml.Node, oldIdx int) (bool, error) {
+	if oldIdx+1 >= len(rootMap.Content) {
+		return false, nil
+	}
+	oldValue := rootMap.Content[oldIdx+1]
+	if oldValue == nil || oldValue.Kind != yaml.MappingNode {
+		return false, nil
+	}
+
+	// Parse the old aliases
+	oldAliases := parseOldAliasNode(oldValue)
+	if len(oldAliases) == 0 {
+		// Nothing usable to migrate: drop the old key and persist the file
+		removeMapKeyByIndex(rootMap, oldIdx)
+		return writeYAMLNode(configFile, root)
+	}
+
+	// Copy every entry across, channel by channel
+	newAliases := make(map[string][]OAuthModelAlias, len(oldAliases))
+	for channel, entries := range oldAliases {
+		converted := make([]OAuthModelAlias, 0, len(entries))
+		for _, entry := range entries {
+			newEntry := OAuthModelAlias{
+				Name:  entry.Name,
+				Alias: entry.Alias,
+				Fork:  entry.Fork,
+			}
+			// Antigravity only: an old built-in alias used as Name becomes the actual model name
+			if strings.EqualFold(channel, "antigravity") {
+				if actual, ok := antigravityModelConversionTable[entry.Name]; ok {
+					newEntry.Name = actual
+				}
+			}
+			converted = append(converted, newEntry)
+		}
+		newAliases[channel] = converted
+	}
+
+	// For antigravity channel, supplement missing default aliases
+	if antigravityEntries, exists := newAliases["antigravity"]; exists {
+		// Build a set of already configured model names (upstream names)
+		configuredModels := make(map[string]bool, len(antigravityEntries))
+		for _, entry := range antigravityEntries {
+			configuredModels[entry.Name] = true
+		}
+
+		// Add a default only when its upstream name has no entry yet
+		for _, defaultAlias := range defaultAntigravityAliases() {
+			if !configuredModels[defaultAlias.Name] {
+				antigravityEntries = append(antigravityEntries, defaultAlias)
+			}
+		}
+		newAliases["antigravity"] = antigravityEntries
+	}
+
+	// Build new node
+	newNode := buildOAuthModelAliasNode(newAliases)
+
+	// Rename the existing key node in place and swap in the new value
+	rootMap.Content[oldIdx].Value = "oauth-model-alias"
+	rootMap.Content[oldIdx+1] = newNode
+
+	return writeYAMLNode(configFile, root)
+}
+
+// addDefaultAntigravityConfig appends the default antigravity aliases to rootMap and saves root.
+func addDefaultAntigravityConfig(configFile string, root *yaml.Node, rootMap *yaml.Node) (bool, error) {
+	valueNode := buildOAuthModelAliasNode(map[string][]OAuthModelAlias{
+		"antigravity": defaultAntigravityAliases(),
+	})
+	// A mapping node stores its pairs flat: key node followed by value node.
+	rootMap.Content = append(
+		rootMap.Content,
+		&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "oauth-model-alias"},
+		valueNode,
+	)
+	return writeYAMLNode(configFile, root)
+}
+
+// parseOldAliasNode parses the old oauth-model-mappings node structure
+func parseOldAliasNode(node *yaml.Node) map[string][]OAuthModelAlias {
+	if node == nil || node.Kind != yaml.MappingNode {
+		return nil
+	}
+	result := make(map[string][]OAuthModelAlias)
+	for i := 0; i+1 < len(node.Content); i += 2 {
+		channelNode := node.Content[i]
+		entriesNode := node.Content[i+1]
+		if channelNode == nil || entriesNode == nil {
+			continue
+		}
+		channel := strings.ToLower(strings.TrimSpace(channelNode.Value))
+		if channel == "" || entriesNode.Kind != yaml.SequenceNode {
+			continue
+		}
+		entries := make([]OAuthModelAlias, 0, len(entriesNode.Content))
+		for _, entryNode := range entriesNode.Content {
+			if entryNode == nil || entryNode.Kind != yaml.MappingNode {
+				continue
+			}
+			entry := parseAliasEntry(entryNode)
+			if entry.Name != "" && entry.Alias != "" {
+				entries = append(entries, entry)
+			}
+		}
+		if len(entries) > 0 {
+			result[channel] = entries
+		}
+	}
+	return result
+}
+
+// parseAliasEntry extracts name, alias and fork (keys matched case-insensitively) from one entry node.
+func parseAliasEntry(node *yaml.Node) OAuthModelAlias {
+	var parsed OAuthModelAlias
+	for idx := 1; idx < len(node.Content); idx += 2 {
+		key, val := node.Content[idx-1], node.Content[idx]
+		if key == nil || val == nil {
+			continue
+		}
+		text := strings.TrimSpace(val.Value)
+		switch field := strings.ToLower(strings.TrimSpace(key.Value)); field {
+		case "name":
+			parsed.Name = text
+		case "alias":
+			parsed.Alias = text
+		case "fork":
+			parsed.Fork = strings.ToLower(text) == "true"
+		}
+	}
+	return parsed
+}
+
+// buildOAuthModelAliasNode renders aliases as a YAML mapping of channel name -> sequence of
+// {name, alias[, fork]} entries; the fork key is written only when it is true.
+func buildOAuthModelAliasNode(aliases map[string][]OAuthModelAlias) *yaml.Node {
+	scalar := func(tag, value string) *yaml.Node {
+		return &yaml.Node{Kind: yaml.ScalarNode, Tag: tag, Value: value}
+	}
+	out := &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"}
+	// Go map iteration order is unspecified, so channel order in the output can vary.
+	for channel, entries := range aliases {
+		seq := &yaml.Node{Kind: yaml.SequenceNode, Tag: "!!seq"}
+		for _, entry := range entries {
+			// Flat key/value node list for one entry mapping.
+			fields := []*yaml.Node{
+				scalar("!!str", "name"), scalar("!!str", entry.Name),
+				scalar("!!str", "alias"), scalar("!!str", entry.Alias),
+			}
+			if entry.Fork {
+				fields = append(fields, scalar("!!str", "fork"), scalar("!!bool", "true"))
+			}
+			item := &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map", Content: fields}
+			seq.Content = append(seq.Content, item)
+		}
+		out.Content = append(out.Content, scalar("!!str", channel), seq)
+	}
+	return out
+}
+
+// removeMapKeyByIndex drops the key node at keyIdx together with the value node after it.
+// A nil or non-mapping node, or an index without a full pair, leaves mapNode untouched.
+func removeMapKeyByIndex(mapNode *yaml.Node, keyIdx int) {
+	if mapNode == nil || mapNode.Kind != yaml.MappingNode || keyIdx < 0 || keyIdx+1 >= len(mapNode.Content) {
+		return
+	}
+	// Shift the tail left over the removed pair.
+	pairEnd := keyIdx + 2
+	mapNode.Content = append(mapNode.Content[:keyIdx], mapNode.Content[pairEnd:]...)
+}
+
+// writeYAMLNode writes the YAML node tree back to file
+func writeYAMLNode(configFile string, root *yaml.Node) (bool, error) {
+	f, err := os.Create(configFile)
+	if err != nil {
+		return false, err
+	}
+	defer f.Close()
+
+	enc := yaml.NewEncoder(f)
+	enc.SetIndent(2)
+	if err := enc.Encode(root); err != nil {
+		return false, err
+	}
+	if err := enc.Close(); err != nil {
+		return false, err
+	}
+	return true, nil
+}
--- a/internal/config/oauth_model_alias_migration_test.go
+++ b/internal/config/oauth_model_alias_migration_test.go
@@ -0,0 +1,242 @@
+package config
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"gopkg.in/yaml.v3"
+)
+
+// TestMigrateOAuthModelAlias_SkipsIfNewFieldExists checks that an existing oauth-model-alias key disables migration.
+func TestMigrateOAuthModelAlias_SkipsIfNewFieldExists(t *testing.T) {
+	t.Parallel()
+
+	dir := t.TempDir()
+	configFile := filepath.Join(dir, "config.yaml")
+
+	content := `oauth-model-alias:
+  gemini-cli:
+    - name: "gemini-2.5-pro"
+      alias: "g2.5p"
+`
+	if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	migrated, err := MigrateOAuthModelAlias(configFile)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if migrated {
+		t.Fatal("expected no migration when oauth-model-alias already exists")
+	}
+
+	// The file must be byte-for-byte untouched, not merely still contain the key.
+	if data, errRead := os.ReadFile(configFile); errRead != nil || string(data) != content {
+		t.Fatalf("expected config file to be unchanged, got err=%v content=%q", errRead, data)
+	}
+}
+
+func TestMigrateOAuthModelAlias_MigratesOldField(t *testing.T) {
+	t.Parallel()
+
+	dir := t.TempDir()
+	configFile := filepath.Join(dir, "config.yaml")
+
+	content := `oauth-model-mappings:
+  gemini-cli:
+    - name: "gemini-2.5-pro"
+      alias: "g2.5p"
+      fork: true
+`
+	if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	migrated, err := MigrateOAuthModelAlias(configFile)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !migrated {
+		t.Fatal("expected migration to occur")
+	}
+
+	// Verify the old field is gone and the entry (fork flag included) moved under the new key
+	data, _ := os.ReadFile(configFile)
+	if strings.Contains(string(data), "oauth-model-mappings:") {
+		t.Fatal("old field should be removed")
+	}
+	var parsed struct {
+		Aliases map[string][]OAuthModelAlias `yaml:"oauth-model-alias"`
+	}
+	if err := yaml.Unmarshal(data, &parsed); err != nil {
+		t.Fatal(err)
+	}
+	if got := parsed.Aliases["gemini-cli"]; len(got) != 1 || got[0].Name != "gemini-2.5-pro" || got[0].Alias != "g2.5p" || !got[0].Fork {
+		t.Fatalf("expected gemini-cli alias gemini-2.5-pro->g2.5p fork=true, got %+v", got)
+	}
+}
+
+func TestMigrateOAuthModelAlias_ConvertsAntigravityModels(t *testing.T) {
+	t.Parallel()
+
+	dir := t.TempDir()
+	configFile := filepath.Join(dir, "config.yaml")
+
+	// Use old model names that should be converted
+	content := `oauth-model-mappings:
+  antigravity:
+    - name: "gemini-2.5-computer-use-preview-10-2025"
+      alias: "computer-use"
+    - name: "gemini-3-pro-preview"
+      alias: "g3p"
+`
+	if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	migrated, err := MigrateOAuthModelAlias(configFile)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !migrated {
+		t.Fatal("expected migration to occur")
+	}
+
+	// Verify model names were converted
+	data, _ := os.ReadFile(configFile)
+	content = string(data)
+	if !strings.Contains(content, "rev19-uic3-1p") {
+		t.Fatal("expected gemini-2.5-computer-use-preview-10-2025 to be converted to rev19-uic3-1p")
+	}
+	if !strings.Contains(content, "gemini-3-pro-high") {
+		t.Fatal("expected gemini-3-pro-preview to be converted to gemini-3-pro-high")
+	}
+
+	// Verify missing default aliases were supplemented
+	if !strings.Contains(content, "gemini-3-pro-image") {
+		t.Fatal("expected missing default alias gemini-3-pro-image to be added")
+	}
+	if !strings.Contains(content, "gemini-3-flash") {
+		t.Fatal("expected missing default alias gemini-3-flash to be added")
+	}
+	if !strings.Contains(content, "claude-sonnet-4-5") {
+		t.Fatal("expected missing default alias claude-sonnet-4-5 to be added")
+	}
+	if !strings.Contains(content, "claude-sonnet-4-5-thinking") {
+		t.Fatal("expected missing default alias claude-sonnet-4-5-thinking to be added")
+	}
+	if !strings.Contains(content, "claude-opus-4-5-thinking") {
+		t.Fatal("expected missing default alias claude-opus-4-5-thinking to be added")
+	}
+}
+
+func TestMigrateOAuthModelAlias_AddsDefaultIfNeitherExists(t *testing.T) {
+	t.Parallel()
+
+	dir := t.TempDir()
+	configFile := filepath.Join(dir, "config.yaml")
+
+	content := `debug: true
+port: 8080
+`
+	if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	migrated, err := MigrateOAuthModelAlias(configFile)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !migrated {
+		t.Fatal("expected migration to add default config")
+	}
+
+	// Verify default antigravity config was added
+	data, _ := os.ReadFile(configFile)
+	content = string(data)
+	if !strings.Contains(content, "oauth-model-alias:") {
+		t.Fatal("expected oauth-model-alias to be added")
+	}
+	if !strings.Contains(content, "antigravity:") {
+		t.Fatal("expected antigravity channel to be added")
+	}
+	if !strings.Contains(content, "rev19-uic3-1p") {
+		t.Fatal("expected default antigravity aliases to include rev19-uic3-1p")
+	}
+}
+
+func TestMigrateOAuthModelAlias_PreservesOtherConfig(t *testing.T) {
+	t.Parallel()
+
+	dir := t.TempDir()
+	configFile := filepath.Join(dir, "config.yaml")
+
+	content := `debug: true
+port: 8080
+oauth-model-mappings:
+  gemini-cli:
+    - name: "test"
+      alias: "t"
+api-keys:
+  - "key1"
+  - "key2"
+`
+	if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	migrated, err := MigrateOAuthModelAlias(configFile)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !migrated {
+		t.Fatal("expected migration to occur")
+	}
+
+	// Verify other config preserved
+	data, _ := os.ReadFile(configFile)
+	content = string(data)
+	if !strings.Contains(content, "debug: true") {
+		t.Fatal("expected debug field to be preserved")
+	}
+	if !strings.Contains(content, "port: 8080") {
+		t.Fatal("expected port field to be preserved")
+	}
+	if !strings.Contains(content, "api-keys:") {
+		t.Fatal("expected api-keys field to be preserved")
+	}
+}
+
+func TestMigrateOAuthModelAlias_NonexistentFile(t *testing.T) {
+	t.Parallel()
+
+	migrated, err := MigrateOAuthModelAlias("/nonexistent/path/config.yaml")
+	if err != nil {
+		t.Fatalf("unexpected error for nonexistent file: %v", err)
+	}
+	if migrated {
+		t.Fatal("expected no migration for nonexistent file")
+	}
+}
+
+func TestMigrateOAuthModelAlias_EmptyFile(t *testing.T) {
+	t.Parallel()
+
+	dir := t.TempDir()
+	configFile := filepath.Join(dir, "config.yaml")
+
+	if err := os.WriteFile(configFile, []byte(""), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	migrated, err := MigrateOAuthModelAlias(configFile)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if migrated {
+		t.Fatal("expected no migration for empty file")
+	}
+}
--- a/internal/config/oauth_model_alias_test.go
+++ b/internal/config/oauth_model_alias_test.go
@@ -0,0 +1,56 @@
+package config
+
+import "testing"
+
+func TestSanitizeOAuthModelAlias_PreservesForkFlag(t *testing.T) {
+	cfg := &Config{
+		OAuthModelAlias: map[string][]OAuthModelAlias{
+			" CoDeX ": {
+				{Name: " gpt-5 ", Alias: " g5 ", Fork: true},
+				{Name: "gpt-6", Alias: "g6"},
+			},
+		},
+	}
+
+	cfg.SanitizeOAuthModelAlias()
+
+	aliases := cfg.OAuthModelAlias["codex"]
+	if len(aliases) != 2 {
+		t.Fatalf("expected 2 sanitized aliases, got %d", len(aliases))
+	}
+	if aliases[0].Name != "gpt-5" || aliases[0].Alias != "g5" || !aliases[0].Fork {
+		t.Fatalf("expected first alias to be gpt-5->g5 fork=true, got name=%q alias=%q fork=%v", aliases[0].Name, aliases[0].Alias, aliases[0].Fork)
+	}
+	if aliases[1].Name != "gpt-6" || aliases[1].Alias != "g6" || aliases[1].Fork {
+		t.Fatalf("expected second alias to be gpt-6->g6 fork=false, got name=%q alias=%q fork=%v", aliases[1].Name, aliases[1].Alias, aliases[1].Fork)
+	}
+}
+
+func TestSanitizeOAuthModelAlias_AllowsMultipleAliasesForSameName(t *testing.T) {
+	cfg := &Config{
+		OAuthModelAlias: map[string][]OAuthModelAlias{
+			"antigravity": {
+				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true},
+				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true},
+				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true},
+			},
+		},
+	}
+
+	cfg.SanitizeOAuthModelAlias()
+
+	aliases := cfg.OAuthModelAlias["antigravity"]
+	expected := []OAuthModelAlias{
+		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true},
+		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true},
+		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true},
+	}
+	if len(aliases) != len(expected) {
+		t.Fatalf("expected %d sanitized aliases, got %d", len(expected), len(aliases))
+	}
+	for i, exp := range expected {
+		if aliases[i].Name != exp.Name || aliases[i].Alias != exp.Alias || aliases[i].Fork != exp.Fork {
+			t.Fatalf("expected alias %d to be name=%q alias=%q fork=%v, got name=%q alias=%q fork=%v", i, exp.Name, exp.Alias, exp.Fork, aliases[i].Name, aliases[i].Alias, aliases[i].Fork)
+		}
+	}
+}
--- a/internal/config/oauth_model_mappings_test.go
+++ b/internal/config/oauth_model_mappings_test.go
@@ -1,56 +0,0 @@
-package config
-
-import "testing"
-
-func TestSanitizeOAuthModelMappings_PreservesForkFlag(t *testing.T) {
-	cfg := &Config{
-		OAuthModelMappings: map[string][]ModelNameMapping{
-			" CoDeX ": {
-				{Name: " gpt-5 ", Alias: " g5 ", Fork: true},
-				{Name: "gpt-6", Alias: "g6"},
-			},
-		},
-	}
-
-	cfg.SanitizeOAuthModelMappings()
-
-	mappings := cfg.OAuthModelMappings["codex"]
-	if len(mappings) != 2 {
-		t.Fatalf("expected 2 sanitized mappings, got %d", len(mappings))
-	}
-	if mappings[0].Name != "gpt-5" || mappings[0].Alias != "g5" || !mappings[0].Fork {
-		t.Fatalf("expected first mapping to be gpt-5->g5 fork=true, got name=%q alias=%q fork=%v", mappings[0].Name, mappings[0].Alias, mappings[0].Fork)
-	}
-	if mappings[1].Name != "gpt-6" || mappings[1].Alias != "g6" || mappings[1].Fork {
-		t.Fatalf("expected second mapping to be gpt-6->g6 fork=false, got name=%q alias=%q fork=%v", mappings[1].Name, mappings[1].Alias, mappings[1].Fork)
-	}
-}
-
-func TestSanitizeOAuthModelMappings_AllowsMultipleAliasesForSameName(t *testing.T) {
-	cfg := &Config{
-		OAuthModelMappings: map[string][]ModelNameMapping{
-			"antigravity": {
-				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true},
-				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true},
-				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true},
-			},
-		},
-	}
-
-	cfg.SanitizeOAuthModelMappings()
-
-	mappings := cfg.OAuthModelMappings["antigravity"]
-	expected := []ModelNameMapping{
-		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true},
-		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true},
-		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true},
-	}
-	if len(mappings) != len(expected) {
-		t.Fatalf("expected %d sanitized mappings, got %d", len(expected), len(mappings))
-	}
-	for i, exp := range expected {
-		if mappings[i].Name != exp.Name || mappings[i].Alias != exp.Alias || mappings[i].Fork != exp.Fork {
-			t.Fatalf("expected mapping %d to be name=%q alias=%q fork=%v, got name=%q alias=%q fork=%v", i, exp.Name, exp.Alias, exp.Fork, mappings[i].Name, mappings[i].Alias, mappings[i].Fork)
-		}
-	}
-}
--- a/internal/config/vertex_compat.go
+++ b/internal/config/vertex_compat.go
@@ -36,6 +36,9 @@ type VertexCompatKey struct {
 	Models []VertexCompatModel `yaml:"models,omitempty" json:"models,omitempty"`
 }

+func (k VertexCompatKey) GetAPIKey() string  { return k.APIKey }
+func (k VertexCompatKey) GetBaseURL() string { return k.BaseURL }
+
 // VertexCompatModel represents a model configuration for Vertex compatibility,
 // including the actual model name and its alias for API routing.
 type VertexCompatModel struct {
--- a/internal/logging/gin_logger.go
+++ b/internal/logging/gin_logger.go
@@ -4,6 +4,7 @@
 package logging

 import (
+	"errors"
 	"fmt"
 	"net/http"
 	"runtime/debug"
@@ -112,6 +113,11 @@ func isAIAPIPath(path string) bool {
 //   - gin.HandlerFunc: A middleware handler for panic recovery
 func GinLogrusRecovery() gin.HandlerFunc {
 	return gin.CustomRecovery(func(c *gin.Context, recovered interface{}) {
+		if err, ok := recovered.(error); ok && errors.Is(err, http.ErrAbortHandler) {
+			// Let net/http handle ErrAbortHandler so the connection is aborted without noisy stack logs.
+			panic(http.ErrAbortHandler)
+		}
+
 		log.WithFields(log.Fields{
 			"panic": recovered,
 			"stack": string(debug.Stack()),
--- a/internal/logging/gin_logger_test.go
+++ b/internal/logging/gin_logger_test.go
@@ -0,0 +1,60 @@
+package logging
+
+import (
+	"errors"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/gin-gonic/gin"
+)
+
+func TestGinLogrusRecoveryRepanicsErrAbortHandler(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	engine := gin.New()
+	engine.Use(GinLogrusRecovery())
+	engine.GET("/abort", func(c *gin.Context) {
+		panic(http.ErrAbortHandler)
+	})
+
+	req := httptest.NewRequest(http.MethodGet, "/abort", nil)
+	recorder := httptest.NewRecorder()
+
+	defer func() {
+		recovered := recover()
+		if recovered == nil {
+			t.Fatalf("expected panic, got nil")
+		}
+		err, ok := recovered.(error)
+		if !ok {
+			t.Fatalf("expected error panic, got %T", recovered)
+		}
+		if !errors.Is(err, http.ErrAbortHandler) {
+			t.Fatalf("expected ErrAbortHandler, got %v", err)
+		}
+		if err != http.ErrAbortHandler {
+			t.Fatalf("expected exact ErrAbortHandler sentinel, got %v", err)
+		}
+	}()
+
+	engine.ServeHTTP(recorder, req)
+}
+
+func TestGinLogrusRecoveryHandlesRegularPanic(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	engine := gin.New()
+	engine.Use(GinLogrusRecovery())
+	engine.GET("/panic", func(c *gin.Context) {
+		panic("boom")
+	})
+
+	req := httptest.NewRequest(http.MethodGet, "/panic", nil)
+	recorder := httptest.NewRecorder()
+
+	engine.ServeHTTP(recorder, req)
+	if recorder.Code != http.StatusInternalServerError {
+		t.Fatalf("expected 500, got %d", recorder.Code)
+	}
+}
--- a/internal/logging/global_logger.go
+++ b/internal/logging/global_logger.go
@@ -29,6 +29,9 @@ var (
 // Format: [2025-12-23 20:14:04] [debug] [manager.go:524] | a1b2c3d4 | Use API key sk-9...0RHO for model gpt-5.2
 type LogFormatter struct{}

+// logFieldOrder defines the display order for common log fields.
+var logFieldOrder = []string{"provider", "model", "mode", "budget", "level", "original_mode", "original_value", "min", "max", "clamped_to", "error"}
+
 // Format renders a single log entry with custom formatting.
 func (m *LogFormatter) Format(entry *log.Entry) ([]byte, error) {
 	var buffer *bytes.Buffer
@@ -52,11 +55,25 @@ func (m *LogFormatter) Format(entry *log.Entry) ([]byte, error) {
 	}
 	levelStr := fmt.Sprintf("%-5s", level)

+	// Build fields string (only print fields in logFieldOrder)
+	var fieldsStr string
+	if len(entry.Data) > 0 {
+		var fields []string
+		for _, k := range logFieldOrder {
+			if v, ok := entry.Data[k]; ok {
+				fields = append(fields, fmt.Sprintf("%s=%v", k, v))
+			}
+		}
+		if len(fields) > 0 {
+			fieldsStr = " " + strings.Join(fields, " ")
+		}
+	}
+
 	var formatted string
 	if entry.Caller != nil {
-		formatted = fmt.Sprintf("[%s] [%s] [%s] [%s:%d] %s\n", timestamp, reqID, levelStr, filepath.Base(entry.Caller.File), entry.Caller.Line, message)
+		formatted = fmt.Sprintf("[%s] [%s] [%s] [%s:%d] %s%s\n", timestamp, reqID, levelStr, filepath.Base(entry.Caller.File), entry.Caller.Line, message, fieldsStr)
 	} else {
-		formatted = fmt.Sprintf("[%s] [%s] [%s] %s\n", timestamp, reqID, levelStr, message)
+		formatted = fmt.Sprintf("[%s] [%s] [%s] %s%s\n", timestamp, reqID, levelStr, message, fieldsStr)
 	}
 	buffer.WriteString(formatted)

@@ -104,6 +121,24 @@ func isDirWritable(dir string) bool {
 	return true
 }

+// ResolveLogDirectory determines the directory used for application logs.
+func ResolveLogDirectory(cfg *config.Config) string {
+	logDir := "logs"
+	if base := util.WritablePath(); base != "" {
+		return filepath.Join(base, "logs")
+	}
+	if cfg == nil {
+		return logDir
+	}
+	if !isDirWritable(logDir) {
+		authDir := strings.TrimSpace(cfg.AuthDir)
+		if authDir != "" {
+			logDir = filepath.Join(authDir, "logs")
+		}
+	}
+	return logDir
+}
+
 // ConfigureLogOutput switches the global log destination between rotating files and stdout.
 // When logsMaxTotalSizeMB > 0, a background cleaner removes the oldest log files in the logs directory
 // until the total size is within the limit.
@@ -113,12 +148,7 @@ func ConfigureLogOutput(cfg *config.Config) error {
 	writerMu.Lock()
 	defer writerMu.Unlock()

-	logDir := "logs"
-	if base := util.WritablePath(); base != "" {
-		logDir = filepath.Join(base, "logs")
-	} else if !isDirWritable(logDir) {
-		logDir = filepath.Join(cfg.AuthDir, "logs")
-	}
+	logDir := ResolveLogDirectory(cfg)

 	protectedPath := ""
 	if cfg.LoggingToFile {
--- a/internal/logging/request_logger.go
+++ b/internal/logging/request_logger.go
@@ -44,10 +44,12 @@ type RequestLogger interface {
 	//   - apiRequest: The API request data
 	//   - apiResponse: The API response data
 	//   - requestID: Optional request ID for log file naming
+	//   - requestTimestamp: When the request was received
+	//   - apiResponseTimestamp: When the API response was received
 	//
 	// Returns:
 	//   - error: An error if logging fails, nil otherwise
-	LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string) error
+	LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error

 	// LogStreamingRequest initiates logging for a streaming request and returns a writer for chunks.
 	//
@@ -109,6 +111,12 @@ type StreamingLogWriter interface {
 	//   - error: An error if writing fails, nil otherwise
 	WriteAPIResponse(apiResponse []byte) error

+	// SetFirstChunkTimestamp sets the TTFB timestamp captured when first chunk was received.
+	//
+	// Parameters:
+	//   - timestamp: The time when first response chunk was received
+	SetFirstChunkTimestamp(timestamp time.Time)
+
 	// Close finalizes the log file and cleans up resources.
 	//
 	// Returns:
@@ -180,20 +188,22 @@ func (l *FileRequestLogger) SetEnabled(enabled bool) {
 //   - apiRequest: The API request data
 //   - apiResponse: The API response data
 //   - requestID: Optional request ID for log file naming
+//   - requestTimestamp: When the request was received
+//   - apiResponseTimestamp: When the API response was received
 //
 // Returns:
 //   - error: An error if logging fails, nil otherwise
-func (l *FileRequestLogger) LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string) error {
-	return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, false, requestID)
+func (l *FileRequestLogger) LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
+	return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, false, requestID, requestTimestamp, apiResponseTimestamp)
 }

 // LogRequestWithOptions logs a request with optional forced logging behavior.
 // The force flag allows writing error logs even when regular request logging is disabled.
-func (l *FileRequestLogger) LogRequestWithOptions(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string) error {
-	return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, force, requestID)
+func (l *FileRequestLogger) LogRequestWithOptions(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
+	return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, force, requestID, requestTimestamp, apiResponseTimestamp)
 }

-func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string) error {
+func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
 	if !l.enabled && !force {
 		return nil
 	}
@@ -247,6 +257,8 @@ func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[st
 		responseHeaders,
 		responseToWrite,
 		decompressErr,
+		requestTimestamp,
+		apiResponseTimestamp,
 	)
 	if errClose := logFile.Close(); errClose != nil {
 		log.WithError(errClose).Warn("failed to close request log file")
@@ -499,17 +511,22 @@ func (l *FileRequestLogger) writeNonStreamingLog(
 	responseHeaders map[string][]string,
 	response []byte,
 	decompressErr error,
+	requestTimestamp time.Time,
+	apiResponseTimestamp time.Time,
 ) error {
-	if errWrite := writeRequestInfoWithBody(w, url, method, requestHeaders, requestBody, requestBodyPath, time.Now()); errWrite != nil {
+	if requestTimestamp.IsZero() {
+		requestTimestamp = time.Now()
+	}
+	if errWrite := writeRequestInfoWithBody(w, url, method, requestHeaders, requestBody, requestBodyPath, requestTimestamp); errWrite != nil {
 		return errWrite
 	}
-	if errWrite := writeAPISection(w, "=== API REQUEST ===\n", "=== API REQUEST", apiRequest); errWrite != nil {
+	if errWrite := writeAPISection(w, "=== API REQUEST ===\n", "=== API REQUEST", apiRequest, time.Time{}); errWrite != nil {
 		return errWrite
 	}
 	if errWrite := writeAPIErrorResponses(w, apiResponseErrors); errWrite != nil {
 		return errWrite
 	}
-	if errWrite := writeAPISection(w, "=== API RESPONSE ===\n", "=== API RESPONSE", apiResponse); errWrite != nil {
+	if errWrite := writeAPISection(w, "=== API RESPONSE ===\n", "=== API RESPONSE", apiResponse, apiResponseTimestamp); errWrite != nil {
 		return errWrite
 	}
 	return writeResponseSection(w, statusCode, true, responseHeaders, bytes.NewReader(response), decompressErr, true)
@@ -583,7 +600,7 @@ func writeRequestInfoWithBody(
 	return nil
 }

-func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, payload []byte) error {
+func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, payload []byte, timestamp time.Time) error {
 	if len(payload) == 0 {
 		return nil
 	}
@@ -601,6 +618,11 @@ func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, pa
 		if _, errWrite := io.WriteString(w, sectionHeader); errWrite != nil {
 			return errWrite
 		}
+		if !timestamp.IsZero() {
+			if _, errWrite := io.WriteString(w, fmt.Sprintf("Timestamp: %s\n", timestamp.Format(time.RFC3339Nano))); errWrite != nil {
+				return errWrite
+			}
+		}
 		if _, errWrite := w.Write(payload); errWrite != nil {
 			return errWrite
 		}
@@ -974,6 +996,9 @@ type FileStreamingLogWriter struct {

 	// apiResponse stores the upstream API response data.
 	apiResponse []byte
+
+	// apiResponseTimestamp captures when the API response was received.
+	apiResponseTimestamp time.Time
 }

 // WriteChunkAsync writes a response chunk asynchronously (non-blocking).
@@ -1053,6 +1078,12 @@ func (w *FileStreamingLogWriter) WriteAPIResponse(apiResponse []byte) error {
 	return nil
 }

+func (w *FileStreamingLogWriter) SetFirstChunkTimestamp(timestamp time.Time) { // stores the time that writeFinalLog later passes to the "=== API RESPONSE ===" section
+	if !timestamp.IsZero() { // a zero time is ignored, so it can never clear a timestamp recorded earlier
+		w.apiResponseTimestamp = timestamp // plain field write, no locking here; NOTE(review): confirm callers cannot race with Close
+	}
+}
+
 // Close finalizes the log file and cleans up resources.
 // It writes all buffered data to the file in the correct order:
 // API REQUEST -> API RESPONSE -> RESPONSE (status, headers, body chunks)
@@ -1140,10 +1171,10 @@ func (w *FileStreamingLogWriter) writeFinalLog(logFile *os.File) error {
 	if errWrite := writeRequestInfoWithBody(logFile, w.url, w.method, w.requestHeaders, nil, w.requestBodyPath, w.timestamp); errWrite != nil {
 		return errWrite
 	}
-	if errWrite := writeAPISection(logFile, "=== API REQUEST ===\n", "=== API REQUEST", w.apiRequest); errWrite != nil {
+	if errWrite := writeAPISection(logFile, "=== API REQUEST ===\n", "=== API REQUEST", w.apiRequest, time.Time{}); errWrite != nil {
 		return errWrite
 	}
-	if errWrite := writeAPISection(logFile, "=== API RESPONSE ===\n", "=== API RESPONSE", w.apiResponse); errWrite != nil {
+	if errWrite := writeAPISection(logFile, "=== API RESPONSE ===\n", "=== API RESPONSE", w.apiResponse, w.apiResponseTimestamp); errWrite != nil {
 		return errWrite
 	}

@@ -1220,6 +1251,8 @@ func (w *NoOpStreamingLogWriter) WriteAPIResponse(_ []byte) error {
 	return nil
 }

+func (w *NoOpStreamingLogWriter) SetFirstChunkTimestamp(_ time.Time) {} // no-op: the timestamp argument is discarded and nothing is recorded
+
 // Close is a no-op implementation that does nothing and always returns nil.
 //
 // Returns:
--- a/internal/misc/codex_instructions.go
+++ b/internal/misc/codex_instructions.go
@@ -7,11 +7,27 @@ import (
 	"embed"
 	_ "embed"
 	"strings"
+	"sync/atomic"

 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )

+// codexInstructionsEnabled gates CodexInstructionsForModel. While it is false (the zero value of atomic.Bool,
+// hence the default) that function returns (true, "") immediately, before it inspects the user agent.
+// It is written through SetCodexInstructionsEnabled and read through GetCodexInstructionsEnabled, both atomically.
+var codexInstructionsEnabled atomic.Bool
+
+// SetCodexInstructionsEnabled atomically stores enabled as the flag that GetCodexInstructionsEnabled reports.
+func SetCodexInstructionsEnabled(enabled bool) {
+	codexInstructionsEnabled.Store(enabled)
+}
+
+// GetCodexInstructionsEnabled atomically loads the flag; it reports false until SetCodexInstructionsEnabled(true) is called.
+func GetCodexInstructionsEnabled() bool {
+	return codexInstructionsEnabled.Load()
+}
+
 //go:embed codex_instructions
 var codexInstructionsDir embed.FS

@@ -124,6 +140,9 @@ func codexInstructionsForCodex(modelName, systemInstructions string) (bool, stri
 }

 func CodexInstructionsForModel(modelName, systemInstructions, userAgent string) (bool, string) {
+	if !GetCodexInstructionsEnabled() {
+		return true, ""
+	}
 	if IsOpenCodeUserAgent(userAgent) {
 		return codexInstructionsForOpenCode(systemInstructions)
 	}
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -1,784 +1,69 @@
-// Package registry provides model definitions for various AI service providers.
-// This file contains static model definitions that can be used by clients
-// when registering their supported models.
+// Package registry provides model definitions and lookup helpers for various AI providers.
+// Static model metadata is stored in model_definitions_static_data.go.
 package registry

-// GetClaudeModels returns the standard Claude model definitions
-func GetClaudeModels() []*ModelInfo {
-	return []*ModelInfo{
+import (
+	"sort"
+	"strings"
+)

-		{
-			ID:                  "claude-haiku-4-5-20251001",
-			Object:              "model",
-			Created:             1759276800, // 2025-10-01
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.5 Haiku",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			// Thinking: not supported for Haiku models
-		},
-		{
-			ID:                  "claude-sonnet-4-5-20250929",
-			Object:              "model",
-			Created:             1759104000, // 2025-09-29
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.5 Sonnet",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                  "claude-opus-4-5-20251101",
-			Object:              "model",
-			Created:             1761955200, // 2025-11-01
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.5 Opus",
-			Description:         "Premium model combining maximum intelligence with practical performance",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                  "claude-opus-4-1-20250805",
-			Object:              "model",
-			Created:             1722945600, // 2025-08-05
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.1 Opus",
-			ContextLength:       200000,
-			MaxCompletionTokens: 32000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                  "claude-opus-4-20250514",
-			Object:              "model",
-			Created:             1715644800, // 2025-05-14
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4 Opus",
-			ContextLength:       200000,
-			MaxCompletionTokens: 32000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                  "claude-sonnet-4-20250514",
-			Object:              "model",
-			Created:             1715644800, // 2025-05-14
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4 Sonnet",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                  "claude-3-7-sonnet-20250219",
-			Object:              "model",
-			Created:             1708300800, // 2025-02-19
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 3.7 Sonnet",
-			ContextLength:       128000,
-			MaxCompletionTokens: 8192,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                  "claude-3-5-haiku-20241022",
-			Object:              "model",
-			Created:             1729555200, // 2024-10-22
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 3.5 Haiku",
-			ContextLength:       128000,
-			MaxCompletionTokens: 8192,
-			// Thinking: not supported for Haiku models
-		},
-	}
-}
-
-// GetGeminiModels returns the standard Gemini model definitions
-func GetGeminiModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                         "gemini-2.5-pro",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-pro",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Pro",
-			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash",
-			Version:                    "001",
-			DisplayName:                "Gemini 2.5 Flash",
-			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash-lite",
-			Object:                     "model",
-			Created:                    1753142400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-lite",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Lite",
-			Description:                "Our smallest and most cost effective model, built for at scale usage.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-3-pro-preview",
-			Object:                     "model",
-			Created:                    1737158400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-pro-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Pro Preview",
-			Description:                "Gemini 3 Pro Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
-		},
-		{
-			ID:                         "gemini-3-flash-preview",
-			Object:                     "model",
-			Created:                    1765929600,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-flash-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Flash Preview",
-			Description:                "Gemini 3 Flash Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
-		},
-		{
-			ID:                         "gemini-3-pro-image-preview",
-			Object:                     "model",
-			Created:                    1737158400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-pro-image-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Pro Image Preview",
-			Description:                "Gemini 3 Pro Image Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
-		},
-	}
-}
-
-func GetGeminiVertexModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                         "gemini-2.5-pro",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-pro",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Pro",
-			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash",
-			Version:                    "001",
-			DisplayName:                "Gemini 2.5 Flash",
-			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash-lite",
-			Object:                     "model",
-			Created:                    1753142400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-lite",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Lite",
-			Description:                "Our smallest and most cost effective model, built for at scale usage.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-3-pro-preview",
-			Object:                     "model",
-			Created:                    1737158400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-pro-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Pro Preview",
-			Description:                "Gemini 3 Pro Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
-		},
-		{
-			ID:                         "gemini-3-flash-preview",
-			Object:                     "model",
-			Created:                    1765929600,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-flash-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Flash Preview",
-			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
-		},
-		{
-			ID:                         "gemini-3-pro-image-preview",
-			Object:                     "model",
-			Created:                    1737158400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-pro-image-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Pro Image Preview",
-			Description:                "Gemini 3 Pro Image Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
-		},
-	}
-}
-
-// GetGeminiCLIModels returns the standard Gemini model definitions
-func GetGeminiCLIModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                         "gemini-2.5-pro",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-pro",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Pro",
-			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash",
-			Version:                    "001",
-			DisplayName:                "Gemini 2.5 Flash",
-			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash-lite",
-			Object:                     "model",
-			Created:                    1753142400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-lite",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Lite",
-			Description:                "Our smallest and most cost effective model, built for at scale usage.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-3-pro-preview",
-			Object:                     "model",
-			Created:                    1737158400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-pro-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Pro Preview",
-			Description:                "Our most intelligent model with SOTA reasoning and multimodal understanding, and powerful agentic and vibe coding capabilities",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
-		},
-		{
-			ID:                         "gemini-3-flash-preview",
-			Object:                     "model",
-			Created:                    1765929600,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-flash-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Flash Preview",
-			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
-		},
-	}
-}
-
-// GetAIStudioModels returns the Gemini model definitions for AI Studio integrations
-func GetAIStudioModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                         "gemini-2.5-pro",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-pro",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Pro",
-			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash",
-			Version:                    "001",
-			DisplayName:                "Gemini 2.5 Flash",
-			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash-lite",
-			Object:                     "model",
-			Created:                    1753142400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-lite",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Lite",
-			Description:                "Our smallest and most cost effective model, built for at scale usage.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-3-pro-preview",
-			Object:                     "model",
-			Created:                    1737158400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-pro-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Pro Preview",
-			Description:                "Gemini 3 Pro Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
-		},
-		{
-			ID:                         "gemini-3-flash-preview",
-			Object:                     "model",
-			Created:                    1765929600,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-flash-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Flash Preview",
-			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
-		},
-		{
-			ID:                         "gemini-pro-latest",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-pro-latest",
-			Version:                    "2.5",
-			DisplayName:                "Gemini Pro Latest",
-			Description:                "Latest release of Gemini Pro",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-flash-latest",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-flash-latest",
-			Version:                    "2.5",
-			DisplayName:                "Gemini Flash Latest",
-			Description:                "Latest release of Gemini Flash",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-flash-lite-latest",
-			Object:                     "model",
-			Created:                    1753142400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-flash-lite-latest",
-			Version:                    "2.5",
-			DisplayName:                "Gemini Flash-Lite Latest",
-			Description:                "Latest release of Gemini Flash-Lite",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash-image-preview",
-			Object:                     "model",
-			Created:                    1756166400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-image-preview",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Image Preview",
-			Description:                "State-of-the-art image generation and editing model.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           8192,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			// image models don't support thinkingConfig; leave Thinking nil
-		},
-		{
-			ID:                         "gemini-2.5-flash-image",
-			Object:                     "model",
-			Created:                    1759363200,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-image",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Image",
-			Description:                "State-of-the-art image generation and editing model.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           8192,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			// image models don't support thinkingConfig; leave Thinking nil
-		},
-	}
-}
-
-// GetOpenAIModels returns the standard OpenAI model definitions
-func GetOpenAIModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                  "gpt-5",
-			Object:              "model",
-			Created:             1754524800,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-08-07",
-			DisplayName:         "GPT 5",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5-codex",
-			Object:              "model",
-			Created:             1757894400,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-09-15",
-			DisplayName:         "GPT 5 Codex",
-			Description:         "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5-codex-mini",
-			Object:              "model",
-			Created:             1762473600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-11-07",
-			DisplayName:         "GPT 5 Codex Mini",
-			Description:         "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5.1",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5.1-codex",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5.1 Codex",
-			Description:         "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5.1-codex-mini",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5.1 Codex Mini",
-			Description:         "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5.1-codex-max",
-			Object:              "model",
-			Created:             1763424000,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-max",
-			DisplayName:         "GPT 5.1 Codex Max",
-			Description:         "Stable version of GPT 5.1 Codex Max",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
-		},
-		{
-			ID:                  "gpt-5.2",
-			Object:              "model",
-			Created:             1765440000,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.2",
-			DisplayName:         "GPT 5.2",
-			Description:         "Stable version of GPT 5.2",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
-		},
-		{
-			ID:                  "gpt-5.2-codex",
-			Object:              "model",
-			Created:             1765440000,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.2",
-			DisplayName:         "GPT 5.2 Codex",
-			Description:         "Stable version of GPT 5.2 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
-		},
-	}
-}
-
-// GetQwenModels returns the standard Qwen model definitions
-func GetQwenModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                  "qwen3-coder-plus",
-			Object:              "model",
-			Created:             1753228800,
-			OwnedBy:             "qwen",
-			Type:                "qwen",
-			Version:             "3.0",
-			DisplayName:         "Qwen3 Coder Plus",
-			Description:         "Advanced code generation and understanding model",
-			ContextLength:       32768,
-			MaxCompletionTokens: 8192,
-			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
-		},
-		{
-			ID:                  "qwen3-coder-flash",
-			Object:              "model",
-			Created:             1753228800,
-			OwnedBy:             "qwen",
-			Type:                "qwen",
-			Version:             "3.0",
-			DisplayName:         "Qwen3 Coder Flash",
-			Description:         "Fast code generation model",
-			ContextLength:       8192,
-			MaxCompletionTokens: 2048,
-			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
-		},
-		{
-			ID:                  "vision-model",
-			Object:              "model",
-			Created:             1758672000,
-			OwnedBy:             "qwen",
-			Type:                "qwen",
-			Version:             "3.0",
-			DisplayName:         "Qwen3 Vision Model",
-			Description:         "Vision model model",
-			ContextLength:       32768,
-			MaxCompletionTokens: 2048,
-			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
-		},
-	}
-}
-
-// iFlowThinkingSupport is a shared ThinkingSupport configuration for iFlow models
-// that support thinking mode via chat_template_kwargs.enable_thinking (boolean toggle).
-// Uses level-based configuration so standard normalization flows apply before conversion.
-var iFlowThinkingSupport = &ThinkingSupport{
-	Levels: []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"},
-}
-
-// GetIFlowModels returns supported models for iFlow OAuth accounts.
-func GetIFlowModels() []*ModelInfo {
-	entries := []struct {
-		ID          string
-		DisplayName string
-		Description string
-		Created     int64
-		Thinking    *ThinkingSupport
-	}{
-		{ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
-		{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
-		{ID: "qwen3-max", DisplayName: "Qwen3-Max", Description: "Qwen3 flagship model", Created: 1758672000},
-		{ID: "qwen3-vl-plus", DisplayName: "Qwen3-VL-Plus", Description: "Qwen3 multimodal vision-language", Created: 1758672000},
-		{ID: "qwen3-max-preview", DisplayName: "Qwen3-Max-Preview", Description: "Qwen3 Max preview build", Created: 1757030400},
-		{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
-		{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400, Thinking: iFlowThinkingSupport},
-		{ID: "glm-4.7", DisplayName: "GLM-4.7", Description: "Zhipu GLM 4.7 general model", Created: 1766448000, Thinking: iFlowThinkingSupport},
-		{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
-		{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200},
-		{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Chat", Created: 1764576000},
-		{ID: "deepseek-v3.2-reasoner", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Reasoner", Created: 1764576000},
-		{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
-		{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
-		{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
-		{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
-		{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
-		{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
-		{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
-		{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
-		{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
-		{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
-	}
-	models := make([]*ModelInfo, 0, len(entries))
-	for _, entry := range entries {
-		models = append(models, &ModelInfo{
-			ID:          entry.ID,
-			Object:      "model",
-			Created:     entry.Created,
-			OwnedBy:     "iflow",
-			Type:        "iflow",
-			DisplayName: entry.DisplayName,
-			Description: entry.Description,
-			Thinking:    entry.Thinking,
+// GetStaticModelDefinitionsByChannel returns static model definitions for a given channel/provider.
+// Matching ignores case and surrounding whitespace; it returns nil for an unknown channel or an empty antigravity config.
+//
+// Supported channels:
+//   - claude
+//   - gemini
+//   - vertex
+//   - gemini-cli
+//   - aistudio
+//   - codex
+//   - qwen
+//   - iflow
+//   - antigravity (returns static overrides only)
+func GetStaticModelDefinitionsByChannel(channel string) []*ModelInfo {
+	key := strings.ToLower(strings.TrimSpace(channel)) // normalize so the switch below is case- and whitespace-insensitive
+	switch key {
+	case "claude":
+		return GetClaudeModels()
+	case "gemini":
+		return GetGeminiModels()
+	case "vertex":
+		return GetGeminiVertexModels()
+	case "gemini-cli":
+		return GetGeminiCLIModels()
+	case "aistudio":
+		return GetAIStudioModels()
+	case "codex":
+		return GetOpenAIModels()
+	case "qwen":
+		return GetQwenModels()
+	case "iflow":
+		return GetIFlowModels()
+	case "antigravity":
+		cfg := GetAntigravityModelConfig()
+		if len(cfg) == 0 {
+			return nil
+		}
+		models := make([]*ModelInfo, 0, len(cfg))
+		for modelID, entry := range cfg {
+			if modelID == "" || entry == nil { // skip entries with no ID or no config
+				continue
+			}
+			models = append(models, &ModelInfo{
+				ID:                  modelID,
+				Object:              "model",
+				OwnedBy:             "antigravity",
+				Type:                "antigravity",
+				Thinking:            entry.Thinking,
+				MaxCompletionTokens: entry.MaxCompletionTokens,
+			})
+		}
+		sort.Slice(models, func(i, j int) bool { // map iteration order is unspecified; order by lower-cased ID
+			return strings.ToLower(models[i].ID) < strings.ToLower(models[j].ID)
 		})
-	}
-	return models
-}
-
-// AntigravityModelConfig captures static antigravity model overrides, including
-// Thinking budget limits and provider max completion tokens.
-type AntigravityModelConfig struct {
-	Thinking            *ThinkingSupport
-	MaxCompletionTokens int
-	Name                string
-}
-
-// GetAntigravityModelConfig returns static configuration for antigravity models.
-// Keys use the ALIASED model names (after modelName2Alias conversion) for direct lookup.
-func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
-	return map[string]*AntigravityModelConfig{
-		"gemini-2.5-flash":                        {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"},
-		"gemini-2.5-flash-lite":                   {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"},
-		"gemini-2.5-computer-use-preview-10-2025": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-2.5-computer-use-preview-10-2025"},
-		"gemini-3-pro-preview":                    {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-preview"},
-		"gemini-3-pro-image-preview":              {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image-preview"},
-		"gemini-3-flash-preview":                  {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash-preview"},
-		"gemini-claude-sonnet-4-5-thinking":       {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
-		"gemini-claude-opus-4-5-thinking":         {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		return models
+	default:
+		return nil
 	}
 }

@@ -788,6 +73,7 @@ func LookupStaticModelInfo(modelID string) *ModelInfo {
 	if modelID == "" {
 		return nil
 	}
+
 	allModels := [][]*ModelInfo{
 		GetClaudeModels(),
 		GetGeminiModels(),
@@ -805,5 +91,15 @@ func LookupStaticModelInfo(modelID string) *ModelInfo {
 			}
 		}
 	}
+
+	// Check Antigravity static config
+	if cfg := GetAntigravityModelConfig()[modelID]; cfg != nil {
+		return &ModelInfo{
+			ID:                  modelID,
+			Thinking:            cfg.Thinking,
+			MaxCompletionTokens: cfg.MaxCompletionTokens,
+		}
+	}
+
 	return nil
 }
--- a/internal/registry/model_definitions_static_data.go
+++ b/internal/registry/model_definitions_static_data.go
@@ -0,0 +1,846 @@
+// Package registry provides model definitions for various AI service providers.
+// This file stores the static model metadata catalog.
+package registry
+
+// GetClaudeModels returns the static Claude model definitions; Created is the release date (UTC midnight) in Unix seconds.
+func GetClaudeModels() []*ModelInfo {
+	return []*ModelInfo{
+
+		{
+			ID:                  "claude-haiku-4-5-20251001",
+			Object:              "model",
+			Created:             1759276800, // 2025-10-01
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Haiku",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			// Thinking: not supported for Haiku models
+		},
+		{
+			ID:                  "claude-sonnet-4-5-20250929",
+			Object:              "model",
+			Created:             1759104000, // 2025-09-29
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Sonnet",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
+		},
+		{
+			ID:                  "claude-opus-4-5-20251101",
+			Object:              "model",
+			Created:             1761955200, // 2025-11-01
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Opus",
+			Description:         "Premium model combining maximum intelligence with practical performance",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
+		},
+		{
+			ID:                  "claude-opus-4-1-20250805",
+			Object:              "model",
+			Created:             1754352000, // 2025-08-05
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.1 Opus",
+			ContextLength:       200000,
+			MaxCompletionTokens: 32000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
+		},
+		{
+			ID:                  "claude-opus-4-20250514",
+			Object:              "model",
+			Created:             1747180800, // 2025-05-14
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4 Opus",
+			ContextLength:       200000,
+			MaxCompletionTokens: 32000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
+		},
+		{
+			ID:                  "claude-sonnet-4-20250514",
+			Object:              "model",
+			Created:             1747180800, // 2025-05-14
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4 Sonnet",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
+		},
+		{
+			ID:                  "claude-3-7-sonnet-20250219",
+			Object:              "model",
+			Created:             1739923200, // 2025-02-19
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 3.7 Sonnet",
+			ContextLength:       128000, // NOTE(review): the Claude 4.x entries above use 200000 — confirm
+			MaxCompletionTokens: 8192,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
+		},
+		{
+			ID:                  "claude-3-5-haiku-20241022",
+			Object:              "model",
+			Created:             1729555200, // 2024-10-22
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 3.5 Haiku",
+			ContextLength:       128000, // NOTE(review): the Claude 4.x entries above use 200000 — confirm
+			MaxCompletionTokens: 8192,
+			// Thinking: not supported for Haiku models
+		},
+	}
+}
+
+// GetGeminiModels returns the static Gemini model definitions (served for the "gemini" channel).
+func GetGeminiModels() []*ModelInfo {
+	return []*ModelInfo{
+		{
+			ID:                         "gemini-2.5-pro",
+			Object:                     "model",
+			Created:                    1750118400, // 2025-06-17
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-pro",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Pro",
+			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash",
+			Object:                     "model",
+			Created:                    1750118400, // 2025-06-17
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash",
+			Version:                    "001", // NOTE(review): sibling 2.5 entries use "2.5" — confirm
+			DisplayName:                "Gemini 2.5 Flash",
+			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash-lite",
+			Object:                     "model",
+			Created:                    1753142400, // 2025-07-22
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash-lite",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Flash Lite",
+			Description:                "Our smallest and most cost effective model, built for at scale usage.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-3-pro-preview",
+			Object:                     "model",
+			Created:                    1737158400, // 2025-01-18; NOTE(review): earlier than the 2.5 entries above — confirm
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Preview",
+			Description:                "Gemini 3 Pro Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
+		},
+		{
+			ID:                         "gemini-3-flash-preview",
+			Object:                     "model",
+			Created:                    1765929600, // 2025-12-17
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-flash-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Flash Preview",
+			Description:                "Gemini 3 Flash Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
+		},
+		{
+			ID:                         "gemini-3-pro-image-preview",
+			Object:                     "model",
+			Created:                    1737158400, // 2025-01-18; NOTE(review): earlier than the 2.5 entries above — confirm
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-image-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Image Preview",
+			Description:                "Gemini 3 Pro Image Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
+		},
+	}
+}
+
+func GetGeminiVertexModels() []*ModelInfo { // static catalog returned for the "vertex" channel: Gemini entries plus Imagen
+	return []*ModelInfo{
+		{
+			ID:                         "gemini-2.5-pro",
+			Object:                     "model",
+			Created:                    1750118400, // 2025-06-17
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-pro",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Pro",
+			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash",
+			Object:                     "model",
+			Created:                    1750118400, // 2025-06-17
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash",
+			Version:                    "001", // NOTE(review): sibling 2.5 entries use "2.5" — confirm
+			DisplayName:                "Gemini 2.5 Flash",
+			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash-lite",
+			Object:                     "model",
+			Created:                    1753142400, // 2025-07-22
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash-lite",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Flash Lite",
+			Description:                "Our smallest and most cost effective model, built for at scale usage.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-3-pro-preview",
+			Object:                     "model",
+			Created:                    1737158400, // 2025-01-18; NOTE(review): earlier than the 2.5 entries above — confirm
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Preview",
+			Description:                "Gemini 3 Pro Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
+		},
+		{
+			ID:                         "gemini-3-flash-preview",
+			Object:                     "model",
+			Created:                    1765929600, // 2025-12-17
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-flash-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Flash Preview",
+			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
+		},
+		{
+			ID:                         "gemini-3-pro-image-preview",
+			Object:                     "model",
+			Created:                    1737158400, // 2025-01-18; NOTE(review): earlier than the 2.5 entries above — confirm
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-image-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Image Preview",
+			Description:                "Gemini 3 Pro Image Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
+		},
+		// Imagen image generation models - use :predict action
+		{
+			ID:                         "imagen-4.0-generate-001",
+			Object:                     "model",
+			Created:                    1750000000, // ~2025-06-15; round number, presumably a placeholder — confirm
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/imagen-4.0-generate-001",
+			Version:                    "4.0",
+			DisplayName:                "Imagen 4.0 Generate",
+			Description:                "Imagen 4.0 image generation model",
+			SupportedGenerationMethods: []string{"predict"},
+		},
+		{
+			ID:                         "imagen-4.0-ultra-generate-001",
+			Object:                     "model",
+			Created:                    1750000000, // ~2025-06-15; round number, presumably a placeholder — confirm
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/imagen-4.0-ultra-generate-001",
+			Version:                    "4.0",
+			DisplayName:                "Imagen 4.0 Ultra Generate",
+			Description:                "Imagen 4.0 Ultra high-quality image generation model",
+			SupportedGenerationMethods: []string{"predict"},
+		},
+		{
+			ID:                         "imagen-3.0-generate-002",
+			Object:                     "model",
+			Created:                    1740000000, // ~2025-02-19; round number, presumably a placeholder — confirm
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/imagen-3.0-generate-002",
+			Version:                    "3.0",
+			DisplayName:                "Imagen 3.0 Generate",
+			Description:                "Imagen 3.0 image generation model",
+			SupportedGenerationMethods: []string{"predict"},
+		},
+		{
+			ID:                         "imagen-3.0-fast-generate-001",
+			Object:                     "model",
+			Created:                    1740000000, // ~2025-02-19; round number, presumably a placeholder — confirm
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/imagen-3.0-fast-generate-001",
+			Version:                    "3.0",
+			DisplayName:                "Imagen 3.0 Fast Generate",
+			Description:                "Imagen 3.0 fast image generation model",
+			SupportedGenerationMethods: []string{"predict"},
+		},
+		{
+			ID:                         "imagen-4.0-fast-generate-001",
+			Object:                     "model",
+			Created:                    1750000000, // ~2025-06-15; round number, presumably a placeholder — confirm
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/imagen-4.0-fast-generate-001",
+			Version:                    "4.0",
+			DisplayName:                "Imagen 4.0 Fast Generate",
+			Description:                "Imagen 4.0 fast image generation model",
+			SupportedGenerationMethods: []string{"predict"},
+		},
+	}
+}
+
+// GetGeminiCLIModels returns the static Gemini 2.5 and Gemini 3 preview model definitions; every call builds a new slice.
+func GetGeminiCLIModels() []*ModelInfo {
+	return []*ModelInfo{
+		{
+			ID:                         "gemini-2.5-pro",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-pro",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Pro",
+			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash",
+			Version:                    "001", // NOTE(review): the other 2.5 entries use "2.5" here — confirm "001" is intended
+			DisplayName:                "Gemini 2.5 Flash",
+			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash-lite",
+			Object:                     "model",
+			Created:                    1753142400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash-lite",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Flash Lite",
+			Description:                "Our smallest and most cost effective model, built for at scale usage.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-3-pro-preview",
+			Object:                     "model",
+			Created:                    1737158400, // NOTE(review): decodes to 2025-01-18 UTC, earlier than every 2.5 entry above — confirm the intended date
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Preview",
+			Description:                "Our most intelligent model with SOTA reasoning and multimodal understanding, and powerful agentic and vibe coding capabilities",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
+		},
+		{
+			ID:                         "gemini-3-flash-preview",
+			Object:                     "model",
+			Created:                    1765929600,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-flash-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Flash Preview",
+			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
+		},
+	}
+}
+
+// GetAIStudioModels returns the static Gemini model definitions for AI Studio integrations, including the "-latest" IDs and the image model.
+func GetAIStudioModels() []*ModelInfo {
+	return []*ModelInfo{
+		{
+			ID:                         "gemini-2.5-pro",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-pro",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Pro",
+			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash",
+			Version:                    "001", // NOTE(review): the other 2.5 entries use "2.5" here — confirm "001" is intended
+			DisplayName:                "Gemini 2.5 Flash",
+			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash-lite",
+			Object:                     "model",
+			Created:                    1753142400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash-lite",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Flash Lite",
+			Description:                "Our smallest and most cost effective model, built for at scale usage.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-3-pro-preview",
+			Object:                     "model",
+			Created:                    1737158400, // NOTE(review): decodes to 2025-01-18 UTC, earlier than every 2.5 entry above — confirm the intended date
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Preview",
+			Description:                "Gemini 3 Pro Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-3-flash-preview",
+			Object:                     "model",
+			Created:                    1765929600,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-flash-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Flash Preview",
+			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-pro-latest",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-pro-latest",
+			Version:                    "2.5",
+			DisplayName:                "Gemini Pro Latest",
+			Description:                "Latest release of Gemini Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-flash-latest",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-flash-latest",
+			Version:                    "2.5",
+			DisplayName:                "Gemini Flash Latest",
+			Description:                "Latest release of Gemini Flash",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-flash-lite-latest",
+			Object:                     "model",
+			Created:                    1753142400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-flash-lite-latest",
+			Version:                    "2.5",
+			DisplayName:                "Gemini Flash-Lite Latest",
+			Description:                "Latest release of Gemini Flash-Lite",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, // NOTE(review): Min is 512 here but 0 for gemini-2.5-flash-lite above — confirm
+		},
+		// Disabled entry: the "-preview" image model is kept for reference only and is not returned.
+		// {
+		// 	ID:                         "gemini-2.5-flash-image-preview",
+		// 	Object:                     "model",
+		// 	Created:                    1756166400,
+		// 	OwnedBy:                    "google",
+		// 	Type:                       "gemini",
+		// 	Name:                       "models/gemini-2.5-flash-image-preview",
+		// 	Version:                    "2.5",
+		// 	DisplayName:                "Gemini 2.5 Flash Image Preview",
+		// 	Description:                "State-of-the-art image generation and editing model.",
+		// 	InputTokenLimit:            1048576,
+		// 	OutputTokenLimit:           8192,
+		// 	SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+		// },
+		{
+			ID:                         "gemini-2.5-flash-image",
+			Object:                     "model",
+			Created:                    1759363200,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash-image",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Flash Image",
+			Description:                "State-of-the-art image generation and editing model.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           8192,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			// image models don't support thinkingConfig; leave Thinking nil
+		},
+	}
+}
+
+// GetOpenAIModels returns the static OpenAI GPT-5 family model definitions; every call builds a new slice.
+func GetOpenAIModels() []*ModelInfo {
+	return []*ModelInfo{
+		{
+			ID:                  "gpt-5",
+			Object:              "model",
+			Created:             1754524800,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5-2025-08-07",
+			DisplayName:         "GPT 5",
+			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
+		},
+		{
+			ID:                  "gpt-5-codex",
+			Object:              "model",
+			Created:             1757894400,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5-2025-09-15",
+			DisplayName:         "GPT 5 Codex",
+			Description:         "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
+		},
+		{
+			ID:                  "gpt-5-codex-mini",
+			Object:              "model",
+			Created:             1762473600,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5-2025-11-07",
+			DisplayName:         "GPT 5 Codex Mini",
+			Description:         "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
+		},
+		{
+			ID:                  "gpt-5.1",
+			Object:              "model",
+			Created:             1762905600,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5.1-2025-11-12",
+			DisplayName:         "GPT 5.1", // was "GPT 5": copy-paste from the gpt-5 entry made the two models indistinguishable by name
+			Description:         "Stable version of GPT 5.1, The best model for coding and agentic tasks across domains.",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
+		},
+		{
+			ID:                  "gpt-5.1-codex",
+			Object:              "model",
+			Created:             1762905600,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5.1-2025-11-12",
+			DisplayName:         "GPT 5.1 Codex",
+			Description:         "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
+		},
+		{
+			ID:                  "gpt-5.1-codex-mini",
+			Object:              "model",
+			Created:             1762905600,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5.1-2025-11-12",
+			DisplayName:         "GPT 5.1 Codex Mini",
+			Description:         "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
+		},
+		{
+			ID:                  "gpt-5.1-codex-max",
+			Object:              "model",
+			Created:             1763424000,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5.1-max",
+			DisplayName:         "GPT 5.1 Codex Max",
+			Description:         "Stable version of GPT 5.1 Codex Max",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
+		},
+		{
+			ID:                  "gpt-5.2",
+			Object:              "model",
+			Created:             1765440000,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5.2",
+			DisplayName:         "GPT 5.2",
+			Description:         "Stable version of GPT 5.2",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
+		},
+		{
+			ID:                  "gpt-5.2-codex",
+			Object:              "model",
+			Created:             1765440000,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5.2",
+			DisplayName:         "GPT 5.2 Codex",
+			Description:         "Stable version of GPT 5.2 Codex, The best model for coding and agentic tasks across domains.",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
+		},
+	}
+}
+
+// GetQwenModels returns the static Qwen model definitions (two coder models and one vision model); every call builds a new slice.
+func GetQwenModels() []*ModelInfo {
+	return []*ModelInfo{
+		{
+			ID:                  "qwen3-coder-plus",
+			Object:              "model",
+			Created:             1753228800,
+			OwnedBy:             "qwen",
+			Type:                "qwen",
+			Version:             "3.0",
+			DisplayName:         "Qwen3 Coder Plus",
+			Description:         "Advanced code generation and understanding model",
+			ContextLength:       32768,
+			MaxCompletionTokens: 8192,
+			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
+		},
+		{
+			ID:                  "qwen3-coder-flash",
+			Object:              "model",
+			Created:             1753228800,
+			OwnedBy:             "qwen",
+			Type:                "qwen",
+			Version:             "3.0",
+			DisplayName:         "Qwen3 Coder Flash",
+			Description:         "Fast code generation model",
+			ContextLength:       8192,
+			MaxCompletionTokens: 2048,
+			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
+		},
+		{
+			ID:                  "vision-model",
+			Object:              "model",
+			Created:             1758672000,
+			OwnedBy:             "qwen",
+			Type:                "qwen",
+			Version:             "3.0",
+			DisplayName:         "Qwen3 Vision Model",
+			Description:         "Vision model", // was "Vision model model": duplicated word in the user-facing text
+			ContextLength:       32768,
+			MaxCompletionTokens: 2048,
+			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
+		},
+	}
+}
+
+// iFlowThinkingSupport is the ThinkingSupport configuration shared by the iFlow models
+// that support thinking mode via chat_template_kwargs.enable_thinking (boolean toggle).
+// It uses level-based configuration so that standard normalization flows apply before conversion.
+var iFlowThinkingSupport = &ThinkingSupport{ // one package-level pointer: every entry that references it aliases the same value
+	Levels: []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"},
+}
+
+// GetIFlowModels builds the model list offered to iFlow OAuth accounts from a compact in-function table.
+func GetIFlowModels() []*ModelInfo {
+	catalog := []struct {
+		ID          string
+		DisplayName string
+		Description string
+		Created     int64
+		Thinking    *ThinkingSupport
+	}{
+		{ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
+		{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
+		{ID: "qwen3-max", DisplayName: "Qwen3-Max", Description: "Qwen3 flagship model", Created: 1758672000},
+		{ID: "qwen3-vl-plus", DisplayName: "Qwen3-VL-Plus", Description: "Qwen3 multimodal vision-language", Created: 1758672000},
+		{ID: "qwen3-max-preview", DisplayName: "Qwen3-Max-Preview", Description: "Qwen3 Max preview build", Created: 1757030400, Thinking: iFlowThinkingSupport},
+		{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
+		{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400, Thinking: iFlowThinkingSupport},
+		{ID: "glm-4.7", DisplayName: "GLM-4.7", Description: "Zhipu GLM 4.7 general model", Created: 1766448000, Thinking: iFlowThinkingSupport},
+		{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
+		{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200},
+		{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Chat", Created: 1764576000},
+		{ID: "deepseek-v3.2-reasoner", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Reasoner", Created: 1764576000},
+		{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000, Thinking: iFlowThinkingSupport},
+		{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200, Thinking: iFlowThinkingSupport},
+		{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
+		{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
+		{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
+		{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
+		{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
+		{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
+		{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
+		{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
+		{ID: "iflow-rome-30ba3b", DisplayName: "iFlow-ROME", Description: "iFlow Rome 30BA3B model", Created: 1736899200},
+	}
+	result := make([]*ModelInfo, len(catalog)) // one slot per table row, filled in table order
+	for i, row := range catalog {
+		result[i] = &ModelInfo{
+			ID:          row.ID,
+			Object:      "model",
+			Created:     row.Created,
+			OwnedBy:     "iflow",
+			Type:        "iflow",
+			DisplayName: row.DisplayName,
+			Description: row.Description,
+			Thinking:    row.Thinking, // nil for rows that set no Thinking value
+		}
+	}
+	return result
+}
+
+// AntigravityModelConfig captures static per-model overrides for antigravity models:
+// Thinking budget limits and the provider's max completion tokens.
+type AntigravityModelConfig struct {
+	Thinking            *ThinkingSupport // thinking capability override; nil when an entry sets none
+	MaxCompletionTokens int              // provider max completion tokens; zero when an entry sets none
+}
+
+// GetAntigravityModelConfig returns static configuration for antigravity models as a newly built map.
+// Keys use upstream model names returned by the Antigravity models endpoint.
+func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
+	return map[string]*AntigravityModelConfig{
+		// "rev19-uic3-1p":              {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
+		"gemini-2.5-flash":           {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
+		"gemini-2.5-flash-lite":      {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
+		"gemini-3-pro-high":          {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
+		"gemini-3-pro-image":         {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
+		"gemini-3-flash":             {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
+		"claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		"claude-opus-4-5-thinking":   {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		"claude-sonnet-4-5":          {MaxCompletionTokens: 64000},
+		"gpt-oss-120b-medium":        {}, // listed with a zero-value config: nil Thinking, MaxCompletionTokens 0
+		"tab_flash_lite_preview":     {}, // listed with a zero-value config: nil Thinking, MaxCompletionTokens 0
+	}
+}
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -51,6 +51,11 @@ type ModelInfo struct {
 	// Thinking holds provider-specific reasoning/thinking budget capabilities.
 	// This is optional and currently used for Gemini thinking budget normalization.
 	Thinking *ThinkingSupport `json:"thinking,omitempty"`
+
+	// UserDefined indicates this model was defined through config file's models[]
+	// array (e.g., openai-compatibility.*.models[], *-api-key.models[]).
+	// UserDefined models have thinking configuration passed through without validation.
+	UserDefined bool `json:"-"`
 }

 // ThinkingSupport describes a model family's supported internal reasoning budget range.
@@ -73,6 +78,8 @@ type ThinkingSupport struct {
 type ModelRegistration struct {
 	// Info contains the model metadata
 	Info *ModelInfo
+	// InfoByProvider maps provider identifiers to specific ModelInfo to support differing capabilities.
+	InfoByProvider map[string]*ModelInfo
 	// Count is the number of active clients that can provide this model
 	Count int
 	// LastUpdated tracks when this registration was last modified
@@ -127,6 +134,24 @@ func GetGlobalRegistry() *ModelRegistry {
 	return globalRegistry
 }

+// LookupModelInfo searches dynamic registry (provider-specific > global) then static definitions.
+func LookupModelInfo(modelID string, provider ...string) *ModelInfo {
+	modelID = strings.TrimSpace(modelID)
+	if modelID == "" {
+		return nil
+	}
+
+	p := ""
+	if len(provider) > 0 {
+		p = strings.ToLower(strings.TrimSpace(provider[0]))
+	}
+
+	if info := GetGlobalRegistry().GetModelInfo(modelID, p); info != nil {
+		return info
+	}
+	return LookupStaticModelInfo(modelID)
+}
+
 // SetHook sets an optional hook for observing model registration changes.
 func (r *ModelRegistry) SetHook(hook ModelRegistryHook) {
 	if r == nil {
@@ -277,6 +302,9 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
 				if count, okProv := reg.Providers[oldProvider]; okProv {
 					if count <= toRemove {
 						delete(reg.Providers, oldProvider)
+						if reg.InfoByProvider != nil {
+							delete(reg.InfoByProvider, oldProvider)
+						}
 					} else {
 						reg.Providers[oldProvider] = count - toRemove
 					}
@@ -326,6 +354,12 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
 		model := newModels[id]
 		if reg, ok := r.models[id]; ok {
 			reg.Info = cloneModelInfo(model)
+			if provider != "" {
+				if reg.InfoByProvider == nil {
+					reg.InfoByProvider = make(map[string]*ModelInfo)
+				}
+				reg.InfoByProvider[provider] = cloneModelInfo(model)
+			}
 			reg.LastUpdated = now
 			if reg.QuotaExceededClients != nil {
 				delete(reg.QuotaExceededClients, clientID)
@@ -389,11 +423,15 @@ func (r *ModelRegistry) addModelRegistration(modelID, provider string, model *Mo
 		if existing.SuspendedClients == nil {
 			existing.SuspendedClients = make(map[string]string)
 		}
+		if existing.InfoByProvider == nil {
+			existing.InfoByProvider = make(map[string]*ModelInfo)
+		}
 		if provider != "" {
 			if existing.Providers == nil {
 				existing.Providers = make(map[string]int)
 			}
 			existing.Providers[provider]++
+			existing.InfoByProvider[provider] = cloneModelInfo(model)
 		}
 		log.Debugf("Incremented count for model %s, now %d clients", modelID, existing.Count)
 		return
@@ -401,6 +439,7 @@ func (r *ModelRegistry) addModelRegistration(modelID, provider string, model *Mo

 	registration := &ModelRegistration{
 		Info:                 cloneModelInfo(model),
+		InfoByProvider:       make(map[string]*ModelInfo),
 		Count:                1,
 		LastUpdated:          now,
 		QuotaExceededClients: make(map[string]*time.Time),
@@ -408,6 +447,7 @@ func (r *ModelRegistry) addModelRegistration(modelID, provider string, model *Mo
 	}
 	if provider != "" {
 		registration.Providers = map[string]int{provider: 1}
+		registration.InfoByProvider[provider] = cloneModelInfo(model)
 	}
 	r.models[modelID] = registration
 	log.Debugf("Registered new model %s from provider %s", modelID, provider)
@@ -433,6 +473,9 @@ func (r *ModelRegistry) removeModelRegistration(clientID, modelID, provider stri
 		if count, ok := registration.Providers[provider]; ok {
 			if count <= 1 {
 				delete(registration.Providers, provider)
+				if registration.InfoByProvider != nil {
+					delete(registration.InfoByProvider, provider)
+				}
 			} else {
 				registration.Providers[provider] = count - 1
 			}
@@ -514,6 +557,9 @@ func (r *ModelRegistry) unregisterClientInternal(clientID string) {
 				if count, ok := registration.Providers[provider]; ok {
 					if count <= 1 {
 						delete(registration.Providers, provider)
+						if registration.InfoByProvider != nil {
+							delete(registration.InfoByProvider, provider)
+						}
 					} else {
 						registration.Providers[provider] = count - 1
 					}
@@ -920,12 +966,22 @@ func (r *ModelRegistry) GetModelProviders(modelID string) []string {
 	return result
 }

-// GetModelInfo returns the registered ModelInfo for the given model ID, if present.
-// Returns nil if the model is unknown to the registry.
-func (r *ModelRegistry) GetModelInfo(modelID string) *ModelInfo {
+// GetModelInfo returns ModelInfo, prioritizing provider-specific definition if available.
+func (r *ModelRegistry) GetModelInfo(modelID, provider string) *ModelInfo {
 	r.mutex.RLock()
 	defer r.mutex.RUnlock()
 	if reg, ok := r.models[modelID]; ok && reg != nil {
+		// Try provider specific definition first
+		if provider != "" && reg.InfoByProvider != nil {
+			if reg.Providers != nil {
+				if count, ok := reg.Providers[provider]; ok && count > 0 {
+					if info, ok := reg.InfoByProvider[provider]; ok && info != nil {
+						return info
+					}
+				}
+			}
+		}
+		// Fallback to global info (last registered)
 		return reg.Info
 	}
 	return nil
@@ -977,10 +1033,10 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string)
 			"owned_by": model.OwnedBy,
 		}
 		if model.Created > 0 {
-			result["created"] = model.Created
+			result["created_at"] = model.Created
 		}
 		if model.Type != "" {
-			result["type"] = model.Type
+			result["type"] = "model"
 		}
 		if model.DisplayName != "" {
 			result["display_name"] = model.DisplayName
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -14,7 +14,7 @@ import (
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/wsrelay"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -111,7 +111,8 @@ func (e *AIStudioExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.A

 // Execute performs a non-streaming request to the AI Studio API.
 func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

 	translatedReq, body, err := e.translateRequest(req, opts, false)
@@ -119,7 +120,7 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
 		return resp, err
 	}

-	endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt)
+	endpoint := e.buildEndpoint(baseModel, body.action, opts.Alt)
 	wsReq := &wsrelay.HTTPRequest{
 		Method:  http.MethodPost,
 		URL:     endpoint,
@@ -166,7 +167,8 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,

 // ExecuteStream performs a streaming request to the AI Studio API.
 func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

 	translatedReq, body, err := e.translateRequest(req, opts, true)
@@ -174,7 +176,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 		return nil, err
 	}

-	endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt)
+	endpoint := e.buildEndpoint(baseModel, body.action, opts.Alt)
 	wsReq := &wsrelay.HTTPRequest{
 		Method:  http.MethodPost,
 		URL:     endpoint,
@@ -315,6 +317,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth

 // CountTokens counts tokens for the given request using the AI Studio API.
 func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	_, body, err := e.translateRequest(req, opts, false)
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
@@ -324,7 +327,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
 	body.payload, _ = sjson.DeleteBytes(body.payload, "tools")
 	body.payload, _ = sjson.DeleteBytes(body.payload, "safetySettings")

-	endpoint := e.buildEndpoint(req.Model, "countTokens", "")
+	endpoint := e.buildEndpoint(baseModel, "countTokens", "")
 	wsReq := &wsrelay.HTTPRequest{
 		Method:  http.MethodPost,
 		URL:     endpoint,
@@ -380,22 +383,23 @@ type translatedPayload struct {
 }

 func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool) ([]byte, translatedPayload, error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, stream)
-	payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
-	payload = ApplyThinkingMetadata(payload, req.Metadata, req.Model)
-	payload = util.ApplyGemini3ThinkingLevelFromMetadata(req.Model, req.Metadata, payload)
-	payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload)
-	payload = util.ConvertThinkingLevelToBudget(payload, req.Model, true)
-	payload = util.NormalizeGeminiThinkingBudget(req.Model, payload, true)
-	payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
-	payload = fixGeminiImageAspectRatio(req.Model, payload)
-	payload = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", payload, originalTranslated)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream)
+	payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
+	payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return nil, translatedPayload{}, err
+	}
+	payload = fixGeminiImageAspectRatio(baseModel, payload)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	payload = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", payload, originalTranslated, requestedModel)
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens")
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseMimeType")
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseJsonSchema")
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -17,7 +17,7 @@ import (
 	claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -84,17 +84,15 @@ func (e *ClaudeExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut
 }

 func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
-	apiKey, baseURL := claudeCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName

+	apiKey, baseURL := claudeCreds(auth)
 	if baseURL == "" {
 		baseURL = "https://api.anthropic.com"
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
-	model := req.Model
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
-		model = override
-	}
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("claude")
 	// Use streaming translation to preserve function calling, except for claude.
@@ -103,23 +101,25 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, stream)
-	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream)
-	body, _ = sjson.SetBytes(body, "model", model)
-	// Inject thinking config based on model metadata for thinking variants
-	body = e.injectThinkingConfig(model, req.Metadata, body)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
+	body, _ = sjson.SetBytes(body, "model", baseModel)

-	if !strings.HasPrefix(model, "claude-3-5-haiku") {
-		body = checkSystemInstructions(body)
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return resp, err
 	}
-	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
+
+	// Apply cloaking (system prompt injection, fake user ID, sensitive word obfuscation)
+	// based on client type and configuration.
+	body = applyCloaking(ctx, e.cfg, auth, body, baseModel)
+
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)

 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
 	body = disableThinkingIfToolChoiceForced(body)

-	// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
-	body = ensureMaxTokensForThinking(model, body)
-
 	// Extract betas from body and convert to header
 	var extraBetas []string
 	extraBetas, body = extractAndRemoveBetas(body)
@@ -163,7 +163,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("response body close error: %v", errClose)
@@ -218,37 +218,40 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 }

 func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
-	apiKey, baseURL := claudeCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName

+	apiKey, baseURL := claudeCreds(auth)
 	if baseURL == "" {
 		baseURL = "https://api.anthropic.com"
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("claude")
-	model := req.Model
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
-		model = override
-	}
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
-	body, _ = sjson.SetBytes(body, "model", model)
-	// Inject thinking config based on model metadata for thinking variants
-	body = e.injectThinkingConfig(model, req.Metadata, body)
-	body = checkSystemInstructions(body)
-	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	body, _ = sjson.SetBytes(body, "model", baseModel)
+
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return nil, err
+	}
+
+	// Apply cloaking (system prompt injection, fake user ID, sensitive word obfuscation)
+	// based on client type and configuration.
+	body = applyCloaking(ctx, e.cfg, auth, body, baseModel)
+
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)

 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
 	body = disableThinkingIfToolChoiceForced(body)

-	// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
-	body = ensureMaxTokensForThinking(model, body)
-
 	// Extract betas from body and convert to header
 	var extraBetas []string
 	extraBetas, body = extractAndRemoveBetas(body)
@@ -292,7 +295,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("response body close error: %v", errClose)
 		}
@@ -381,8 +384,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 }

 func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
-	apiKey, baseURL := claudeCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName

+	apiKey, baseURL := claudeCreds(auth)
 	if baseURL == "" {
 		baseURL = "https://api.anthropic.com"
 	}
@@ -391,14 +395,10 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	to := sdktranslator.FromString("claude")
 	// Use streaming translation to preserve function calling, except for claude.
 	stream := from != to
-	model := req.Model
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
-		model = override
-	}
-	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream)
-	body, _ = sjson.SetBytes(body, "model", model)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
+	body, _ = sjson.SetBytes(body, "model", baseModel)

-	if !strings.HasPrefix(model, "claude-3-5-haiku") {
+	if !strings.HasPrefix(baseModel, "claude-3-5-haiku") {
 		body = checkSystemInstructions(body)
 	}

@@ -527,17 +527,6 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) {
 	return betas, body
 }

-// injectThinkingConfig adds thinking configuration based on metadata using the unified flow.
-// It uses util.ResolveClaudeThinkingConfig which internally calls ResolveThinkingConfigFromMetadata
-// and NormalizeThinkingBudget, ensuring consistency with other executors like Gemini.
-func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte {
-	budget, ok := util.ResolveClaudeThinkingConfig(modelName, metadata)
-	if !ok {
-		return body
-	}
-	return util.ApplyClaudeThinkingConfig(body, budget)
-}
-
 // disableThinkingIfToolChoiceForced checks if tool_choice forces tool use and disables thinking.
 // Anthropic API does not allow thinking when tool_choice is set to "any" or a specific tool.
 // See: https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations
@@ -551,126 +540,6 @@ func disableThinkingIfToolChoiceForced(body []byte) []byte {
 	return body
 }

-// ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled.
-// Anthropic API requires this constraint; violating it returns a 400 error.
-// This function should be called after all thinking configuration is finalized.
-// It looks up the model's MaxCompletionTokens from the registry to use as the cap.
-func ensureMaxTokensForThinking(modelName string, body []byte) []byte {
-	thinkingType := gjson.GetBytes(body, "thinking.type").String()
-	if thinkingType != "enabled" {
-		return body
-	}
-
-	budgetTokens := gjson.GetBytes(body, "thinking.budget_tokens").Int()
-	if budgetTokens <= 0 {
-		return body
-	}
-
-	maxTokens := gjson.GetBytes(body, "max_tokens").Int()
-
-	// Look up the model's max completion tokens from the registry
-	maxCompletionTokens := 0
-	if modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName); modelInfo != nil {
-		maxCompletionTokens = modelInfo.MaxCompletionTokens
-	}
-
-	// Fall back to budget + buffer if registry lookup fails or returns 0
-	const fallbackBuffer = 4000
-	requiredMaxTokens := budgetTokens + fallbackBuffer
-	if maxCompletionTokens > 0 {
-		requiredMaxTokens = int64(maxCompletionTokens)
-	}
-
-	if maxTokens < requiredMaxTokens {
-		body, _ = sjson.SetBytes(body, "max_tokens", requiredMaxTokens)
-	}
-	return body
-}
-
-func (e *ClaudeExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
-	trimmed := strings.TrimSpace(alias)
-	if trimmed == "" {
-		return ""
-	}
-
-	entry := e.resolveClaudeConfig(auth)
-	if entry == nil {
-		return ""
-	}
-
-	normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)
-
-	// Candidate names to match against configured aliases/names.
-	candidates := []string{strings.TrimSpace(normalizedModel)}
-	if !strings.EqualFold(normalizedModel, trimmed) {
-		candidates = append(candidates, trimmed)
-	}
-	if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
-		candidates = append(candidates, original)
-	}
-
-	for i := range entry.Models {
-		model := entry.Models[i]
-		name := strings.TrimSpace(model.Name)
-		modelAlias := strings.TrimSpace(model.Alias)
-
-		for _, candidate := range candidates {
-			if candidate == "" {
-				continue
-			}
-			if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
-				if name != "" {
-					return name
-				}
-				return candidate
-			}
-			if name != "" && strings.EqualFold(name, candidate) {
-				return name
-			}
-		}
-	}
-	return ""
-}
-
-func (e *ClaudeExecutor) resolveClaudeConfig(auth *cliproxyauth.Auth) *config.ClaudeKey {
-	if auth == nil || e.cfg == nil {
-		return nil
-	}
-	var attrKey, attrBase string
-	if auth.Attributes != nil {
-		attrKey = strings.TrimSpace(auth.Attributes["api_key"])
-		attrBase = strings.TrimSpace(auth.Attributes["base_url"])
-	}
-	for i := range e.cfg.ClaudeKey {
-		entry := &e.cfg.ClaudeKey[i]
-		cfgKey := strings.TrimSpace(entry.APIKey)
-		cfgBase := strings.TrimSpace(entry.BaseURL)
-		if attrKey != "" && attrBase != "" {
-			if strings.EqualFold(cfgKey, attrKey) && strings.EqualFold(cfgBase, attrBase) {
-				return entry
-			}
-			continue
-		}
-		if attrKey != "" && strings.EqualFold(cfgKey, attrKey) {
-			if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) {
-				return entry
-			}
-		}
-		if attrKey == "" && attrBase != "" && strings.EqualFold(cfgBase, attrBase) {
-			return entry
-		}
-	}
-	if attrKey != "" {
-		for i := range e.cfg.ClaudeKey {
-			entry := &e.cfg.ClaudeKey[i]
-			if strings.EqualFold(strings.TrimSpace(entry.APIKey), attrKey) {
-				return entry
-			}
-		}
-	}
-	return nil
-}
-
 type compositeReadCloser struct {
 	io.Reader
 	closers []func() error
@@ -864,6 +733,11 @@ func applyClaudeToolPrefix(body []byte, prefix string) []byte {

 	if tools := gjson.GetBytes(body, "tools"); tools.Exists() && tools.IsArray() {
 		tools.ForEach(func(index, tool gjson.Result) bool {
+			// Skip built-in tools (web_search, code_execution, etc.) which have
+			// a "type" field and require their name to remain unchanged.
+			if tool.Get("type").Exists() && tool.Get("type").String() != "" {
+				return true
+			}
 			name := tool.Get("name").String()
 			if name == "" || strings.HasPrefix(name, prefix) {
 				return true
@@ -956,3 +830,163 @@ func stripClaudeToolPrefixFromStreamLine(line []byte, prefix string) []byte {
 	}
 	return updated
 }
+
+// getClientUserAgent returns the User-Agent header of the request held by the *gin.Context stored in ctx under the "gin" key, or "" when there is none.
+func getClientUserAgent(ctx context.Context) string {
+	if ginCtx, ok := ctx.Value("gin").(*gin.Context); ok && ginCtx != nil && ginCtx.Request != nil {
+		return ginCtx.GetHeader("User-Agent")
+	}
+	return ""
+}
+
+// getCloakConfigFromAuth reads the cloak settings stored in auth.Attributes.
+// Returns (cloakMode, strictMode, sensitiveWords); cloakMode defaults to "auto".
+func getCloakConfigFromAuth(auth *cliproxyauth.Auth) (string, bool, []string) {
+	if auth == nil || auth.Attributes == nil {
+		return "auto", false, nil
+	}
+	attrs := auth.Attributes
+
+	mode := attrs["cloak_mode"]
+	if mode == "" {
+		mode = "auto"
+	}
+
+	// Strict mode is on only when the attribute spells "true" in any letter case.
+	strict := strings.EqualFold(attrs["cloak_strict_mode"], "true")
+	// Sensitive words arrive as one comma-separated string; each entry is trimmed.
+	var words []string
+	if raw := attrs["cloak_sensitive_words"]; raw != "" {
+		for _, entry := range strings.Split(raw, ",") {
+			words = append(words, strings.TrimSpace(entry))
+		}
+	}
+	return mode, strict, words
+}
+
+// resolveClaudeKeyCloakConfig returns the Cloak settings of the first cfg.ClaudeKey
+// entry matching auth's credentials, or nil when there is no such entry.
+func resolveClaudeKeyCloakConfig(cfg *config.Config, auth *cliproxyauth.Auth) *config.CloakConfig {
+	if cfg == nil || auth == nil {
+		return nil
+	}
+
+	apiKey, baseURL := claudeCreds(auth)
+	if apiKey == "" {
+		return nil
+	}
+
+	for idx := range cfg.ClaudeKey {
+		candidate := &cfg.ClaudeKey[idx]
+		// Entries are matched on the API key first (case-insensitive, config side trimmed).
+		if !strings.EqualFold(strings.TrimSpace(candidate.APIKey), apiKey) {
+			continue
+		}
+		// The base URL only disqualifies an entry when both sides specify one
+		// and they differ (compared case-insensitively).
+		candidateBase := strings.TrimSpace(candidate.BaseURL)
+		if baseURL == "" || candidateBase == "" || strings.EqualFold(candidateBase, baseURL) {
+			return candidate.Cloak
+		}
+	}
+
+	return nil
+}
+
+// injectFakeUserID ensures metadata.user_id holds a Claude Code style user ID.
+// A missing, empty, or malformed ID (including an absent "metadata" object) is
+// replaced with a freshly generated one; an ID accepted by isValidUserID is
+// left untouched.
+func injectFakeUserID(payload []byte) []byte {
+	current := gjson.GetBytes(payload, "metadata.user_id").String()
+	if current != "" && isValidUserID(current) {
+		return payload
+	}
+
+	// The write targets the nested path directly, whether or not "metadata" exists yet.
+	payload, _ = sjson.SetBytes(payload, "metadata.user_id", generateFakeUserID())
+	return payload
+}
+
+// checkSystemInstructionsWithMode injects the Claude Code system prompt.
+// In strict mode, it replaces all user system messages.
+// In non-strict mode (default), it prepends to the existing system text blocks; a
+// plain-string "system" is converted to a text block instead of being dropped.
+func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte {
+	const claudeCodePrompt = "You are Claude Code, Anthropic's official CLI for Claude."
+	system := gjson.GetBytes(payload, "system")
+	blocks := `[{"type":"text","text":"You are Claude Code, Anthropic's official CLI for Claude."}]`
+
+	switch {
+	case strictMode:
+		// Strict mode: the Claude Code prompt alone replaces every system message.
+	case system.IsArray():
+		if gjson.GetBytes(payload, "system.0.text").String() == claudeCodePrompt {
+			return payload // already starts with the Claude Code prompt
+		}
+		system.ForEach(func(_, part gjson.Result) bool {
+			if part.Get("type").String() == "text" {
+				blocks, _ = sjson.SetRaw(blocks, "-1", part.Raw)
+			}
+			return true
+		})
+	case system.Type == gjson.String && system.String() != "":
+		// system.Raw is the already-quoted JSON string, so it can be spliced in as-is.
+		blocks, _ = sjson.SetRaw(blocks, "-1", `{"type":"text","text":`+system.Raw+`}`)
+	}
+	payload, _ = sjson.SetRawBytes(payload, "system", []byte(blocks))
+	return payload
+}
+
+// applyCloaking applies cloaking transformations to the payload based on config and client.
+// Cloaking includes: system prompt injection, fake user ID, and sensitive word obfuscation.
+// Settings come from the matching ClaudeKey entry; when that leaves the mode empty, auth
+// attributes provide the mode and fill in strict mode / sensitive words still unset.
+// The payload is returned unchanged when shouldCloak rejects the mode and User-Agent.
+func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, payload []byte, model string) []byte {
+	var (
+		mode   string
+		strict bool
+		words  []string
+	)
+
+	// Preferred source: the ClaudeKey configuration entry for this auth.
+	if keyCfg := resolveClaudeKeyCloakConfig(cfg, auth); keyCfg != nil {
+		mode = keyCfg.Mode
+		strict = keyCfg.StrictMode
+		words = keyCfg.SensitiveWords
+	}
+
+	// Fallback source: auth attributes, consulted only while no mode is known.
+	if mode == "" {
+		attrMode, attrStrict, attrWords := getCloakConfigFromAuth(auth)
+		mode = attrMode
+		strict = strict || attrStrict
+		if len(words) == 0 {
+			words = attrWords
+		}
+	}
+
+	// The mode together with the client's User-Agent decides whether any
+	// cloaking happens at all.
+	userAgent := getClientUserAgent(ctx)
+	if !shouldCloak(mode, userAgent) {
+		return payload
+	}
+
+	// claude-3-5-haiku models are exempt from system prompt injection.
+	if !strings.HasPrefix(model, "claude-3-5-haiku") {
+		payload = checkSystemInstructionsWithMode(payload, strict)
+	}
+
+	// Inject fake user ID
+	payload = injectFakeUserID(payload)
+
+	// Apply sensitive word obfuscation, if any words are configured.
+	if len(words) == 0 {
+		return payload
+	}
+
+	matcher := buildSensitiveWordMatcher(words)
+	return obfuscateSensitiveWords(payload, matcher)
+}
--- a/internal/runtime/executor/claude_executor_test.go
+++ b/internal/runtime/executor/claude_executor_test.go
@@ -25,6 +25,18 @@ func TestApplyClaudeToolPrefix(t *testing.T) {
 	}
 }

+// TestApplyClaudeToolPrefix_SkipsBuiltinTools checks that a tool carrying a "type" keeps its name while an untyped tool is prefixed.
+func TestApplyClaudeToolPrefix_SkipsBuiltinTools(t *testing.T) {
+	payload := []byte(`{"tools":[{"type":"web_search_20250305","name":"web_search"},{"name":"my_custom_tool","input_schema":{"type":"object"}}]}`)
+	prefixed := applyClaudeToolPrefix(payload, "proxy_")
+	if name := gjson.GetBytes(prefixed, "tools.0.name").String(); name != "web_search" {
+		t.Fatalf("built-in tool name should not be prefixed: tools.0.name = %q, want %q", name, "web_search")
+	}
+	if name := gjson.GetBytes(prefixed, "tools.1.name").String(); name != "proxy_my_custom_tool" {
+		t.Fatalf("custom tool should be prefixed: tools.1.name = %q, want %q", name, "proxy_my_custom_tool")
+	}
+}
+
 func TestStripClaudeToolPrefixFromResponse(t *testing.T) {
 	input := []byte(`{"content":[{"type":"tool_use","name":"proxy_alpha","id":"t1","input":{}},{"type":"tool_use","name":"bravo","id":"t2","input":{}}]}`)
 	out := stripClaudeToolPrefixFromResponse(input, "proxy_")
--- a/internal/runtime/executor/cloak_obfuscate.go
+++ b/internal/runtime/executor/cloak_obfuscate.go
@@ -0,0 +1,176 @@
+package executor
+
+import (
+	"regexp"
+	"sort"
+	"strings"
+	"unicode/utf8"
+
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// zeroWidthSpace is U+200B (zero-width space), the character obfuscateWord inserts into matched sensitive words.
+const zeroWidthSpace = "\u200B"
+
+// SensitiveWordMatcher holds the compiled regex for matching sensitive words; buildSensitiveWordMatcher constructs it as a case-insensitive alternation.
+type SensitiveWordMatcher struct {
+	regex *regexp.Regexp
+}
+
+// buildSensitiveWordMatcher compiles the word list into one case-insensitive regex.
+// Entries are trimmed; those shorter than two runes or containing a zero-width
+// space are skipped. Words are ordered longest first before being joined.
+// Returns nil when no usable word remains or the pattern fails to compile.
+func buildSensitiveWordMatcher(words []string) *SensitiveWordMatcher {
+	// Keep only the entries worth matching.
+	var kept []string
+	for _, candidate := range words {
+		candidate = strings.TrimSpace(candidate)
+		if utf8.RuneCountInString(candidate) < 2 || strings.Contains(candidate, zeroWidthSpace) {
+			continue
+		}
+		kept = append(kept, candidate)
+	}
+
+	if len(kept) == 0 {
+		return nil
+	}
+
+	// Longest (by byte length) first, so that among alternatives starting at the
+	// same position the longer word is tried before a shorter prefix of it.
+	sort.Slice(kept, func(a, b int) bool {
+		return len(kept[a]) > len(kept[b])
+	})
+
+	// Quote every word so regex metacharacters in it are matched literally.
+	alternatives := make([]string, 0, len(kept))
+	for _, word := range kept {
+		alternatives = append(alternatives, regexp.QuoteMeta(word))
+	}
+
+	pattern := "(?i)" + strings.Join(alternatives, "|")
+	compiled, err := regexp.Compile(pattern)
+	if err != nil {
+		return nil
+	}
+
+	return &SensitiveWordMatcher{regex: compiled}
+}
+
+// obfuscateWord inserts a zero-width space after the first rune of word.
+// The word is returned unchanged when it already contains a zero-width space,
+// decodes to utf8.RuneError at its start, or has nothing after its first rune.
+func obfuscateWord(word string) string {
+	if strings.Contains(word, zeroWidthSpace) {
+		return word
+	}
+	first, width := utf8.DecodeRuneInString(word)
+	if first == utf8.RuneError || width >= len(word) {
+		return word
+	}
+	head, tail := word[:width], word[width:]
+	return head + zeroWidthSpace + tail
+}
+
+// obfuscateText rewrites every regex match in text via obfuscateWord; a nil matcher or nil regex returns text as-is.
+func (m *SensitiveWordMatcher) obfuscateText(text string) string {
+	if m != nil && m.regex != nil {
+		return m.regex.ReplaceAllStringFunc(text, obfuscateWord)
+	}
+	return text
+}
+
+// obfuscateSensitiveWords processes the payload and obfuscates sensitive words
+// in the "system" prompt and in the content of the "messages" entries.
+// A nil matcher, or one without a compiled regex, leaves the payload
+// untouched.
+func obfuscateSensitiveWords(payload []byte, matcher *SensitiveWordMatcher) []byte {
+	if matcher == nil || matcher.regex == nil {
+		return payload
+	}
+
+	// First pass: the system prompt.
+	withSystem := obfuscateSystemBlocks(payload, matcher)
+
+	// Second pass: message content, applied on top of the first pass's result.
+	return obfuscateMessages(withSystem, matcher)
+}
+
+// obfuscateSystemBlocks obfuscates sensitive words in the "system" prompt.
+// It handles both shapes of "system": an array of blocks (only blocks whose
+// type is "text" are rewritten) and a plain string. Any other shape, or a
+// missing "system", returns the payload unchanged.
+func obfuscateSystemBlocks(payload []byte, matcher *SensitiveWordMatcher) []byte {
+	system := gjson.GetBytes(payload, "system")
+
+	switch {
+	case !system.Exists():
+		return payload
+
+	case system.IsArray():
+		system.ForEach(func(idx, block gjson.Result) bool {
+			if block.Get("type").String() != "text" {
+				return true
+			}
+			original := block.Get("text").String()
+			if masked := matcher.obfuscateText(original); masked != original {
+				// Only write back blocks whose text actually changed.
+				payload, _ = sjson.SetBytes(payload, "system."+idx.String()+".text", masked)
+			}
+			return true
+		})
+
+	case system.Type == gjson.String:
+		// Plain-string system prompt: rewrite the value in place.
+		original := system.String()
+		if masked := matcher.obfuscateText(original); masked != original {
+			payload, _ = sjson.SetBytes(payload, "system", masked)
+		}
+	}
+
+	return payload
+}
+
+// obfuscateMessages obfuscates sensitive words in the content of each message.
+// String content is rewritten directly; for array content only blocks whose
+// type is "text" are touched. The payload is returned unchanged when
+// "messages" is missing or not an array.
+func obfuscateMessages(payload []byte, matcher *SensitiveWordMatcher) []byte {
+	messages := gjson.GetBytes(payload, "messages")
+	if !messages.IsArray() {
+		return payload
+	}
+
+	// rewrite stores the masked text at path, but only when masking changed something.
+	rewrite := func(path, text string) {
+		if masked := matcher.obfuscateText(text); masked != text {
+			payload, _ = sjson.SetBytes(payload, path, masked)
+		}
+	}
+
+	messages.ForEach(func(msgIdx, msg gjson.Result) bool {
+		contentPath := "messages." + msgIdx.String() + ".content"
+		content := msg.Get("content")
+
+		switch {
+		case content.Type == gjson.String:
+			// Simple string content
+			rewrite(contentPath, content.String())
+		case content.IsArray():
+			// Array of content blocks
+			content.ForEach(func(blockIdx, block gjson.Result) bool {
+				if block.Get("type").String() == "text" {
+					rewrite(contentPath+"."+blockIdx.String()+".text", block.Get("text").String())
+				}
+				return true
+			})
+		default:
+			// Missing or differently shaped content is left untouched.
+		}
+
+		return true
+	})
+
+	return payload
+}
--- a/internal/runtime/executor/cloak_utils.go
+++ b/internal/runtime/executor/cloak_utils.go
@@ -0,0 +1,47 @@
+package executor
+
+import (
+	"crypto/rand"
+	"encoding/hex"
+	"regexp"
+	"strings"
+
+	"github.com/google/uuid"
+)
+
+// userIDPattern matches the Claude Code user ID layout: user_[64 hex, either case]_account__session_[lowercase hex in 8-4-4-4-12 groups; UUID version bits are not checked].
+var userIDPattern = regexp.MustCompile(`^user_[a-fA-F0-9]{64}_account__session_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$`)
+
+// generateFakeUserID returns a freshly randomized identifier laid out like a Claude Code user ID:
+// "user_" + 64 hex characters + "_account__session_" + the string form of uuid.New().
+func generateFakeUserID() string {
+	var entropy [32]byte
+	// 32 random bytes hex-encode to the 64 characters of the ID; the rand.Read error is discarded.
+	_, _ = rand.Read(entropy[:])
+	prefix := "user_" + hex.EncodeToString(entropy[:])
+	return prefix + "_account__session_" + uuid.New().String()
+}
+
+// isValidUserID reports whether userID matches userIDPattern from start to end (the pattern is anchored with ^ and $).
+func isValidUserID(userID string) bool {
+	return userIDPattern.MatchString(userID)
+}
+
+// shouldCloak decides whether cloaking applies, given the configured mode and the client User-Agent.
+// "always" and "never" (compared case-insensitively) force the answer; any other mode is automatic.
+func shouldCloak(cloakMode string, userAgent string) bool {
+	mode := strings.ToLower(cloakMode)
+	if mode == "always" {
+		return true
+	}
+	if mode == "never" {
+		return false
+	}
+	// Automatic ("auto", empty, or unrecognized): cloak unless the User-Agent starts with "claude-cli".
+	return !strings.HasPrefix(userAgent, "claude-cli")
+}
+
+// isClaudeCodeClient reports whether userAgent begins with "claude-cli" (case-sensitive prefix match).
+func isClaudeCodeClient(userAgent string) bool {
+	return strings.HasPrefix(userAgent, "claude-cli")
+}
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -13,6 +13,7 @@ import (
 	codexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/codex"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -72,18 +73,15 @@ func (e *CodexExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth
 }

 func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
-	apiKey, baseURL := codexCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName

+	apiKey, baseURL := codexCreds(auth)
 	if baseURL == "" {
 		baseURL = "https://chatgpt.com/backend-api/codex"
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
-	defer reporter.trackFailure(ctx, &err)

-	model := req.Model
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
-		model = override
-	}
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.trackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
@@ -93,20 +91,26 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalPayload = misc.InjectCodexUserAgent(originalPayload, userAgent)
-	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
 	body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent)
-	body = sdktranslator.TranslateRequest(from, to, model, body, false)
+	body = sdktranslator.TranslateRequest(from, to, baseModel, body, false)
 	body = misc.StripCodexUserAgent(body)
-	body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
-	body = NormalizeThinkingConfig(body, model, false)
-	if errValidate := ValidateThinkingConfig(body, model); errValidate != nil {
-		return resp, errValidate
+
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return resp, err
 	}
-	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
-	body, _ = sjson.SetBytes(body, "model", model)
+
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	body, _ = sjson.SetBytes(body, "model", baseModel)
 	body, _ = sjson.SetBytes(body, "stream", true)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
+	body, _ = sjson.DeleteBytes(body, "safety_identifier")
+	if !gjson.GetBytes(body, "instructions").Exists() {
+		body, _ = sjson.SetBytes(body, "instructions", "")
+	}

 	url := strings.TrimSuffix(baseURL, "/") + "/responses"
 	httpReq, err := e.cacheHelper(ctx, from, url, req, body)
@@ -146,7 +150,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -182,18 +186,15 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 }

 func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
-	apiKey, baseURL := codexCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName

+	apiKey, baseURL := codexCreds(auth)
 	if baseURL == "" {
 		baseURL = "https://chatgpt.com/backend-api/codex"
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
-	defer reporter.trackFailure(ctx, &err)

-	model := req.Model
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
-		model = override
-	}
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.trackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
@@ -203,20 +204,25 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalPayload = misc.InjectCodexUserAgent(originalPayload, userAgent)
-	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 	body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent)
-	body = sdktranslator.TranslateRequest(from, to, model, body, true)
+	body = sdktranslator.TranslateRequest(from, to, baseModel, body, true)
 	body = misc.StripCodexUserAgent(body)

-	body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
-	body = NormalizeThinkingConfig(body, model, false)
-	if errValidate := ValidateThinkingConfig(body, model); errValidate != nil {
-		return nil, errValidate
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return nil, err
 	}
-	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
+
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
-	body, _ = sjson.SetBytes(body, "model", model)
+	body, _ = sjson.DeleteBytes(body, "safety_identifier")
+	body, _ = sjson.SetBytes(body, "model", baseModel)
+	if !gjson.GetBytes(body, "instructions").Exists() {
+		body, _ = sjson.SetBytes(body, "instructions", "")
+	}

 	url := strings.TrimSuffix(baseURL, "/") + "/responses"
 	httpReq, err := e.cacheHelper(ctx, from, url, req, body)
@@ -259,7 +265,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 			return nil, readErr
 		}
 		appendAPIResponseChunk(ctx, e.cfg, data)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
 		err = statusErr{code: httpResp.StatusCode, msg: string(data)}
 		return nil, err
 	}
@@ -303,25 +309,30 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 }

 func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
-	model := req.Model
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
-		model = override
-	}
+	baseModel := thinking.ParseSuffix(req.Model).ModelName

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
 	userAgent := codexUserAgent(ctx)
 	body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent)
-	body = sdktranslator.TranslateRequest(from, to, model, body, false)
+	body = sdktranslator.TranslateRequest(from, to, baseModel, body, false)
 	body = misc.StripCodexUserAgent(body)

-	body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
-	body, _ = sjson.SetBytes(body, "model", model)
+	body, err := thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return cliproxyexecutor.Response{}, err
+	}
+
+	body, _ = sjson.SetBytes(body, "model", baseModel)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
+	body, _ = sjson.DeleteBytes(body, "safety_identifier")
 	body, _ = sjson.SetBytes(body, "stream", false)
+	if !gjson.GetBytes(body, "instructions").Exists() {
+		body, _ = sjson.SetBytes(body, "instructions", "")
+	}

-	enc, err := tokenizerForCodexModel(model)
+	enc, err := tokenizerForCodexModel(baseModel)
 	if err != nil {
 		return cliproxyexecutor.Response{}, fmt.Errorf("codex executor: tokenizer init failed: %w", err)
 	}
@@ -593,51 +604,6 @@ func codexCreds(a *cliproxyauth.Auth) (apiKey, baseURL string) {
 	return
 }

-func (e *CodexExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
-	trimmed := strings.TrimSpace(alias)
-	if trimmed == "" {
-		return ""
-	}
-
-	entry := e.resolveCodexConfig(auth)
-	if entry == nil {
-		return ""
-	}
-
-	normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)
-
-	// Candidate names to match against configured aliases/names.
-	candidates := []string{strings.TrimSpace(normalizedModel)}
-	if !strings.EqualFold(normalizedModel, trimmed) {
-		candidates = append(candidates, trimmed)
-	}
-	if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
-		candidates = append(candidates, original)
-	}
-
-	for i := range entry.Models {
-		model := entry.Models[i]
-		name := strings.TrimSpace(model.Name)
-		modelAlias := strings.TrimSpace(model.Alias)
-
-		for _, candidate := range candidates {
-			if candidate == "" {
-				continue
-			}
-			if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
-				if name != "" {
-					return name
-				}
-				return candidate
-			}
-			if name != "" && strings.EqualFold(name, candidate) {
-				return name
-			}
-		}
-	}
-	return ""
-}
-
 func (e *CodexExecutor) resolveCodexConfig(auth *cliproxyauth.Auth) *config.CodexKey {
 	if auth == nil || e.cfg == nil {
 		return nil
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -20,6 +20,7 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -102,28 +103,34 @@ func (e *GeminiCLIExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.

 // Execute performs a non-streaming request to the Gemini CLI API.
 func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
 	if err != nil {
 		return resp, err
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")
+
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
-	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
-	basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
-	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload)
-	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
-	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
-	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
-	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
+	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+
+	basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return resp, err
+	}
+
+	basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated, requestedModel)

 	action := "generateContent"
 	if req.Metadata != nil {
@@ -133,9 +140,9 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	}

 	projectID := resolveGeminiProjectID(auth)
-	models := cliPreviewFallbackOrder(req.Model)
-	if len(models) == 0 || models[0] != req.Model {
-		models = append([]string{req.Model}, models...)
+	models := cliPreviewFallbackOrder(baseModel)
+	if len(models) == 0 || models[0] != baseModel {
+		models = append([]string{baseModel}, models...)
 	}

 	httpClient := newHTTPClient(ctx, e.cfg, auth, 0)
@@ -220,7 +227,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth

 		lastStatus = httpResp.StatusCode
 		lastBody = append([]byte(nil), data...)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
 		if httpResp.StatusCode == 429 {
 			if idx+1 < len(models) {
 				log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
@@ -246,34 +253,40 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth

 // ExecuteStream performs a streaming request to the Gemini CLI API.
 func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
 	if err != nil {
 		return nil, err
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")
+
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
-	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
-	basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
-	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload)
-	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
-	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
-	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
-	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
+	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+
+	basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return nil, err
+	}
+
+	basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated, requestedModel)

 	projectID := resolveGeminiProjectID(auth)

-	models := cliPreviewFallbackOrder(req.Model)
-	if len(models) == 0 || models[0] != req.Model {
-		models = append([]string{req.Model}, models...)
+	models := cliPreviewFallbackOrder(baseModel)
+	if len(models) == 0 || models[0] != baseModel {
+		models = append([]string{baseModel}, models...)
 	}

 	httpClient := newHTTPClient(ctx, e.cfg, auth, 0)
@@ -347,7 +360,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 			appendAPIResponseChunk(ctx, e.cfg, data)
 			lastStatus = httpResp.StatusCode
 			lastBody = append([]byte(nil), data...)
-			log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
+			logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
 			if httpResp.StatusCode == 429 {
 				if idx+1 < len(models) {
 					log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
@@ -435,6 +448,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut

 // CountTokens counts tokens for the given request using the Gemini CLI API.
 func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
@@ -443,9 +458,9 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")

-	models := cliPreviewFallbackOrder(req.Model)
-	if len(models) == 0 || models[0] != req.Model {
-		models = append([]string{req.Model}, models...)
+	models := cliPreviewFallbackOrder(baseModel)
+	if len(models) == 0 || models[0] != baseModel {
+		models = append([]string{baseModel}, models...)
 	}

 	httpClient := newHTTPClient(ctx, e.cfg, auth, 0)
@@ -463,15 +478,18 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.

 	// The loop variable attemptModel is only used as the concrete model id sent to the upstream
 	// Gemini CLI endpoint when iterating fallback variants.
-	for _, attemptModel := range models {
-		payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false)
-		payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model)
-		payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, payload)
+	for range models {
+		payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+
+		payload, err = thinking.ApplyThinking(payload, req.Model, from.String(), to.String(), e.Identifier())
+		if err != nil {
+			return cliproxyexecutor.Response{}, err
+		}
+
 		payload = deleteJSONField(payload, "project")
 		payload = deleteJSONField(payload, "model")
 		payload = deleteJSONField(payload, "request.safetySettings")
-		payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
-		payload = fixGeminiCLIImageAspectRatio(req.Model, payload)
+		payload = fixGeminiCLIImageAspectRatio(baseModel, payload)

 		tok, errTok := tokenSource.Token()
 		if errTok != nil {
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -13,6 +13,7 @@ import (
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -102,16 +103,13 @@ func (e *GeminiExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut
 //   - cliproxyexecutor.Response: The response from the API
 //   - error: An error if the request fails
 func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	apiKey, bearer := geminiCreds(auth)

-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

-	model := req.Model
-	if override := e.resolveUpstreamModel(model, auth); override != "" {
-		model = override
-	}
-
 	// Official Gemini API via API key or OAuth bearer
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
@@ -119,15 +117,18 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
-	body = ApplyThinkingMetadata(body, req.Metadata, model)
-	body = util.ApplyDefaultThinkingIfNeeded(model, body)
-	body = util.NormalizeGeminiThinkingBudget(model, body)
-	body = util.StripThinkingConfigIfUnsupported(model, body)
-	body = fixGeminiImageAspectRatio(model, body)
-	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
-	body, _ = sjson.SetBytes(body, "model", model)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return resp, err
+	}
+
+	body = fixGeminiImageAspectRatio(baseModel, body)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	body, _ = sjson.SetBytes(body, "model", baseModel)

 	action := "generateContent"
 	if req.Metadata != nil {
@@ -136,7 +137,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 		}
 	}
 	baseURL := resolveGeminiBaseURL(auth)
-	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, action)
+	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, action)
 	if opts.Alt != "" && action != "countTokens" {
 		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
 	}
@@ -187,7 +188,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -206,34 +207,34 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r

 // ExecuteStream performs a streaming request to the Gemini API.
 func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	apiKey, bearer := geminiCreds(auth)

-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

-	model := req.Model
-	if override := e.resolveUpstreamModel(model, auth); override != "" {
-		model = override
-	}
-
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
-	body = ApplyThinkingMetadata(body, req.Metadata, model)
-	body = util.ApplyDefaultThinkingIfNeeded(model, body)
-	body = util.NormalizeGeminiThinkingBudget(model, body)
-	body = util.StripThinkingConfigIfUnsupported(model, body)
-	body = fixGeminiImageAspectRatio(model, body)
-	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
-	body, _ = sjson.SetBytes(body, "model", model)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return nil, err
+	}
+
+	body = fixGeminiImageAspectRatio(baseModel, body)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	body, _ = sjson.SetBytes(body, "model", baseModel)

 	baseURL := resolveGeminiBaseURL(auth)
-	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, "streamGenerateContent")
+	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, "streamGenerateContent")
 	if opts.Alt == "" {
 		url = url + "?alt=sse"
 	} else {
@@ -281,7 +282,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("gemini executor: close response body error: %v", errClose)
 		}
@@ -331,27 +332,28 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A

 // CountTokens counts tokens for the given request using the Gemini API.
 func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
-	apiKey, bearer := geminiCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName

-	model := req.Model
-	if override := e.resolveUpstreamModel(model, auth); override != "" {
-		model = override
-	}
+	apiKey, bearer := geminiCreds(auth)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
-	translatedReq := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
-	translatedReq = ApplyThinkingMetadata(translatedReq, req.Metadata, model)
-	translatedReq = util.StripThinkingConfigIfUnsupported(model, translatedReq)
-	translatedReq = fixGeminiImageAspectRatio(model, translatedReq)
+	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+
+	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return cliproxyexecutor.Response{}, err
+	}
+
+	translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq)
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
-	translatedReq, _ = sjson.SetBytes(translatedReq, "model", model)
+	translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel)

 	baseURL := resolveGeminiBaseURL(auth)
-	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, "countTokens")
+	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, "countTokens")

 	requestBody := bytes.NewReader(translatedReq)

@@ -400,7 +402,7 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	}
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
-		log.Debugf("request error, error status: %d, error body: %s", resp.StatusCode, summarizeErrorBody(resp.Header.Get("Content-Type"), data))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", resp.StatusCode, summarizeErrorBody(resp.Header.Get("Content-Type"), data))
 		return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: string(data)}
 	}

@@ -450,51 +452,6 @@ func resolveGeminiBaseURL(auth *cliproxyauth.Auth) string {
 	return base
 }

-func (e *GeminiExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
-	trimmed := strings.TrimSpace(alias)
-	if trimmed == "" {
-		return ""
-	}
-
-	entry := e.resolveGeminiConfig(auth)
-	if entry == nil {
-		return ""
-	}
-
-	normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)
-
-	// Candidate names to match against configured aliases/names.
-	candidates := []string{strings.TrimSpace(normalizedModel)}
-	if !strings.EqualFold(normalizedModel, trimmed) {
-		candidates = append(candidates, trimmed)
-	}
-	if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
-		candidates = append(candidates, original)
-	}
-
-	for i := range entry.Models {
-		model := entry.Models[i]
-		name := strings.TrimSpace(model.Name)
-		modelAlias := strings.TrimSpace(model.Alias)
-
-		for _, candidate := range candidates {
-			if candidate == "" {
-				continue
-			}
-			if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
-				if name != "" {
-					return name
-				}
-				return candidate
-			}
-			if name != "" && strings.EqualFold(name, candidate) {
-				return name
-			}
-		}
-	}
-	return ""
-}
-
 func (e *GeminiExecutor) resolveGeminiConfig(auth *cliproxyauth.Auth) *config.GeminiKey {
 	if auth == nil || e.cfg == nil {
 		return nil
--- a/internal/runtime/executor/gemini_vertex_executor.go
+++ b/internal/runtime/executor/gemini_vertex_executor.go
@@ -12,10 +12,11 @@ import (
 	"io"
 	"net/http"
 	"strings"
+	"time"

 	vertexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/vertex"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
@@ -31,6 +32,143 @@ const (
 	vertexAPIVersion = "v1"
 )

+// isImagenModel reports whether model contains "imagen" (case-insensitive substring match).
+// Imagen models use the :predict action instead of :generateContent.
+func isImagenModel(model string) bool {
+	lowerModel := strings.ToLower(model)
+	return strings.Contains(lowerModel, "imagen")
+}
+
+// getVertexAction returns the endpoint action for model: "predict" for Imagen models (isStream is ignored),
+// otherwise "streamGenerateContent" when isStream is true and "generateContent" when it is false.
+func getVertexAction(model string, isStream bool) string {
+	if isImagenModel(model) {
+		return "predict"
+	}
+	if isStream {
+		return "streamGenerateContent"
+	}
+	return "generateContent"
+}
+
+// convertImagenToGeminiResponse rewraps the "predictions" array of an Imagen response as one
+// Gemini-style candidate of inlineData parts so the standard translation pipeline can process it
+// (same format as gemini-3-pro-image-preview). Returns data unchanged if "predictions" is missing, not an array, or marshaling fails.
+func convertImagenToGeminiResponse(data []byte, model string) []byte {
+	predictions := gjson.GetBytes(data, "predictions")
+	if !predictions.Exists() || !predictions.IsArray() {
+		return data
+	}
+
+	// Build one inlineData part per prediction; entries without bytesBase64Encoded are skipped and a missing mimeType defaults to image/png
+	parts := make([]map[string]any, 0)
+	for _, pred := range predictions.Array() {
+		imageData := pred.Get("bytesBase64Encoded").String()
+		mimeType := pred.Get("mimeType").String()
+		if mimeType == "" {
+			mimeType = "image/png"
+		}
+		if imageData != "" {
+			parts = append(parts, map[string]any{
+				"inlineData": map[string]any{
+					"mimeType": mimeType,
+					"data":     imageData,
+				},
+			})
+		}
+	}
+
+	// Derive the response ID from the current time in nanoseconds
+	responseId := fmt.Sprintf("imagen-%d", time.Now().UnixNano())
+
+	response := map[string]any{
+		"candidates": []map[string]any{{
+			"content": map[string]any{
+				"parts": parts,
+				"role":  "model",
+			},
+			"finishReason": "STOP",
+		}},
+		"responseId":   responseId,
+		"modelVersion": model,
+		// Imagen API doesn't return token counts, set to 0 for tracking purposes
+		"usageMetadata": map[string]any{
+			"promptTokenCount":     0,
+			"candidatesTokenCount": 0,
+			"totalTokenCount":      0,
+		},
+	}
+
+	result, err := json.Marshal(response)
+	if err != nil {
+		return data
+	}
+	return result
+}
+
+// convertToImagenRequest converts a Gemini-style request to Imagen API format (instances[].prompt plus parameters instead of contents[]).
+// The prompt comes from contents[0].parts[0].text, else the first non-empty messages[].content, else "prompt"; an error is returned if all are empty.
+func convertToImagenRequest(payload []byte) ([]byte, error) {
+	// Extract prompt from Gemini-style contents
+	prompt := ""
+
+	// Try to get prompt from contents[0].parts[0].text
+	contentsText := gjson.GetBytes(payload, "contents.0.parts.0.text")
+	if contentsText.Exists() {
+		prompt = contentsText.String()
+	}
+
+	// If that yielded nothing, fall back to the first non-empty messages[].content (OpenAI-compatible format)
+	if prompt == "" {
+		messagesText := gjson.GetBytes(payload, "messages.#.content")
+		if messagesText.Exists() && messagesText.IsArray() {
+			for _, msg := range messagesText.Array() {
+				if msg.String() != "" {
+					prompt = msg.String()
+					break
+				}
+			}
+		}
+	}
+
+	// If still no prompt, try direct prompt field
+	if prompt == "" {
+		directPrompt := gjson.GetBytes(payload, "prompt")
+		if directPrompt.Exists() {
+			prompt = directPrompt.String()
+		}
+	}
+
+	if prompt == "" {
+		return nil, fmt.Errorf("imagen: no prompt found in request")
+	}
+
+	// Build Imagen API request
+	imagenReq := map[string]any{
+		"instances": []map[string]any{
+			{
+				"prompt": prompt,
+			},
+		},
+		"parameters": map[string]any{
+			"sampleCount": 1,
+		},
+	}
+
+	// Copy optional top-level payload fields: aspectRatio and sampleCount into parameters, negativePrompt into instances[0]
+	if aspectRatio := gjson.GetBytes(payload, "aspectRatio"); aspectRatio.Exists() {
+		imagenReq["parameters"].(map[string]any)["aspectRatio"] = aspectRatio.String()
+	}
+	if sampleCount := gjson.GetBytes(payload, "sampleCount"); sampleCount.Exists() {
+		imagenReq["parameters"].(map[string]any)["sampleCount"] = int(sampleCount.Int())
+	}
+	if negativePrompt := gjson.GetBytes(payload, "negativePrompt"); negativePrompt.Exists() {
+		imagenReq["instances"].([]map[string]any)[0]["negativePrompt"] = negativePrompt.String()
+	}
+
+	return json.Marshal(imagenReq)
+}
+
 // GeminiVertexExecutor sends requests to Vertex AI Gemini endpoints using service account credentials.
 type GeminiVertexExecutor struct {
 	cfg *config.Config
@@ -155,39 +293,51 @@ func (e *GeminiVertexExecutor) Refresh(_ context.Context, auth *cliproxyauth.Aut
 // executeWithServiceAccount handles authentication using service account credentials.
 // This method contains the original service account authentication logic.
 func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (resp cliproxyexecutor.Response, err error) {
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

-	from := opts.SourceFormat
-	to := sdktranslator.FromString("gemini")
-	originalPayload := bytes.Clone(req.Payload)
-	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
-	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
-		if budgetOverride != nil {
-			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
-			budgetOverride = &norm
-		}
-		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
-	}
-	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
-	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
-	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
-	body = fixGeminiImageAspectRatio(req.Model, body)
-	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
-	body, _ = sjson.SetBytes(body, "model", req.Model)
+	var body []byte

-	action := "generateContent"
+	// Handle Imagen models with special request format
+	if isImagenModel(baseModel) {
+		imagenBody, errImagen := convertToImagenRequest(req.Payload)
+		if errImagen != nil {
+			return resp, errImagen
+		}
+		body = imagenBody
+	} else {
+		// Standard Gemini translation flow
+		from := opts.SourceFormat
+		to := sdktranslator.FromString("gemini")
+
+		originalPayload := bytes.Clone(req.Payload)
+		if len(opts.OriginalRequest) > 0 {
+			originalPayload = bytes.Clone(opts.OriginalRequest)
+		}
+		originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
+		body = sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+
+		body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
+		if err != nil {
+			return resp, err
+		}
+
+		body = fixGeminiImageAspectRatio(baseModel, body)
+		requestedModel := payloadRequestedModel(opts, req.Model)
+		body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+		body, _ = sjson.SetBytes(body, "model", baseModel)
+	}
+
+	action := getVertexAction(baseModel, false)
 	if req.Metadata != nil {
 		if a, _ := req.Metadata["action"].(string); a == "countTokens" {
 			action = "countTokens"
 		}
 	}
 	baseURL := vertexBaseURL(location)
-	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, action)
+	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, action)
 	if opts.Alt != "" && action != "countTokens" {
 		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
 	}
@@ -239,7 +389,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -250,6 +400,16 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	}
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	reporter.publish(ctx, parseGeminiUsage(data))
+
+	// For Imagen models, convert response to Gemini format before translation
+	// This ensures Imagen responses use the same format as gemini-3-pro-image-preview
+	if isImagenModel(baseModel) {
+		data = convertImagenToGeminiResponse(data, baseModel)
+	}
+
+	// Standard Gemini translation (works for both Gemini and converted Imagen responses)
+	from := opts.SourceFormat
+	to := sdktranslator.FromString("gemini")
 	var param any
 	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
@@ -258,37 +418,32 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au

 // executeWithAPIKey handles authentication using API key credentials.
 func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (resp cliproxyexecutor.Response, err error) {
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
-	defer reporter.trackFailure(ctx, &err)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName

-	model := req.Model
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
-		model = override
-	}
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.trackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
+
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
-		if budgetOverride != nil {
-			norm := util.NormalizeThinkingBudget(model, *budgetOverride)
-			budgetOverride = &norm
-		}
-		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
-	}
-	body = util.ApplyDefaultThinkingIfNeeded(model, body)
-	body = util.NormalizeGeminiThinkingBudget(model, body)
-	body = util.StripThinkingConfigIfUnsupported(model, body)
-	body = fixGeminiImageAspectRatio(model, body)
-	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
-	body, _ = sjson.SetBytes(body, "model", model)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

-	action := "generateContent"
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return resp, err
+	}
+
+	body = fixGeminiImageAspectRatio(baseModel, body)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	body, _ = sjson.SetBytes(body, "model", baseModel)
+
+	action := getVertexAction(baseModel, false)
 	if req.Metadata != nil {
 		if a, _ := req.Metadata["action"].(string); a == "countTokens" {
 			action = "countTokens"
@@ -299,7 +454,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	if baseURL == "" {
 		baseURL = "https://generativelanguage.googleapis.com"
 	}
-	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, action)
+	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, action)
 	if opts.Alt != "" && action != "countTokens" {
 		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
 	}
@@ -348,7 +503,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -367,37 +522,41 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip

 // executeStreamWithServiceAccount handles streaming authentication using service account credentials.
 func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
+
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
-		if budgetOverride != nil {
-			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
-			budgetOverride = &norm
-		}
-		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
-	}
-	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
-	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
-	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
-	body = fixGeminiImageAspectRatio(req.Model, body)
-	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
-	body, _ = sjson.SetBytes(body, "model", req.Model)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)

+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return nil, err
+	}
+
+	body = fixGeminiImageAspectRatio(baseModel, body)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	body, _ = sjson.SetBytes(body, "model", baseModel)
+
+	action := getVertexAction(baseModel, true)
 	baseURL := vertexBaseURL(location)
-	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "streamGenerateContent")
-	if opts.Alt == "" {
-		url = url + "?alt=sse"
-	} else {
-		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
+	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, action)
+	// Imagen models don't support streaming, skip SSE params
+	if !isImagenModel(baseModel) {
+		if opts.Alt == "" {
+			url = url + "?alt=sse"
+		} else {
+			url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
+		}
 	}
 	body, _ = sjson.DeleteBytes(body, "session_id")

@@ -442,7 +601,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("vertex executor: close response body error: %v", errClose)
 		}
@@ -487,45 +646,44 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte

 // executeStreamWithAPIKey handles streaming authentication using API key credentials.
 func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
-	defer reporter.trackFailure(ctx, &err)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName

-	model := req.Model
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
-		model = override
-	}
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.trackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
+
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
-	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
-		if budgetOverride != nil {
-			norm := util.NormalizeThinkingBudget(model, *budgetOverride)
-			budgetOverride = &norm
-		}
-		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
-	}
-	body = util.ApplyDefaultThinkingIfNeeded(model, body)
-	body = util.NormalizeGeminiThinkingBudget(model, body)
-	body = util.StripThinkingConfigIfUnsupported(model, body)
-	body = fixGeminiImageAspectRatio(model, body)
-	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
-	body, _ = sjson.SetBytes(body, "model", model)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)

+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return nil, err
+	}
+
+	body = fixGeminiImageAspectRatio(baseModel, body)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	body, _ = sjson.SetBytes(body, "model", baseModel)
+
+	action := getVertexAction(baseModel, true)
 	// For API key auth, use simpler URL format without project/location
 	if baseURL == "" {
 		baseURL = "https://generativelanguage.googleapis.com"
 	}
-	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, "streamGenerateContent")
-	if opts.Alt == "" {
-		url = url + "?alt=sse"
-	} else {
-		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
+	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, action)
+	// Imagen models don't support streaming, skip SSE params
+	if !isImagenModel(baseModel) {
+		if opts.Alt == "" {
+			url = url + "?alt=sse"
+		} else {
+			url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
+		}
 	}
 	body, _ = sjson.DeleteBytes(body, "session_id")

@@ -567,7 +725,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("vertex executor: close response body error: %v", errClose)
 		}
@@ -612,26 +770,27 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth

 // countTokensWithServiceAccount counts tokens using service account credentials.
 func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
-	translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
-		if budgetOverride != nil {
-			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
-			budgetOverride = &norm
-		}
-		translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
+
+	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+
+	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return cliproxyexecutor.Response{}, err
 	}
-	translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
-	translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
-	translatedReq, _ = sjson.SetBytes(translatedReq, "model", req.Model)
+
+	translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq)
+	translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel)
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")

 	baseURL := vertexBaseURL(location)
-	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "countTokens")
+	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, "countTokens")

 	httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
 	if errNewReq != nil {
@@ -679,7 +838,7 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
 	}
 	data, errRead := io.ReadAll(httpResp.Body)
@@ -688,10 +847,6 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
 		return cliproxyexecutor.Response{}, errRead
 	}
 	appendAPIResponseChunk(ctx, e.cfg, data)
-	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
-		return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)}
-	}
 	count := gjson.GetBytes(data, "totalTokens").Int()
 	out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
 	return cliproxyexecutor.Response{Payload: []byte(out)}, nil
@@ -699,24 +854,20 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context

 // countTokensWithAPIKey handles token counting using API key credentials.
 func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) {
-	model := req.Model
-	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
-		model = override
-	}
+	baseModel := thinking.ParseSuffix(req.Model).ModelName

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
-	translatedReq := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
-		if budgetOverride != nil {
-			norm := util.NormalizeThinkingBudget(model, *budgetOverride)
-			budgetOverride = &norm
-		}
-		translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
+
+	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+
+	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return cliproxyexecutor.Response{}, err
 	}
-	translatedReq = util.StripThinkingConfigIfUnsupported(model, translatedReq)
-	translatedReq = fixGeminiImageAspectRatio(model, translatedReq)
-	translatedReq, _ = sjson.SetBytes(translatedReq, "model", model)
+
+	translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq)
+	translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel)
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
@@ -726,7 +877,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
 	if baseURL == "" {
 		baseURL = "https://generativelanguage.googleapis.com"
 	}
-	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, "countTokens")
+	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, "countTokens")

 	httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
 	if errNewReq != nil {
@@ -771,7 +922,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
 	}
 	data, errRead := io.ReadAll(httpResp.Body)
@@ -780,10 +931,6 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
 		return cliproxyexecutor.Response{}, errRead
 	}
 	appendAPIResponseChunk(ctx, e.cfg, data)
-	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
-		return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)}
-	}
 	count := gjson.GetBytes(data, "totalTokens").Int()
 	out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
 	return cliproxyexecutor.Response{Payload: []byte(out)}, nil
@@ -870,53 +1017,6 @@ func vertexAccessToken(ctx context.Context, cfg *config.Config, auth *cliproxyau
 	return tok.AccessToken, nil
 }

-// resolveUpstreamModel resolves the upstream model name from vertex-api-key configuration.
-// It matches the requested model alias against configured models and returns the actual upstream name.
-func (e *GeminiVertexExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
-	trimmed := strings.TrimSpace(alias)
-	if trimmed == "" {
-		return ""
-	}
-
-	entry := e.resolveVertexConfig(auth)
-	if entry == nil {
-		return ""
-	}
-
-	normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)
-
-	// Candidate names to match against configured aliases/names.
-	candidates := []string{strings.TrimSpace(normalizedModel)}
-	if !strings.EqualFold(normalizedModel, trimmed) {
-		candidates = append(candidates, trimmed)
-	}
-	if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
-		candidates = append(candidates, original)
-	}
-
-	for i := range entry.Models {
-		model := entry.Models[i]
-		name := strings.TrimSpace(model.Name)
-		modelAlias := strings.TrimSpace(model.Alias)
-
-		for _, candidate := range candidates {
-			if candidate == "" {
-				continue
-			}
-			if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
-				if name != "" {
-					return name
-				}
-				return candidate
-			}
-			if name != "" && strings.EqualFold(name, candidate) {
-				return name
-			}
-		}
-	}
-	return ""
-}
-
 // resolveVertexConfig finds the matching vertex-api-key configuration entry for the given auth.
 func (e *GeminiVertexExecutor) resolveVertexConfig(auth *cliproxyauth.Auth) *config.VertexCompatKey {
 	if auth == nil || e.cfg == nil {
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -12,6 +12,7 @@ import (

 	iflowauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/iflow"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -67,6 +68,8 @@ func (e *IFlowExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth

 // Execute performs a non-streaming chat completion request.
 func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	apiKey, baseURL := iflowCreds(auth)
 	if strings.TrimSpace(apiKey) == "" {
 		err = fmt.Errorf("iflow executor: missing api key")
@@ -76,7 +79,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 		baseURL = iflowauth.DefaultAPIBaseURL
 	}

-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

 	from := opts.SourceFormat
@@ -85,17 +88,18 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
-	body, _ = sjson.SetBytes(body, "model", req.Model)
-	body = NormalizeThinkingConfig(body, req.Model, false)
-	if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
-		return resp, errValidate
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	body, _ = sjson.SetBytes(body, "model", baseModel)
+
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow", e.Identifier())
+	if err != nil {
+		return resp, err
 	}
-	body = applyIFlowThinkingConfig(body)
+
 	body = preserveReasoningContentInMessages(body)
-	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)

 	endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint

@@ -138,7 +142,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("iflow request error: status %d body %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -154,6 +158,8 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	reporter.ensurePublished(ctx)

 	var param any
+	// Note: TranslateNonStream uses req.Model (original with suffix) to preserve
+	// the original model name in the response for client compatibility.
 	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
@@ -161,6 +167,8 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re

 // ExecuteStream performs a streaming chat completion request.
 func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	apiKey, baseURL := iflowCreds(auth)
 	if strings.TrimSpace(apiKey) == "" {
 		err = fmt.Errorf("iflow executor: missing api key")
@@ -170,7 +178,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 		baseURL = iflowauth.DefaultAPIBaseURL
 	}

-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

 	from := opts.SourceFormat
@@ -179,23 +187,23 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	body, _ = sjson.SetBytes(body, "model", baseModel)

-	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
-	body, _ = sjson.SetBytes(body, "model", req.Model)
-	body = NormalizeThinkingConfig(body, req.Model, false)
-	if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
-		return nil, errValidate
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow", e.Identifier())
+	if err != nil {
+		return nil, err
 	}
-	body = applyIFlowThinkingConfig(body)
+
 	body = preserveReasoningContentInMessages(body)
 	// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
 	toolsResult := gjson.GetBytes(body, "tools")
 	if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
 		body = ensureToolsArray(body)
 	}
-	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)

 	endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint

@@ -236,7 +244,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 			log.Errorf("iflow executor: close response body error: %v", errClose)
 		}
 		appendAPIResponseChunk(ctx, e.cfg, data)
-		log.Debugf("iflow streaming error: status %d body %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
+		logWithRequestID(ctx).Debugf("request error, error status: %d error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
 		err = statusErr{code: httpResp.StatusCode, msg: string(data)}
 		return nil, err
 	}
@@ -278,11 +286,13 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 }

 func (e *IFlowExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

-	enc, err := tokenizerForModel(req.Model)
+	enc, err := tokenizerForModel(baseModel)
 	if err != nil {
 		return cliproxyexecutor.Response{}, fmt.Errorf("iflow executor: tokenizer init failed: %w", err)
 	}
@@ -520,41 +530,3 @@ func preserveReasoningContentInMessages(body []byte) []byte {

 	return body
 }
-
-// applyIFlowThinkingConfig converts normalized reasoning_effort to model-specific thinking configurations.
-// This should be called after NormalizeThinkingConfig has processed the payload.
-//
-// Model-specific handling:
-//   - GLM-4.6/4.7: Uses chat_template_kwargs.enable_thinking (boolean) and chat_template_kwargs.clear_thinking=false
-//   - MiniMax M2/M2.1: Uses reasoning_split=true for OpenAI-style reasoning separation
-func applyIFlowThinkingConfig(body []byte) []byte {
-	effort := gjson.GetBytes(body, "reasoning_effort")
-	if !effort.Exists() {
-		return body
-	}
-
-	model := strings.ToLower(gjson.GetBytes(body, "model").String())
-	val := strings.ToLower(strings.TrimSpace(effort.String()))
-	enableThinking := val != "none" && val != ""
-
-	// Remove reasoning_effort as we'll convert to model-specific format
-	body, _ = sjson.DeleteBytes(body, "reasoning_effort")
-	body, _ = sjson.DeleteBytes(body, "thinking")
-
-	// GLM-4.6/4.7: Use chat_template_kwargs
-	if strings.HasPrefix(model, "glm-4") {
-		body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
-		if enableThinking {
-			body, _ = sjson.SetBytes(body, "chat_template_kwargs.clear_thinking", false)
-		}
-		return body
-	}
-
-	// MiniMax M2/M2.1: Use reasoning_split
-	if strings.HasPrefix(model, "minimax-m2") {
-		body, _ = sjson.SetBytes(body, "reasoning_split", enableThinking)
-		return body
-	}
-
-	return body
-}
--- a/internal/runtime/executor/iflow_executor_test.go
+++ b/internal/runtime/executor/iflow_executor_test.go
@@ -0,0 +1,67 @@
+package executor
+
+import (
+	"testing"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+)
+
+func TestIFlowExecutorParseSuffix(t *testing.T) { // table-driven: checks the ModelName that thinking.ParseSuffix reports for iFlow model names
+	tests := []struct {
+		name      string // subtest name
+		model     string // input handed to thinking.ParseSuffix
+		wantBase  string // expected ParseSuffix(model).ModelName
+		wantLevel string // NOTE(review): filled in below but never asserted — TODO assert it against the parsed suffix or drop the field
+	}{
+		{"no suffix", "glm-4", "glm-4", ""},
+		{"glm with suffix", "glm-4.1-flash(high)", "glm-4.1-flash", "high"},
+		{"minimax no suffix", "minimax-m2", "minimax-m2", ""},
+		{"minimax with suffix", "minimax-m2.1(medium)", "minimax-m2.1", "medium"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := thinking.ParseSuffix(tt.model)
+			if result.ModelName != tt.wantBase { // only the base model name is verified here
+				t.Errorf("ParseSuffix(%q).ModelName = %q, want %q", tt.model, result.ModelName, tt.wantBase)
+			}
+		})
+	}
+}
+
+func TestPreserveReasoningContentInMessages(t *testing.T) { // table-driven: compares preserveReasoningContentInMessages output with the expected payload
+	tests := []struct {
+		name  string // subtest name
+		input []byte // request body passed to preserveReasoningContentInMessages
+		want  []byte // nil means output should equal input
+	}{
+		{
+			"non-glm model passthrough",
+			[]byte(`{"model":"gpt-4","messages":[]}`),
+			nil,
+		},
+		{
+			"glm model with empty messages",
+			[]byte(`{"model":"glm-4","messages":[]}`),
+			nil,
+		},
+		{
+			"glm model preserves existing reasoning_content",
+			[]byte(`{"model":"glm-4","messages":[{"role":"assistant","content":"hi","reasoning_content":"thinking..."}]}`),
+			nil,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := preserveReasoningContentInMessages(tt.input)
+			want := tt.want
+			if want == nil { // every case above leaves want nil, so only pass-through behaviour is exercised
+				want = tt.input
+			}
+			if string(got) != string(want) { // byte-for-byte comparison of the two payloads
+				t.Errorf("preserveReasoningContentInMessages() = %s, want %s", got, want)
+			}
+		})
+	}
+}
--- a/internal/runtime/executor/logging_helpers.go
+++ b/internal/runtime/executor/logging_helpers.go
@@ -12,7 +12,10 @@ import (

 	"github.com/gin-gonic/gin"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	log "github.com/sirupsen/logrus"
+	"github.com/tidwall/gjson"
 )

 const (
@@ -332,6 +335,12 @@ func summarizeErrorBody(contentType string, body []byte) string {
 		}
 		return "[html body omitted]"
 	}
+
+	// Try to extract error message from JSON response
+	if message := extractJSONErrorMessage(body); message != "" {
+		return message
+	}
+
 	return string(body)
 }

@@ -358,3 +367,25 @@ func extractHTMLTitle(body []byte) string {
 	}
 	return strings.Join(strings.Fields(title), " ")
 }
+
+// extractJSONErrorMessage returns the non-empty value at JSON path "error.message" in body, or "" if it is missing or empty.
+func extractJSONErrorMessage(body []byte) string {
+	result := gjson.GetBytes(body, "error.message")
+	if result.Exists() && result.String() != "" {
+		return result.String()
+	}
+	return ""
+}
+
+// logWithRequestID returns a logrus Entry carrying a request_id field read from ctx via logging.GetRequestID.
+// If ctx is nil or yields an empty request ID, it returns a plain entry on the standard logger instead.
+func logWithRequestID(ctx context.Context) *log.Entry {
+	if ctx == nil { // checked before ctx is handed to logging.GetRequestID
+		return log.NewEntry(log.StandardLogger())
+	}
+	requestID := logging.GetRequestID(ctx)
+	if requestID == "" { // no ID available: skip the request_id field entirely
+		return log.NewEntry(log.StandardLogger())
+	}
+	return log.WithField("request_id", requestID)
+}
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -11,6 +11,7 @@ import (
 	"time"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -69,7 +70,9 @@ func (e *OpenAICompatExecutor) HttpRequest(ctx context.Context, auth *cliproxyau
 }

 func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

 	baseURL, apiKey := e.resolveCredentials(auth)
@@ -85,18 +88,14 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, opts.Stream)
-	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), opts.Stream)
-	modelOverride := e.resolveUpstreamModel(req.Model, auth)
-	if modelOverride != "" {
-		translated = e.overrideModel(translated, modelOverride)
-	}
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated)
-	allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
-	translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
-	translated = NormalizeThinkingConfig(translated, req.Model, allowCompat)
-	if errValidate := ValidateThinkingConfig(translated, req.Model); errValidate != nil {
-		return resp, errValidate
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, opts.Stream)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), opts.Stream)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel)
+
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return resp, err
 	}

 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
@@ -147,7 +146,7 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -168,7 +167,9 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 }

 func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

 	baseURL, apiKey := e.resolveCredentials(auth)
@@ -176,24 +177,21 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 		err = statusErr{code: http.StatusUnauthorized, msg: "missing provider baseURL"}
 		return nil, err
 	}
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
-	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	modelOverride := e.resolveUpstreamModel(req.Model, auth)
-	if modelOverride != "" {
-		translated = e.overrideModel(translated, modelOverride)
-	}
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated)
-	allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
-	translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
-	translated = NormalizeThinkingConfig(translated, req.Model, allowCompat)
-	if errValidate := ValidateThinkingConfig(translated, req.Model); errValidate != nil {
-		return nil, errValidate
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel)
+
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return nil, err
 	}

 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
@@ -241,7 +239,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("openai compat executor: close response body error: %v", errClose)
 		}
@@ -293,14 +291,17 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 }

 func (e *OpenAICompatExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

-	modelForCounting := req.Model
-	if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
-		translated = e.overrideModel(translated, modelOverride)
-		modelForCounting = modelOverride
+	modelForCounting := baseModel
+
+	translated, err := thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return cliproxyexecutor.Response{}, err
 	}

 	enc, err := tokenizerForModel(modelForCounting)
@@ -336,53 +337,6 @@ func (e *OpenAICompatExecutor) resolveCredentials(auth *cliproxyauth.Auth) (base
 	return
 }

-func (e *OpenAICompatExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
-	if alias == "" || auth == nil || e.cfg == nil {
-		return ""
-	}
-	compat := e.resolveCompatConfig(auth)
-	if compat == nil {
-		return ""
-	}
-	for i := range compat.Models {
-		model := compat.Models[i]
-		if model.Alias != "" {
-			if strings.EqualFold(model.Alias, alias) {
-				if model.Name != "" {
-					return model.Name
-				}
-				return alias
-			}
-			continue
-		}
-		if strings.EqualFold(model.Name, alias) {
-			return model.Name
-		}
-	}
-	return ""
-}
-
-func (e *OpenAICompatExecutor) allowCompatReasoningEffort(model string, auth *cliproxyauth.Auth) bool {
-	trimmed := strings.TrimSpace(model)
-	if trimmed == "" || e == nil || e.cfg == nil {
-		return false
-	}
-	compat := e.resolveCompatConfig(auth)
-	if compat == nil || len(compat.Models) == 0 {
-		return false
-	}
-	for i := range compat.Models {
-		entry := compat.Models[i]
-		if strings.EqualFold(strings.TrimSpace(entry.Alias), trimmed) {
-			return true
-		}
-		if strings.EqualFold(strings.TrimSpace(entry.Name), trimmed) {
-			return true
-		}
-	}
-	return false
-}
-
 func (e *OpenAICompatExecutor) resolveCompatConfig(auth *cliproxyauth.Auth) *config.OpenAICompatibility {
 	if auth == nil || e.cfg == nil {
 		return nil
--- a/internal/runtime/executor/payload_helpers.go
+++ b/internal/runtime/executor/payload_helpers.go
@@ -1,125 +1,35 @@
 package executor

 import (
-	"fmt"
-	"net/http"
+	"encoding/json"
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )

-// ApplyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192))
-// for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
-func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
-	// Use the alias from metadata if available, as it's registered in the global registry
-	// with thinking metadata; the upstream model name may not be registered.
-	lookupModel := util.ResolveOriginalModel(model, metadata)
-
-	// Determine which model to use for thinking support check.
-	// If the alias (lookupModel) is not in the registry, fall back to the upstream model.
-	thinkingModel := lookupModel
-	if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) {
-		thinkingModel = model
-	}
-
-	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata)
-	if !ok || (budgetOverride == nil && includeOverride == nil) {
-		return payload
-	}
-	if !util.ModelSupportsThinking(thinkingModel) {
-		return payload
-	}
-	if budgetOverride != nil {
-		norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride)
-		budgetOverride = &norm
-	}
-	return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
-}
-
-// ApplyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192))
-// for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
-func ApplyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
-	// Use the alias from metadata if available, as it's registered in the global registry
-	// with thinking metadata; the upstream model name may not be registered.
-	lookupModel := util.ResolveOriginalModel(model, metadata)
-
-	// Determine which model to use for thinking support check.
-	// If the alias (lookupModel) is not in the registry, fall back to the upstream model.
-	thinkingModel := lookupModel
-	if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) {
-		thinkingModel = model
-	}
-
-	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata)
-	if !ok || (budgetOverride == nil && includeOverride == nil) {
-		return payload
-	}
-	if !util.ModelSupportsThinking(thinkingModel) {
-		return payload
-	}
-	if budgetOverride != nil {
-		norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride)
-		budgetOverride = &norm
-	}
-	return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
-}
-
-// ApplyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
-// Metadata values take precedence over any existing field when the model supports thinking, intentionally
-// overwriting caller-provided values to honor suffix/default metadata priority.
-func ApplyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string, allowCompat bool) []byte {
-	if len(metadata) == 0 {
-		return payload
-	}
-	if field == "" {
-		return payload
-	}
-	baseModel := util.ResolveOriginalModel(model, metadata)
-	if baseModel == "" {
-		baseModel = model
-	}
-	if !util.ModelSupportsThinking(baseModel) && !allowCompat {
-		return payload
-	}
-	if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
-		if util.ModelUsesThinkingLevels(baseModel) || allowCompat {
-			if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
-				return updated
-			}
-		}
-	}
-	// Fallback: numeric thinking_budget suffix for level-based (OpenAI-style) models.
-	if util.ModelUsesThinkingLevels(baseModel) || allowCompat {
-		if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
-			if effort, ok := util.ThinkingBudgetToEffort(baseModel, *budget); ok && effort != "" {
-				if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
-					return updated
-				}
-			}
-		}
-	}
-	return payload
-}
-
 // applyPayloadConfigWithRoot behaves like applyPayloadConfig but treats all parameter
 // paths as relative to the provided root path (for example, "request" for Gemini CLI)
 // and restricts matches to the given protocol when supplied. Defaults are checked
-// against the original payload when provided.
-func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string, payload, original []byte) []byte {
+// against the original payload when provided. requestedModel carries the client-visible
+// model name before alias resolution so payload rules can target aliases precisely.
+func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string, payload, original []byte, requestedModel string) []byte {
 	if cfg == nil || len(payload) == 0 {
 		return payload
 	}
 	rules := cfg.Payload
-	if len(rules.Default) == 0 && len(rules.Override) == 0 {
+	if len(rules.Default) == 0 && len(rules.DefaultRaw) == 0 && len(rules.Override) == 0 && len(rules.OverrideRaw) == 0 {
 		return payload
 	}
 	model = strings.TrimSpace(model)
-	if model == "" {
+	requestedModel = strings.TrimSpace(requestedModel)
+	if model == "" && requestedModel == "" {
 		return payload
 	}
+	candidates := payloadModelCandidates(model, requestedModel)
 	out := payload
 	source := original
 	if len(source) == 0 {
@@ -129,7 +39,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 	// Apply default rules: first write wins per field across all matching rules.
 	for i := range rules.Default {
 		rule := &rules.Default[i]
-		if !payloadRuleMatchesModel(rule, model, protocol) {
+		if !payloadRuleMatchesModels(rule, protocol, candidates) {
 			continue
 		}
 		for path, value := range rule.Params {
@@ -151,10 +61,39 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 			appliedDefaults[fullPath] = struct{}{}
 		}
 	}
+	// Apply default raw rules: first write wins per field across all matching rules.
+	for i := range rules.DefaultRaw {
+		rule := &rules.DefaultRaw[i]
+		if !payloadRuleMatchesModels(rule, protocol, candidates) {
+			continue
+		}
+		for path, value := range rule.Params {
+			fullPath := buildPayloadPath(root, path)
+			if fullPath == "" {
+				continue
+			}
+			if gjson.GetBytes(source, fullPath).Exists() {
+				continue
+			}
+			if _, ok := appliedDefaults[fullPath]; ok {
+				continue
+			}
+			rawValue, ok := payloadRawValue(value)
+			if !ok {
+				continue
+			}
+			updated, errSet := sjson.SetRawBytes(out, fullPath, rawValue)
+			if errSet != nil {
+				continue
+			}
+			out = updated
+			appliedDefaults[fullPath] = struct{}{}
+		}
+	}
 	// Apply override rules: last write wins per field across all matching rules.
 	for i := range rules.Override {
 		rule := &rules.Override[i]
-		if !payloadRuleMatchesModel(rule, model, protocol) {
+		if !payloadRuleMatchesModels(rule, protocol, candidates) {
 			continue
 		}
 		for path, value := range rule.Params {
@@ -169,9 +108,43 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 			out = updated
 		}
 	}
+	// Apply override raw rules: last write wins per field across all matching rules.
+	for i := range rules.OverrideRaw {
+		rule := &rules.OverrideRaw[i]
+		if !payloadRuleMatchesModels(rule, protocol, candidates) {
+			continue
+		}
+		for path, value := range rule.Params {
+			fullPath := buildPayloadPath(root, path)
+			if fullPath == "" {
+				continue
+			}
+			rawValue, ok := payloadRawValue(value)
+			if !ok {
+				continue
+			}
+			updated, errSet := sjson.SetRawBytes(out, fullPath, rawValue)
+			if errSet != nil {
+				continue
+			}
+			out = updated
+		}
+	}
 	return out
 }

+// payloadRuleMatchesModels reports whether rule matches at least one candidate model name.
+func payloadRuleMatchesModels(rule *config.PayloadRule, protocol string, models []string) bool {
+	if rule == nil {
+		return false
+	}
+	matched := false
+	for idx := 0; idx < len(models) && !matched; idx++ {
+		matched = payloadRuleMatchesModel(rule, models[idx], protocol)
+	}
+	return matched
+}
+
 func payloadRuleMatchesModel(rule *config.PayloadRule, model, protocol string) bool {
 	if rule == nil {
 		return false
@@ -194,6 +167,42 @@ func payloadRuleMatchesModel(rule *config.PayloadRule, model, protocol string) b
 	return false
 }

+// payloadModelCandidates builds the ordered, de-duplicated list of model names that
+// payload rules are matched against: the resolved model first, then the requested
+// model's base name, then the requested name itself when it carries a suffix.
+func payloadModelCandidates(model, requestedModel string) []string {
+	model, requestedModel = strings.TrimSpace(model), strings.TrimSpace(requestedModel)
+	if model == "" && requestedModel == "" {
+		return nil
+	}
+	names := make([]string, 0, 3)
+	known := make(map[string]struct{}, 3)
+	// push records name unless it is blank or already present (compared case-insensitively).
+	push := func(name string) {
+		name = strings.TrimSpace(name)
+		if name == "" {
+			return
+		}
+		lowered := strings.ToLower(name)
+		if _, dup := known[lowered]; dup {
+			return
+		}
+		known[lowered] = struct{}{}
+		names = append(names, name)
+	}
+	push(model)
+	if requestedModel == "" {
+		return names
+	}
+	suffix := thinking.ParseSuffix(requestedModel)
+	push(suffix.ModelName)
+	if suffix.HasSuffix {
+		// Keep the suffixed spelling too so rules written against it still match.
+		push(requestedModel)
+	}
+	return names
+}
+
 // buildPayloadPath combines an optional root path with a relative parameter path.
 // When root is empty, the parameter path is used as-is. When root is non-empty,
 // the parameter path is treated as relative to root.
@@ -212,6 +221,53 @@ func buildPayloadPath(root, path string) string {
 	return r + "." + p
 }

+// payloadRawValue returns the bytes to write verbatim for a raw rule value; ok is false when there is nothing usable.
+func payloadRawValue(value any) ([]byte, bool) {
+	if text, isString := value.(string); isString {
+		return []byte(text), true
+	}
+	if blob, isBytes := value.([]byte); isBytes {
+		return blob, true
+	}
+	if value == nil {
+		return nil, false
+	}
+	encoded, errMarshal := json.Marshal(value) // any other type is serialised as JSON
+	if errMarshal != nil {
+		return nil, false
+	}
+	return encoded, true
+}
+
+// payloadRequestedModel resolves the model name the client originally requested.
+//
+// It reads cliproxyexecutor.RequestedModelMetadataKey from opts.Metadata and accepts
+// either a string or a []byte value. The trimmed fallback is returned when the entry
+// is missing, nil, blank after trimming, or of any other type.
+func payloadRequestedModel(opts cliproxyexecutor.Options, fallback string) string {
+	fallback = strings.TrimSpace(fallback)
+	if len(opts.Metadata) == 0 {
+		return fallback
+	}
+	entry, found := opts.Metadata[cliproxyexecutor.RequestedModelMetadataKey]
+	if !found || entry == nil {
+		return fallback
+	}
+	// Reduce both accepted representations to a single trimmed string;
+	// unsupported types leave it empty.
+	requested := ""
+	switch typed := entry.(type) {
+	case string:
+		requested = strings.TrimSpace(typed)
+	case []byte:
+		requested = strings.TrimSpace(string(typed))
+	}
+	if requested == "" {
+		return fallback
+	}
+	return requested
+}
+
 // matchModelPattern performs simple wildcard matching where '*' matches zero or more characters.
 // Examples:
 //
@@ -256,102 +312,3 @@ func matchModelPattern(pattern, model string) bool {
 	}
 	return pi == len(pattern)
 }
-
-// NormalizeThinkingConfig normalizes thinking-related fields in the payload
-// based on model capabilities. For models without thinking support, it strips
-// reasoning fields. For models with level-based thinking, it validates and
-// normalizes the reasoning effort level. For models with numeric budget thinking,
-// it strips the effort string fields.
-func NormalizeThinkingConfig(payload []byte, model string, allowCompat bool) []byte {
-	if len(payload) == 0 || model == "" {
-		return payload
-	}
-
-	if !util.ModelSupportsThinking(model) {
-		if allowCompat {
-			return payload
-		}
-		return StripThinkingFields(payload, false)
-	}
-
-	if util.ModelUsesThinkingLevels(model) {
-		return NormalizeReasoningEffortLevel(payload, model)
-	}
-
-	// Model supports thinking but uses numeric budgets, not levels.
-	// Strip effort string fields since they are not applicable.
-	return StripThinkingFields(payload, true)
-}
-
-// StripThinkingFields removes thinking-related fields from the payload for
-// models that do not support thinking. If effortOnly is true, only removes
-// effort string fields (for models using numeric budgets).
-func StripThinkingFields(payload []byte, effortOnly bool) []byte {
-	fieldsToRemove := []string{
-		"reasoning_effort",
-		"reasoning.effort",
-	}
-	if !effortOnly {
-		fieldsToRemove = append([]string{"reasoning", "thinking"}, fieldsToRemove...)
-	}
-	out := payload
-	for _, field := range fieldsToRemove {
-		if gjson.GetBytes(out, field).Exists() {
-			out, _ = sjson.DeleteBytes(out, field)
-		}
-	}
-	return out
-}
-
-// NormalizeReasoningEffortLevel validates and normalizes the reasoning_effort
-// or reasoning.effort field for level-based thinking models.
-func NormalizeReasoningEffortLevel(payload []byte, model string) []byte {
-	out := payload
-
-	if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() {
-		if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
-			out, _ = sjson.SetBytes(out, "reasoning_effort", normalized)
-		}
-	}
-
-	if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() {
-		if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
-			out, _ = sjson.SetBytes(out, "reasoning.effort", normalized)
-		}
-	}
-
-	return out
-}
-
-// ValidateThinkingConfig checks for unsupported reasoning levels on level-based models.
-// Returns a statusErr with 400 when an unsupported level is supplied to avoid silently
-// downgrading requests.
-func ValidateThinkingConfig(payload []byte, model string) error {
-	if len(payload) == 0 || model == "" {
-		return nil
-	}
-	if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) {
-		return nil
-	}
-
-	levels := util.GetModelThinkingLevels(model)
-	checkField := func(path string) error {
-		if effort := gjson.GetBytes(payload, path); effort.Exists() {
-			if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok {
-				return statusErr{
-					code: http.StatusBadRequest,
-					msg:  fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")),
-				}
-			}
-		}
-		return nil
-	}
-
-	if err := checkField("reasoning_effort"); err != nil {
-		return err
-	}
-	if err := checkField("reasoning.effort"); err != nil {
-		return err
-	}
-	return nil
-}
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -12,6 +12,7 @@ import (

 	qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
@@ -65,12 +66,14 @@ func (e *QwenExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth,
 }

 func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
-	token, baseURL := qwenCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName

+	token, baseURL := qwenCreds(auth)
 	if baseURL == "" {
 		baseURL = "https://portal.qwen.ai/v1"
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

 	from := opts.SourceFormat
@@ -79,15 +82,17 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
-	body, _ = sjson.SetBytes(body, "model", req.Model)
-	body = NormalizeThinkingConfig(body, req.Model, false)
-	if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
-		return resp, errValidate
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	body, _ = sjson.SetBytes(body, "model", baseModel)
+
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return resp, err
 	}
-	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
+
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)

 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -128,7 +133,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -140,18 +145,22 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	reporter.publish(ctx, parseOpenAIUsage(data))
 	var param any
+	// Note: TranslateNonStream uses req.Model (original with suffix) to preserve
+	// the original model name in the response for client compatibility.
 	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }

 func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
-	token, baseURL := qwenCreds(auth)
+	baseModel := thinking.ParseSuffix(req.Model).ModelName

+	token, baseURL := qwenCreds(auth)
 	if baseURL == "" {
 		baseURL = "https://portal.qwen.ai/v1"
 	}
-	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
+
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

 	from := opts.SourceFormat
@@ -160,15 +169,15 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	body, _ = sjson.SetBytes(body, "model", baseModel)

-	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
-	body, _ = sjson.SetBytes(body, "model", req.Model)
-	body = NormalizeThinkingConfig(body, req.Model, false)
-	if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
-		return nil, errValidate
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return nil, err
 	}
+
 	toolsResult := gjson.GetBytes(body, "tools")
 	// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
 	// This will have no real consequences. It's just to scare Qwen3.
@@ -176,7 +185,8 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 		body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
 	}
 	body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
-	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)

 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -212,7 +222,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("qwen executor: close response body error: %v", errClose)
 		}
@@ -256,13 +266,15 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 }

 func (e *QwenExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

 	modelName := gjson.GetBytes(body, "model").String()
 	if strings.TrimSpace(modelName) == "" {
-		modelName = req.Model
+		modelName = baseModel
 	}

 	enc, err := tokenizerForModel(modelName)
--- a/internal/runtime/executor/qwen_executor_test.go
+++ b/internal/runtime/executor/qwen_executor_test.go
@@ -0,0 +1,30 @@
+package executor
+
+import (
+	"testing"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+)
+
+// TestQwenExecutorParseSuffix verifies the base model name and raw suffix that thinking.ParseSuffix extracts from Qwen model names.
+func TestQwenExecutorParseSuffix(t *testing.T) {
+	tests := []struct {
+		name      string
+		model     string
+		wantBase  string
+		wantLevel string // compared with SuffixResult.RawSuffix; assumes it holds the text inside the parentheses
+	}{
+		{"no suffix", "qwen-max", "qwen-max", ""},
+		{"with level suffix", "qwen-max(high)", "qwen-max", "high"},
+		{"with budget suffix", "qwen-max(16384)", "qwen-max", "16384"},
+		{"complex model name", "qwen-plus-latest(medium)", "qwen-plus-latest", "medium"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := thinking.ParseSuffix(tt.model); got.ModelName != tt.wantBase || got.RawSuffix != tt.wantLevel {
+				t.Errorf("ParseSuffix(%q) = {ModelName: %q, RawSuffix: %q}, want {ModelName: %q, RawSuffix: %q}", tt.model, got.ModelName, got.RawSuffix, tt.wantBase, tt.wantLevel)
+			}
+		})
+	}
+}
--- a/internal/runtime/executor/thinking_providers.go
+++ b/internal/runtime/executor/thinking_providers.go
@@ -0,0 +1,11 @@
+package executor
+
+import (
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/antigravity"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/claude"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/codex"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai"
+)
--- a/internal/thinking/apply.go
+++ b/internal/thinking/apply.go
@@ -0,0 +1,487 @@
+// Package thinking provides unified thinking configuration processing.
+package thinking
+
+import (
+	"strings"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	log "github.com/sirupsen/logrus"
+	"github.com/tidwall/gjson"
+)
+
+// providerAppliers maps provider names to their ProviderApplier implementations; entries are nil until RegisterProvider sets them.
+var providerAppliers = map[string]ProviderApplier{
+	"gemini":      nil,
+	"gemini-cli":  nil,
+	"claude":      nil,
+	"openai":      nil,
+	"codex":       nil,
+	"iflow":       nil,
+	"antigravity": nil,
+}
+
+// GetProviderApplier looks up the applier stored under provider; the result is nil when none has been registered.
+func GetProviderApplier(provider string) ProviderApplier {
+	applier := providerAppliers[provider]
+	return applier
+}
+
+// RegisterProvider stores applier under name in providerAppliers, replacing any existing entry.
+func RegisterProvider(name string, applier ProviderApplier) {
+	providerAppliers[name] = applier
+}
+
+// IsUserDefinedModel reports whether the model's thinking configuration should be
+// passed through without validation.
+//
+// User-defined models are configured via config file's models[] array
+// (e.g., openai-compatibility.*.models[], *-api-key.models[]). These models
+// are marked with UserDefined=true at registration time.
+//
+// A nil modelInfo is reported as user-defined as well, so the thinking
+// configuration is applied directly and the upstream service validates it.
+func IsUserDefinedModel(modelInfo *registry.ModelInfo) bool {
+	if modelInfo != nil {
+		return modelInfo.UserDefined
+	}
+	return true
+}
+
+// ApplyThinking applies thinking configuration to a request body.
+//
+// This is the unified entry point for all providers. It follows the processing
+// order defined in FR25: route check → model capability query → config extraction
+// → validation → application.
+//
+// Suffix Priority: When the model name includes a thinking suffix (e.g., "gemini-2.5-pro(8192)"),
+// the suffix configuration takes priority over any thinking parameters in the request body.
+// This enables users to override thinking settings via the model name without modifying their
+// request payload.
+//
+// Parameters:
+//   - body: Original request body JSON
+//   - model: Model name, optionally with thinking suffix (e.g., "claude-sonnet-4-5(16384)")
+//   - fromFormat: Source request format (e.g., openai, codex, gemini)
+//   - toFormat: Target provider format for the request body (gemini, gemini-cli, antigravity, claude, openai, codex, iflow)
+//   - providerKey: Provider identifier used for registry model lookups (may differ from toFormat, e.g., openrouter -> openai)
+//
+// Returns:
+//   - Modified request body JSON with thinking configuration applied
+//   - Error if validation fails (ThinkingError). On validation failure the original
+//     body is returned (not nil) to enable defensive programming patterns.
+//
+// Passthrough behavior (returns original body without error):
+//   - Unknown provider (GetProviderApplier returns nil for toFormat)
+//   - modelInfo.Thinking is nil and the body has no thinking config (an existing config is stripped instead)
+//
+// Note: Unknown models (modelInfo is nil) are treated as user-defined models: we skip
+// validation and still apply the thinking config so the upstream can validate it.
+//
+// Example:
+//
+//	// With suffix - suffix config takes priority
+//	result, err := thinking.ApplyThinking(body, "gemini-2.5-pro(8192)", "gemini", "gemini", "gemini")
+//
+//	// Without suffix - uses body config
+//	result, err := thinking.ApplyThinking(body, "gemini-2.5-pro", "gemini", "gemini", "gemini")
+func ApplyThinking(body []byte, model string, fromFormat string, toFormat string, providerKey string) ([]byte, error) {
+	providerFormat := strings.ToLower(strings.TrimSpace(toFormat))
+	providerKey = strings.ToLower(strings.TrimSpace(providerKey))
+	if providerKey == "" {
+		providerKey = providerFormat
+	}
+	fromFormat = strings.ToLower(strings.TrimSpace(fromFormat))
+	if fromFormat == "" {
+		fromFormat = providerFormat
+	}
+	// 1. Route check: Get provider applier
+	applier := GetProviderApplier(providerFormat)
+	if applier == nil {
+		log.WithFields(log.Fields{
+			"provider": providerFormat,
+			"model":    model,
+		}).Debug("thinking: unknown provider, passthrough |")
+		return body, nil
+	}
+
+	// 2. Parse suffix and get modelInfo
+	suffixResult := ParseSuffix(model)
+	baseModel := suffixResult.ModelName
+	// Use provider-specific lookup to handle capability differences across providers.
+	modelInfo := registry.LookupModelInfo(baseModel, providerKey)
+
+	// 3. Model capability check
+	// Unknown models are treated as user-defined so thinking config can still be applied.
+	// The upstream service is responsible for validating the configuration.
+	if IsUserDefinedModel(modelInfo) {
+		return applyUserDefinedModel(body, modelInfo, fromFormat, providerFormat, suffixResult)
+	}
+	if modelInfo.Thinking == nil {
+		config := extractThinkingConfig(body, providerFormat)
+		if hasThinkingConfig(config) {
+			log.WithFields(log.Fields{
+				"model":    baseModel,
+				"provider": providerFormat,
+			}).Debug("thinking: model does not support thinking, stripping config |")
+			return StripThinkingConfig(body, providerFormat), nil
+		}
+		log.WithFields(log.Fields{
+			"provider": providerFormat,
+			"model":    baseModel,
+		}).Debug("thinking: model does not support thinking, passthrough |")
+		return body, nil
+	}
+
+	// 4. Get config: suffix priority over body
+	var config ThinkingConfig
+	if suffixResult.HasSuffix {
+		config = parseSuffixToConfig(suffixResult.RawSuffix, providerFormat, model)
+		log.WithFields(log.Fields{
+			"provider": providerFormat,
+			"model":    model,
+			"mode":     config.Mode,
+			"budget":   config.Budget,
+			"level":    config.Level,
+		}).Debug("thinking: config from model suffix |")
+	} else {
+		config = extractThinkingConfig(body, providerFormat)
+		if hasThinkingConfig(config) {
+			log.WithFields(log.Fields{
+				"provider": providerFormat,
+				"model":    modelInfo.ID,
+				"mode":     config.Mode,
+				"budget":   config.Budget,
+				"level":    config.Level,
+			}).Debug("thinking: original config from request |")
+		}
+	}
+
+	if !hasThinkingConfig(config) {
+		log.WithFields(log.Fields{
+			"provider": providerFormat,
+			"model":    modelInfo.ID,
+		}).Debug("thinking: no config found, passthrough |")
+		return body, nil
+	}
+
+	// 5. Validate and normalize configuration
+	validated, err := ValidateConfig(config, modelInfo, fromFormat, providerFormat, suffixResult.HasSuffix)
+	if err != nil {
+		log.WithFields(log.Fields{
+			"provider": providerFormat,
+			"model":    modelInfo.ID,
+			"error":    err.Error(),
+		}).Warn("thinking: validation failed |")
+		// Return original body on validation failure (defensive programming).
+		// This ensures callers who ignore the error won't receive nil body.
+		// The upstream service will decide how to handle the unmodified request.
+		return body, err
+	}
+
+	// Defensive check: ValidateConfig should never return (nil, nil)
+	if validated == nil {
+		log.WithFields(log.Fields{
+			"provider": providerFormat,
+			"model":    modelInfo.ID,
+		}).Warn("thinking: ValidateConfig returned nil config without error, passthrough |")
+		return body, nil
+	}
+
+	log.WithFields(log.Fields{
+		"provider": providerFormat,
+		"model":    modelInfo.ID,
+		"mode":     validated.Mode,
+		"budget":   validated.Budget,
+		"level":    validated.Level,
+	}).Debug("thinking: processed config to apply |")
+
+	// 6. Apply configuration using provider-specific applier
+	return applier.Apply(body, *validated, modelInfo)
+}
+
+// parseSuffixToConfig turns the raw text of a model-name suffix into a ThinkingConfig.
+//
+// Candidates are tried in this order, and the first match wins:
+//  1. Special values: "none" → ModeNone, "auto"/"-1" → ModeAuto
+//  2. Level names: "minimal", "low", "medium", "high", "xhigh" → ModeLevel
+//  3. Numeric values: positive integers → ModeBudget, 0 → ModeNone
+//
+// An unrecognised suffix is logged and yields an empty ThinkingConfig (treated as no config).
+func parseSuffixToConfig(rawSuffix, provider, model string) ThinkingConfig {
+	// Special keywords are recognised before level names and numbers.
+	special, isSpecial := ParseSpecialSuffix(rawSuffix)
+	if isSpecial && special == ModeNone {
+		return ThinkingConfig{Mode: ModeNone, Budget: 0}
+	}
+	if isSpecial && special == ModeAuto {
+		return ThinkingConfig{Mode: ModeAuto, Budget: -1}
+	}
+
+	// Named levels come next.
+	if lvl, isLevel := ParseLevelSuffix(rawSuffix); isLevel {
+		return ThinkingConfig{Mode: ModeLevel, Level: lvl}
+	}
+
+	// Plain numbers last; zero turns thinking off.
+	tokens, isNumber := ParseNumericSuffix(rawSuffix)
+	switch {
+	case isNumber && tokens == 0:
+		return ThinkingConfig{Mode: ModeNone, Budget: 0}
+	case isNumber:
+		return ThinkingConfig{Mode: ModeBudget, Budget: tokens}
+	}
+
+	// Nothing matched: log the suffix and report "no config".
+	log.WithFields(log.Fields{
+		"provider":   provider,
+		"model":      model,
+		"raw_suffix": rawSuffix,
+	}).Debug("thinking: unknown suffix format, treating as no config |")
+	return ThinkingConfig{}
+}
+
+// applyUserDefinedModel applies thinking configuration for user-defined (or unknown)
+// models; the config is handed to the provider applier without ValidateConfig.
+func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, fromFormat, toFormat string, suffixResult SuffixResult) ([]byte, error) {
+	// Label used in log output: the registry ID when available, else the parsed base name.
+	modelID := suffixResult.ModelName
+	if modelInfo != nil {
+		modelID = modelInfo.ID
+	}
+
+	// A suffix on the model name takes priority over the request body.
+	var config ThinkingConfig
+	switch {
+	case suffixResult.HasSuffix:
+		config = parseSuffixToConfig(suffixResult.RawSuffix, toFormat, modelID)
+	default:
+		config = extractThinkingConfig(body, toFormat)
+	}
+
+	if !hasThinkingConfig(config) {
+		log.WithFields(log.Fields{
+			"model":    modelID,
+			"provider": toFormat,
+		}).Debug("thinking: user-defined model, passthrough (no config) |")
+		return body, nil
+	}
+
+	providerApplier := GetProviderApplier(toFormat)
+	if providerApplier == nil {
+		log.WithFields(log.Fields{
+			"model":    modelID,
+			"provider": toFormat,
+		}).Debug("thinking: user-defined model, passthrough (unknown provider) |")
+		return body, nil
+	}
+
+	// Logged before normalization, so the fields show the config as extracted.
+	log.WithFields(log.Fields{
+		"provider": toFormat,
+		"model":    modelID,
+		"mode":     config.Mode,
+		"budget":   config.Budget,
+		"level":    config.Level,
+	}).Debug("thinking: applying config for user-defined model (skip validation)")
+
+	normalized := normalizeUserDefinedConfig(config, fromFormat, toFormat)
+	return providerApplier.Apply(body, normalized, modelInfo)
+}
+
+// normalizeUserDefinedConfig converts a level-mode config into a budget-mode config when
+// the source format is level-based and the target provider is budget-based.
+// Any other config, or a level ConvertLevelToBudget does not know, is returned unchanged.
+func normalizeUserDefinedConfig(config ThinkingConfig, fromFormat, toFormat string) ThinkingConfig {
+	needsBudget := config.Mode == ModeLevel && isBudgetBasedProvider(toFormat) && isLevelBasedProvider(fromFormat)
+	if !needsBudget {
+		return config
+	}
+	tokens, known := ConvertLevelToBudget(string(config.Level))
+	if !known {
+		return config
+	}
+	// config is a by-value copy, so updating it in place does not affect the caller's value.
+	config.Mode, config.Budget, config.Level = ModeBudget, tokens, ""
+	return config
+}
+
+// extractThinkingConfig reads the thinking config from body using the field layout of provider.
+// An empty or invalid JSON body, or an unrecognised provider, yields an empty ThinkingConfig.
+func extractThinkingConfig(body []byte, provider string) ThinkingConfig {
+	var found ThinkingConfig
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		return found
+	}
+
+	switch provider {
+	case "gemini", "gemini-cli", "antigravity":
+		found = extractGeminiConfig(body, provider)
+	case "claude":
+		found = extractClaudeConfig(body)
+	case "codex":
+		found = extractCodexConfig(body)
+	case "openai":
+		found = extractOpenAIConfig(body)
+	case "iflow":
+		// Try the iFlow-specific flags first, then fall back to OpenAI-style reasoning_effort.
+		if found = extractIFlowConfig(body); !hasThinkingConfig(found) {
+			found = extractOpenAIConfig(body)
+		}
+	}
+	return found
+}
+
+func hasThinkingConfig(config ThinkingConfig) bool {
+	return !(config.Mode == ModeBudget && config.Budget == 0 && config.Level == "") // false only when all three fields hold these values
+}
+
+// extractClaudeConfig reads the thinking settings from a Claude-format request body.
+//
+// Fields consulted:
+//   - thinking.type: "enabled" or "disabled"
+//   - thinking.budget_tokens: integer (-1=auto, 0=disabled, otherwise a token budget)
+//
+// type="disabled" wins over budget_tokens; type="enabled" without budget_tokens
+// yields ModeAuto; with neither field set the result is an empty ThinkingConfig.
+func extractClaudeConfig(body []byte) ThinkingConfig {
+	var (
+		off  = ThinkingConfig{Mode: ModeNone, Budget: 0}
+		auto = ThinkingConfig{Mode: ModeAuto, Budget: -1}
+	)
+
+	kind := gjson.GetBytes(body, "thinking.type").String()
+	if kind == "disabled" {
+		return off
+	}
+
+	if raw := gjson.GetBytes(body, "thinking.budget_tokens"); raw.Exists() {
+		tokens := int(raw.Int())
+		if tokens == 0 {
+			return off
+		}
+		if tokens == -1 {
+			return auto
+		}
+		return ThinkingConfig{Mode: ModeBudget, Budget: tokens}
+	}
+
+	if kind == "enabled" {
+		return auto
+	}
+	return ThinkingConfig{}
+}
+
+// extractGeminiConfig reads the thinking settings from a Gemini-format request body.
+//
+// Fields consulted under generationConfig.thinkingConfig (prefixed with "request."
+// for the gemini-cli and antigravity providers):
+//   - thinkingLevel: "none", "auto", or level name (Gemini 3)
+//   - thinkingBudget: integer (Gemini 2.5)
+//
+// thinkingLevel is checked first, so it takes precedence when both fields are present.
+func extractGeminiConfig(body []byte, provider string) ThinkingConfig {
+	var (
+		off  = ThinkingConfig{Mode: ModeNone, Budget: 0}
+		auto = ThinkingConfig{Mode: ModeAuto, Budget: -1}
+	)
+	base := "generationConfig.thinkingConfig"
+	switch provider {
+	case "gemini-cli", "antigravity":
+		base = "request." + base
+	}
+
+	// Level-based form (Gemini 3).
+	if lvl := gjson.GetBytes(body, base+".thinkingLevel"); lvl.Exists() {
+		name := lvl.String()
+		if name == "none" {
+			return off
+		}
+		if name == "auto" {
+			return auto
+		}
+		return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(name)}
+	}
+
+	// Budget-based form (Gemini 2.5).
+	if raw := gjson.GetBytes(body, base+".thinkingBudget"); raw.Exists() {
+		tokens := int(raw.Int())
+		if tokens == 0 {
+			return off
+		}
+		if tokens == -1 {
+			return auto
+		}
+		return ThinkingConfig{Mode: ModeBudget, Budget: tokens}
+	}
+	return ThinkingConfig{}
+}
+
+// extractOpenAIConfig reads the thinking settings from an OpenAI-format request body.
+//
+// Field consulted:
+//   - reasoning_effort: "none", "low", "medium", "high" (discrete levels)
+//
+// Only this level field is read; no numeric budget is extracted. "none" maps to
+// ModeNone and any other value is carried as the level without validation here.
+func extractOpenAIConfig(body []byte) ThinkingConfig {
+	effort := gjson.GetBytes(body, "reasoning_effort")
+	if !effort.Exists() {
+		return ThinkingConfig{}
+	}
+
+	// Chat Completions spelling of the effort level.
+	if name := effort.String(); name != "none" {
+		return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(name)}
+	}
+	return ThinkingConfig{Mode: ModeNone, Budget: 0}
+}
+
+// extractCodexConfig reads the thinking settings from a Codex-format request body.
+//
+// Field consulted (OpenAI Responses API layout):
+//   - reasoning.effort: "none", "low", "medium", "high"
+//
+// Same mapping as extractOpenAIConfig, but read from the nested "reasoning.effort" field.
+func extractCodexConfig(body []byte) ThinkingConfig {
+	effort := gjson.GetBytes(body, "reasoning.effort")
+	if !effort.Exists() {
+		return ThinkingConfig{}
+	}
+
+	// "none" disables thinking; every other value is carried as the level.
+	if name := effort.String(); name != "none" {
+		return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(name)}
+	}
+	return ThinkingConfig{Mode: ModeNone, Budget: 0}
+}
+
+// extractIFlowConfig reads the on/off thinking flag from an iFlow-format request body.
+//
+// Fields consulted, in order (the first one present decides):
+//   - GLM format: chat_template_kwargs.enable_thinking (boolean)
+//   - MiniMax format: reasoning_split (boolean)
+//
+// A true flag returns ModeBudget with Budget=1 as a sentinel value indicating "enabled";
+// a false flag returns ModeNone. With neither field present the result is an empty
+// ThinkingConfig.
+func extractIFlowConfig(body []byte) ThinkingConfig {
+	// Order matters: the GLM flag is consulted before the MiniMax flag.
+	flagPaths := []string{
+		"chat_template_kwargs.enable_thinking", // GLM format
+		"reasoning_split",                      // MiniMax format
+	}
+	for _, path := range flagPaths {
+		flag := gjson.GetBytes(body, path)
+		if !flag.Exists() {
+			continue
+		}
+		if !flag.Bool() {
+			return ThinkingConfig{Mode: ModeNone, Budget: 0}
+		}
+		// Budget=1 is a sentinel meaning "enabled" (iFlow doesn't use numeric budgets)
+		return ThinkingConfig{Mode: ModeBudget, Budget: 1}
+	}
+
+	// Neither flag is present.
+	return ThinkingConfig{}
+}
--- a/internal/thinking/convert.go
+++ b/internal/thinking/convert.go
@@ -0,0 +1,142 @@
+package thinking
+
+import (
+	"strings"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+)
+
+// levelToBudgetMap is the standard Level → Budget table behind ConvertLevelToBudget.
+// All keys are lowercase, so callers must lowercase the level before the lookup.
+var levelToBudgetMap = map[string]int{
+	"none":    0,
+	"auto":    -1,
+	"minimal": 512,
+	"low":     1024,
+	"medium":  8192,
+	"high":    24576,
+	"xhigh":   32768,
+}
+
+// ConvertLevelToBudget maps a named thinking level to its numeric token budget.
+//
+// The level is lowercased before lookup, so matching is case-insensitive.
+//
+// Level → Budget mapping:
+//   - none    → 0
+//   - auto    → -1
+//   - minimal → 512
+//   - low     → 1024
+//   - medium  → 8192
+//   - high    → 24576
+//   - xhigh   → 32768
+//
+// Returns:
+//   - budget: the mapped budget, or 0 when the level is unknown
+//   - ok: true if the level is a known key, false otherwise
+func ConvertLevelToBudget(level string) (int, bool) {
+	key := strings.ToLower(level)
+	budget, found := levelToBudgetMap[key]
+	return budget, found
+}
+
+// The Threshold* constants are the inclusive upper bounds of each thinking level.
+// ConvertBudgetToLevel compares budgets against them in ascending order.
+const (
+	// ThresholdMinimal is the upper bound for "minimal" level (1-512)
+	ThresholdMinimal = 512
+	// ThresholdLow is the upper bound for "low" level (513-1024)
+	ThresholdLow = 1024
+	// ThresholdMedium is the upper bound for "medium" level (1025-8192)
+	ThresholdMedium = 8192
+	// ThresholdHigh is the upper bound for "high" level (8193-24576); above it is "xhigh"
+	ThresholdHigh = 24576
+)
+
+// ConvertBudgetToLevel maps a numeric budget to the thinking level whose range contains it.
+//
+// Each level covers the budgets up to and including its Threshold* upper bound,
+// so the result is the smallest level that can hold the budget (not the closest one).
+//
+// Budget → Level thresholds:
+//   - -1         → auto
+//   - 0          → none
+//   - 1-512      → minimal
+//   - 513-1024   → low
+//   - 1025-8192  → medium
+//   - 8193-24576 → high
+//   - 24577+     → xhigh
+//
+// Returns:
+//   - level: the level string, or "" when the budget is invalid
+//   - ok: false for budgets below -1, true otherwise
+func ConvertBudgetToLevel(budget int) (string, bool) {
+	if budget < -1 {
+		return "", false
+	}
+
+	level := string(LevelXHigh)
+	if budget == -1 {
+		level = string(LevelAuto)
+	} else if budget == 0 {
+		level = string(LevelNone)
+	} else if budget <= ThresholdMinimal {
+		level = string(LevelMinimal)
+	} else if budget <= ThresholdLow {
+		level = string(LevelLow)
+	} else if budget <= ThresholdMedium {
+		level = string(LevelMedium)
+	} else if budget <= ThresholdHigh {
+		level = string(LevelHigh)
+	}
+	return level, true
+}
+
+// ModelCapability classifies which thinking formats (budget, level) a model supports.
+type ModelCapability int
+
+const (
+	// CapabilityUnknown (-1) indicates modelInfo is nil (passthrough behavior, internal use).
+	CapabilityUnknown ModelCapability = iota - 1
+	// CapabilityNone (0) indicates the model doesn't support thinking.
+	CapabilityNone
+	// CapabilityBudgetOnly (1) indicates the model supports numeric budgets only.
+	CapabilityBudgetOnly
+	// CapabilityLevelOnly (2) indicates the model supports discrete levels only.
+	CapabilityLevelOnly
+	// CapabilityHybrid (3) indicates the model supports both budgets and levels.
+	CapabilityHybrid
+)
+
+// detectModelCapability classifies which thinking formats a model accepts.
+//
+// The result is derived from the model's ThinkingSupport configuration:
+//   - CapabilityUnknown: modelInfo itself is nil (unknown model)
+//   - CapabilityNone: Thinking is nil, or it declares neither a range nor levels
+//   - CapabilityBudgetOnly: Min or Max is positive and Levels is empty
+//   - CapabilityLevelOnly: Levels is non-empty and Min/Max are not positive
+//   - CapabilityHybrid: Min or Max is positive and Levels is non-empty
+//
+// This helper is unexported and intended for validation and conversion code.
+func detectModelCapability(modelInfo *registry.ModelInfo) ModelCapability {
+	switch {
+	case modelInfo == nil:
+		return CapabilityUnknown // sentinel for "passthrough" behavior
+	case modelInfo.Thinking == nil:
+		return CapabilityNone
+	}
+
+	thinkingSupport := modelInfo.Thinking
+	budgetCapable := thinkingSupport.Min > 0 || thinkingSupport.Max > 0
+	levelCapable := len(thinkingSupport.Levels) > 0
+	if budgetCapable && levelCapable {
+		return CapabilityHybrid
+	}
+	if budgetCapable {
+		return CapabilityBudgetOnly
+	}
+	if levelCapable {
+		return CapabilityLevelOnly
+	}
+	return CapabilityNone
+}
--- a/internal/thinking/errors.go
+++ b/internal/thinking/errors.go
@@ -0,0 +1,82 @@
+// Package thinking provides unified thinking configuration processing logic.
+package thinking
+
+import "net/http"
+
+// ErrorCode is the machine-readable identifier for a kind of thinking configuration error.
+type ErrorCode string
+
+// Error codes produced while processing thinking configuration.
+const (
+	// ErrInvalidSuffix indicates the suffix format cannot be parsed.
+	// Example: "model(abc" (missing closing parenthesis)
+	ErrInvalidSuffix ErrorCode = "INVALID_SUFFIX"
+
+	// ErrUnknownLevel indicates the level value is not in the valid list.
+	// Example: "model(ultra)" where "ultra" is not a valid level
+	ErrUnknownLevel ErrorCode = "UNKNOWN_LEVEL"
+
+	// ErrThinkingNotSupported indicates the model does not support thinking.
+	// Example: requesting thinking on a model without thinking capability
+	ErrThinkingNotSupported ErrorCode = "THINKING_NOT_SUPPORTED"
+
+	// ErrLevelNotSupported indicates the model does not support level mode.
+	// Example: using level with a budget-only model
+	ErrLevelNotSupported ErrorCode = "LEVEL_NOT_SUPPORTED"
+
+	// ErrBudgetOutOfRange indicates the budget value is outside model range.
+	// Example: budget 64000 exceeds max 20000
+	ErrBudgetOutOfRange ErrorCode = "BUDGET_OUT_OF_RANGE"
+
+	// ErrProviderMismatch indicates the provider does not match the model.
+	// Example: applying Claude format to a Gemini model
+	ErrProviderMismatch ErrorCode = "PROVIDER_MISMATCH"
+)
+
+// ThinkingError is the structured error type for thinking configuration processing.
+//
+// It carries the following information about the failure:
+//   - Code: A machine-readable error code for programmatic handling
+//   - Message: A human-readable description, returned verbatim by Error()
+//   - Model: The model name related to the error (optional)
+//   - Details: Additional context information (optional)
+type ThinkingError struct {
+	// Code is the machine-readable error code
+	Code ErrorCode
+	// Message is the human-readable error description.
+	// Should be lowercase, no trailing period, with context if applicable.
+	Message string
+	// Model is the model name related to this error (optional; empty when unset)
+	Model string
+	// Details contains additional context information (optional; nil when unset)
+	Details map[string]interface{}
+}
+
+// Error implements the error interface.
+// It returns Message as is, without the Code prefix;
+// inspect the Code field for programmatic error handling.
+func (e *ThinkingError) Error() string {
+	return e.Message
+}
+
+// NewThinkingError builds a ThinkingError carrying only a code and a message;
+// Model and Details are left at their zero values.
+func NewThinkingError(code ErrorCode, message string) *ThinkingError {
+	thinkingErr := new(ThinkingError)
+	thinkingErr.Code, thinkingErr.Message = code, message
+	return thinkingErr
+}
+
+// NewThinkingErrorWithModel builds a ThinkingError that also records the model
+// name the error relates to; Details is left nil.
+func NewThinkingErrorWithModel(code ErrorCode, message, model string) *ThinkingError {
+	thinkingErr := new(ThinkingError)
+	thinkingErr.Code, thinkingErr.Message = code, message
+	thinkingErr.Model = model
+	return thinkingErr
+}
+
+// StatusCode reports the HTTP status for this error to HTTP handlers: always 400 Bad Request.
+func (e *ThinkingError) StatusCode() int {
+	return http.StatusBadRequest
+}
--- a/internal/thinking/provider/antigravity/apply.go
+++ b/internal/thinking/provider/antigravity/apply.go
@@ -0,0 +1,201 @@
+// Package antigravity implements thinking configuration for Antigravity API format.
+//
+// Antigravity uses request.generationConfig.thinkingConfig.* path (same as gemini-cli)
+// but requires additional normalization for Claude models:
+//   - Ensure thinking budget < max_tokens
+//   - Remove thinkingConfig if budget < minimum allowed
+package antigravity
+
+import (
+	"strings"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// Applier applies thinking configuration for the Antigravity API format; it holds no state.
+type Applier struct{}
+
+var _ thinking.ProviderApplier = (*Applier)(nil) // compile-time interface check
+
+// NewApplier returns a fresh, stateless Antigravity thinking applier.
+func NewApplier() *Applier {
+	return new(Applier)
+}
+
+func init() {
+	thinking.RegisterProvider("antigravity", NewApplier()) // registers this applier under the "antigravity" provider name
+}
+
+// Apply writes the thinking configuration into an Antigravity request body.
+//
+// User-defined models are delegated to applyCompatible. For registry models
+// without thinking support, and for unsupported modes, the body is returned
+// unchanged. An empty or invalid JSON body is replaced with "{}" before writing.
+//
+// For models whose ID contains "claude" (case-insensitive), the budget path
+// applies extra constraints:
+//   - Ensure thinking budget < max_tokens
+//   - Remove thinkingConfig if budget < minimum allowed
+func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
+	if thinking.IsUserDefinedModel(modelInfo) {
+		return a.applyCompatible(body, config, modelInfo)
+	}
+	if modelInfo.Thinking == nil {
+		return body, nil
+	}
+
+	switch config.Mode {
+	case thinking.ModeBudget, thinking.ModeLevel, thinking.ModeNone, thinking.ModeAuto:
+		// Supported modes continue below.
+	default:
+		return body, nil
+	}
+
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		body = []byte(`{}`)
+	}
+	claudeModel := strings.Contains(strings.ToLower(modelInfo.ID), "claude")
+
+	// Explicit budgets and auto always use the budget format; level/none
+	// follow the model: level format when it declares Levels, budget otherwise.
+	budgetMode := config.Mode == thinking.ModeAuto || config.Mode == thinking.ModeBudget
+	if !budgetMode && len(modelInfo.Thinking.Levels) > 0 {
+		return a.applyLevelFormat(body, config)
+	}
+	return a.applyBudgetFormat(body, config, modelInfo, claudeModel)
+}
+
+// applyCompatible handles user-defined models, where the format follows the
+// requested config rather than registry capabilities. modelInfo may be nil.
+func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
+	switch config.Mode {
+	case thinking.ModeBudget, thinking.ModeLevel, thinking.ModeNone, thinking.ModeAuto:
+	default:
+		return body, nil
+	}
+
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		body = []byte(`{}`)
+	}
+
+	claudeModel := modelInfo != nil && strings.Contains(strings.ToLower(modelInfo.ID), "claude")
+
+	switch {
+	case config.Mode == thinking.ModeLevel,
+		config.Mode == thinking.ModeNone && config.Level != "":
+		return a.applyLevelFormat(body, config)
+	default:
+		// ModeAuto, ModeBudget, and ModeNone without a level.
+		return a.applyBudgetFormat(body, config, modelInfo, claudeModel)
+	}
+}
+
+func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
+	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
+	result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget")
+	// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
+	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts")
+
+	if config.Mode == thinking.ModeNone {
+		result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", false)
+		if config.Level != "" {
+			result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", string(config.Level))
+		}
+		return result, nil
+	}
+
+	// Only handle ModeLevel - budget conversion should be done by upper layer
+	if config.Mode != thinking.ModeLevel {
+		return body, nil
+	}
+
+	level := string(config.Level)
+	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", level)
+	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", true)
+	return result, nil
+}
+
+func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo, isClaude bool) ([]byte, error) {
+	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
+	result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingLevel")
+	// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
+	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts")
+
+	budget := config.Budget
+	includeThoughts := false
+	switch config.Mode {
+	case thinking.ModeNone:
+		includeThoughts = false
+	case thinking.ModeAuto:
+		includeThoughts = true
+	default:
+		includeThoughts = budget > 0
+	}
+
+	// Apply Claude-specific constraints
+	if isClaude && modelInfo != nil {
+		budget, result = a.normalizeClaudeBudget(budget, result, modelInfo)
+		// Check if budget was removed entirely
+		if budget == -2 {
+			return result, nil
+		}
+	}
+
+	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
+	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", includeThoughts)
+	return result, nil
+}
+
+// normalizeClaudeBudget applies Claude-specific constraints to a thinking budget.
+//
+// Steps, in order:
+//   - Cap the budget at one below the effective max tokens (see effectiveMaxTokens)
+//   - If the budget is non-negative but below the model's minimum, delete
+//     thinkingConfig from the payload and return the -2 sentinel
+//   - If the max tokens value came from the model default, write it to
+//     request.generationConfig.maxOutputTokens
+//
+// Returns the normalized budget and the updated payload. A nil modelInfo
+// leaves both unchanged.
+func (a *Applier) normalizeClaudeBudget(budget int, payload []byte, modelInfo *registry.ModelInfo) (int, []byte) {
+	if modelInfo == nil {
+		return budget, payload
+	}
+
+	// Cap the budget strictly below the effective output-token limit.
+	limit, limitFromModel := a.effectiveMaxTokens(payload, modelInfo)
+	if limit > 0 && budget >= limit {
+		budget = limit - 1
+	}
+
+	// A non-negative budget under the model minimum cannot be honored:
+	// strip thinkingConfig and report the removal via the -2 sentinel.
+	if thinkingSupport := modelInfo.Thinking; thinkingSupport != nil {
+		if floor := thinkingSupport.Min; floor > 0 && budget >= 0 && budget < floor {
+			payload, _ = sjson.DeleteBytes(payload, "request.generationConfig.thinkingConfig")
+			return -2, payload
+		}
+	}
+
+	// Persist the model default so the request carries an explicit limit.
+	if limitFromModel && limit > 0 {
+		payload, _ = sjson.SetBytes(payload, "request.generationConfig.maxOutputTokens", limit)
+	}
+	return budget, payload
+}
+
+// effectiveMaxTokens picks the token limit used to cap the thinking budget: a positive
+// request maxOutputTokens wins; otherwise the model default (fromModel=true); else 0.
+func (a *Applier) effectiveMaxTokens(payload []byte, modelInfo *registry.ModelInfo) (max int, fromModel bool) {
+	requested := gjson.GetBytes(payload, "request.generationConfig.maxOutputTokens").Int()
+	switch {
+	case requested > 0:
+		return int(requested), false
+	case modelInfo != nil && modelInfo.MaxCompletionTokens > 0:
+		return modelInfo.MaxCompletionTokens, true
+	}
+	return 0, false
+}
--- a/internal/thinking/provider/claude/apply.go
+++ b/internal/thinking/provider/claude/apply.go
@@ -0,0 +1,166 @@
+// Package claude implements thinking configuration scaffolding for Claude models.
+//
+// Claude models use the thinking.budget_tokens format with values in the range
+// 1024-128000. Some Claude models support ZeroAllowed (sonnet-4-5, opus-4-5),
+// while older models do not.
+// See: _bmad-output/planning-artifacts/architecture.md#Epic-6
+package claude
+
+import (
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// Applier is the thinking applier registered for the "claude" provider.
+// It is an empty struct: stateless, with no configuration.
+type Applier struct{}
+
+// NewApplier returns a fresh, stateless Claude thinking applier.
+func NewApplier() *Applier {
+	return new(Applier)
+}
+
+func init() {
+	thinking.RegisterProvider("claude", NewApplier()) // registers this applier under the "claude" provider name
+}
+
+// Apply writes the thinking block into a Claude request body.
+//
+// NOTE: config is expected to have been normalized by thinking.ValidateConfig,
+// which (per the original notes; not visible in this file) is responsible for:
+//   - Mode conversion (Level→Budget, Auto→Budget)
+//   - Budget clamping to model range
+//   - ZeroAllowed constraint enforcement
+//
+// Only ModeBudget and ModeNone are processed; other modes return body unchanged.
+//
+// Output when config.Budget is non-zero (thinking enabled):
+//
+//	{
+//	  "thinking": {
+//	    "type": "enabled",
+//	    "budget_tokens": 16384
+//	  }
+//	}
+//
+// Output when config.Budget is zero (thinking disabled):
+//
+//	{
+//	  "thinking": {
+//	    "type": "disabled"
+//	  }
+//	}
+func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
+	if thinking.IsUserDefinedModel(modelInfo) {
+		return applyCompatibleClaude(body, config)
+	}
+	if modelInfo.Thinking == nil {
+		return body, nil
+	}
+
+	// Level and Auto configs are expected to have been converted by
+	// ValidateConfig already; anything else is passed through untouched.
+	switch config.Mode {
+	case thinking.ModeBudget, thinking.ModeNone:
+	default:
+		return body, nil
+	}
+
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		body = []byte(`{}`)
+	}
+
+	// The (pre-validated) budget alone decides between enabled and disabled.
+	tokens := config.Budget
+	if tokens != 0 {
+		out, _ := sjson.SetBytes(body, "thinking.type", "enabled")
+		out, _ = sjson.SetBytes(out, "thinking.budget_tokens", tokens)
+		// Anthropic requires max_tokens > thinking.budget_tokens.
+		return a.normalizeClaudeBudget(out, tokens, modelInfo), nil
+	}
+
+	out, _ := sjson.SetBytes(body, "thinking.type", "disabled")
+	out, _ = sjson.DeleteBytes(out, "thinking.budget_tokens")
+	return out, nil
+}
+
+// normalizeClaudeBudget enforces Anthropic's max_tokens > thinking.budget_tokens rule,
+// whose violation is rejected with a 400 error.
+//
+// In order:
+//  1. Resolve the effective max_tokens (request value, else model default) and,
+//     when it came from the model default, write it into the request.
+//  2. If budgetTokens >= max_tokens, plan to lower it to max_tokens-1.
+//  3. If that lowered budget would fall below the model minimum, keep the
+//     budget_tokens already in the body instead.
+//  4. Otherwise, when the budget changed, overwrite thinking.budget_tokens.
+//
+// Non-positive budgets are returned untouched.
+func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo *registry.ModelInfo) []byte {
+	if budgetTokens <= 0 {
+		return body
+	}
+
+	limit, limitFromModel := a.effectiveMaxTokens(body, modelInfo)
+	if limitFromModel && limit > 0 {
+		body, _ = sjson.SetBytes(body, "max_tokens", limit)
+	}
+
+	// No usable limit, or the budget already fits under it: nothing to lower.
+	if limit <= 0 || budgetTokens < limit {
+		return body
+	}
+	lowered := limit - 1
+
+	floor := 0
+	if modelInfo != nil && modelInfo.Thinking != nil {
+		floor = modelInfo.Thinking.Min
+	}
+	// Lowering would undercut the model minimum; keep the current budget_tokens.
+	if floor > 0 && lowered > 0 && lowered < floor {
+		return body
+	}
+
+	body, _ = sjson.SetBytes(body, "thinking.budget_tokens", lowered)
+	return body
+}
+
+// effectiveMaxTokens picks the token limit used to cap the thinking budget: a positive
+// request max_tokens wins; otherwise the model default (fromModel=true); else 0.
+func (a *Applier) effectiveMaxTokens(body []byte, modelInfo *registry.ModelInfo) (max int, fromModel bool) {
+	requested := gjson.GetBytes(body, "max_tokens").Int()
+	switch {
+	case requested > 0:
+		return int(requested), false
+	case modelInfo != nil && modelInfo.MaxCompletionTokens > 0:
+		return modelInfo.MaxCompletionTokens, true
+	}
+	return 0, false
+}
+
+// applyCompatibleClaude writes the thinking block for user-defined models: ModeNone
+// disables it, ModeAuto enables it without budget_tokens, ModeBudget sets config.Budget.
+func applyCompatibleClaude(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
+	mode := config.Mode
+	if mode != thinking.ModeBudget && mode != thinking.ModeNone && mode != thinking.ModeAuto {
+		return body, nil
+	}
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		body = []byte(`{}`)
+	}
+
+	thinkingType := "enabled"
+	if mode == thinking.ModeNone {
+		thinkingType = "disabled"
+	}
+	out, _ := sjson.SetBytes(body, "thinking.type", thinkingType)
+
+	if mode == thinking.ModeBudget {
+		out, _ = sjson.SetBytes(out, "thinking.budget_tokens", config.Budget)
+	} else {
+		out, _ = sjson.DeleteBytes(out, "thinking.budget_tokens")
+	}
+	return out, nil
+}
--- a/internal/thinking/provider/codex/apply.go
+++ b/internal/thinking/provider/codex/apply.go
@@ -0,0 +1,131 @@
+// Package codex implements thinking configuration for Codex (OpenAI Responses API) models.
+//
+// Codex models use the reasoning.effort format with discrete levels
+// (none/low/medium/high/xhigh, depending on the model). This is similar to OpenAI
+// but uses the nested field "reasoning.effort" instead of "reasoning_effort".
+// See: _bmad-output/planning-artifacts/architecture.md#Epic-8
+package codex
+
+import (
+	"strings"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// Applier implements thinking.ProviderApplier for Codex models; it holds no state.
+//
+// Codex-specific behavior:
+//   - Output format: reasoning.effort (string, e.g. low/medium/high/xhigh, or none)
+//   - Level-only mode: no numeric budget support
+//   - Models with ZeroAllowed (gpt-5.1, gpt-5.2 per the original note — verify) accept "none"
+type Applier struct{}
+
+var _ thinking.ProviderApplier = (*Applier)(nil) // compile-time interface check
+
+// NewApplier returns a fresh, stateless Codex thinking applier.
+func NewApplier() *Applier {
+	return new(Applier)
+}
+
+func init() {
+	thinking.RegisterProvider("codex", NewApplier()) // registers this applier under the "codex" provider name
+}
+
+// Apply applies thinking configuration to a Codex request body.
+//
+// ModeLevel writes config.Level to reasoning.effort; an empty level is skipped
+// so that an empty effort string is never emitted (as in applyCompatibleCodex).
+// ModeNone writes "none" when Budget is 0 and the model allows it (ZeroAllowed
+// or a declared "none" level), else falls back to config.Level, then to the
+// model's first declared level. Other modes, and models without thinking
+// support, leave the body unchanged.
+//
+// Expected output format: {"reasoning": {"effort": "high"}}
+func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
+	if thinking.IsUserDefinedModel(modelInfo) {
+		return applyCompatibleCodex(body, config)
+	}
+	if modelInfo.Thinking == nil {
+		return body, nil
+	}
+
+	// Only handle ModeLevel and ModeNone; other modes pass through unchanged.
+	if config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone {
+		return body, nil
+	}
+
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		body = []byte(`{}`)
+	}
+
+	effort := ""
+	if config.Mode == thinking.ModeLevel {
+		effort = string(config.Level)
+	} else {
+		support := modelInfo.Thinking
+		// ModeNone: prefer an explicit "none" when the model can disable reasoning.
+		if config.Budget == 0 && (support.ZeroAllowed || hasLevel(support.Levels, string(thinking.LevelNone))) {
+			effort = string(thinking.LevelNone)
+		}
+		if effort == "" && config.Level != "" {
+			effort = string(config.Level)
+		}
+		if effort == "" && len(support.Levels) > 0 {
+			effort = support.Levels[0]
+		}
+	}
+	if effort == "" {
+		// Nothing valid to write; never emit an empty effort string.
+		return body, nil
+	}
+
+	result, _ := sjson.SetBytes(body, "reasoning.effort", effort)
+	return result, nil
+}
+
+func applyCompatibleCodex(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		body = []byte(`{}`)
+	}
+
+	var effort string
+	switch config.Mode {
+	case thinking.ModeLevel:
+		if config.Level == "" {
+			return body, nil
+		}
+		effort = string(config.Level)
+	case thinking.ModeNone:
+		effort = string(thinking.LevelNone)
+		if config.Level != "" {
+			effort = string(config.Level)
+		}
+	case thinking.ModeAuto:
+		// Auto mode for user-defined models: pass through as "auto"
+		effort = string(thinking.LevelAuto)
+	case thinking.ModeBudget:
+		// Budget mode: convert budget to level using threshold mapping
+		level, ok := thinking.ConvertBudgetToLevel(config.Budget)
+		if !ok {
+			return body, nil
+		}
+		effort = level
+	default:
+		return body, nil
+	}
+
+	result, _ := sjson.SetBytes(body, "reasoning.effort", effort)
+	return result, nil
+}
+
+// hasLevel reports whether target matches any entry of levels, ignoring case
+// and whitespace around each entry (target itself is not trimmed).
+func hasLevel(levels []string, target string) bool {
+	found := false
+	for i := 0; i < len(levels) && !found; i++ {
+		found = strings.EqualFold(strings.TrimSpace(levels[i]), target)
+	}
+	return found
+}
--- a/internal/thinking/provider/gemini/apply.go
+++ b/internal/thinking/provider/gemini/apply.go
@@ -0,0 +1,169 @@
+// Package gemini implements thinking configuration for Gemini models.
+//
+// Gemini models have two formats:
+//   - Gemini 2.5: Uses thinkingBudget (numeric)
+//   - Gemini 3.x: Uses thinkingLevel (string: minimal/low/medium/high)
+//     or thinkingBudget=-1 for auto/dynamic mode
+//
+// Output format is determined by ThinkingConfig.Mode and ThinkingSupport.Levels:
+//   - ModeAuto: Always uses thinkingBudget=-1 (both Gemini 2.5 and 3.x)
+//   - len(Levels) > 0: Uses thinkingLevel (Gemini 3.x discrete levels)
+//   - len(Levels) == 0: Uses thinkingBudget (Gemini 2.5)
+package gemini
+
+import (
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// Applier applies thinking configuration for Gemini models; it holds no state.
+//
+// Gemini-specific behavior (model facts come from the registry, not this file):
+//   - Gemini 2.5: thinkingBudget format, flash series supports ZeroAllowed
+//   - Gemini 3.x: thinkingLevel format, cannot be disabled
+//   - ThinkingSupport.Levels decides the output format used for ModeNone
+type Applier struct{}
+
+// NewApplier returns a fresh, stateless Gemini thinking applier.
+func NewApplier() *Applier {
+	return new(Applier)
+}
+
+func init() {
+	thinking.RegisterProvider("gemini", NewApplier()) // registers this applier under the "gemini" provider name
+}
+
+// Apply writes the thinking configuration into a Gemini request body.
+//
+// User-defined models are delegated to applyCompatible. Registry models without
+// thinking support, and unsupported modes, get the body back unchanged. An empty
+// or invalid JSON body is replaced with "{}" before writing.
+//
+// Expected output format (Gemini 2.5):
+//
+//	{
+//	  "generationConfig": {
+//	    "thinkingConfig": {
+//	      "thinkingBudget": 8192,
+//	      "includeThoughts": true
+//	    }
+//	  }
+//	}
+//
+// Expected output format (Gemini 3.x):
+//
+//	{
+//	  "generationConfig": {
+//	    "thinkingConfig": {
+//	      "thinkingLevel": "high",
+//	      "includeThoughts": true
+//	    }
+//	  }
+//	}
+func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
+	if thinking.IsUserDefinedModel(modelInfo) {
+		return a.applyCompatible(body, config)
+	}
+	if modelInfo.Thinking == nil {
+		return body, nil
+	}
+
+	switch config.Mode {
+	case thinking.ModeBudget, thinking.ModeLevel, thinking.ModeNone, thinking.ModeAuto:
+	default:
+		return body, nil
+	}
+
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		body = []byte(`{}`)
+	}
+
+	// Format selection:
+	//   - ModeLevel always uses the level format
+	//   - ModeNone uses the level format only for models that declare Levels
+	//   - ModeBudget/ModeAuto (and ModeNone otherwise) use the budget format
+	useLevels := config.Mode == thinking.ModeLevel ||
+		(config.Mode == thinking.ModeNone && len(modelInfo.Thinking.Levels) > 0)
+	if useLevels {
+		return a.applyLevelFormat(body, config)
+	}
+	return a.applyBudgetFormat(body, config)
+}
+
+// applyCompatible handles user-defined models: the output format follows the
+// requested config, since no registry capabilities are consulted here.
+func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
+	switch config.Mode {
+	case thinking.ModeBudget, thinking.ModeLevel, thinking.ModeNone, thinking.ModeAuto:
+	default:
+		return body, nil
+	}
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		body = []byte(`{}`)
+	}
+
+	// Level format: ModeLevel, or ModeNone that carries a level. Everything
+	// else (ModeAuto, ModeBudget, ModeNone without a level) uses the budget format.
+	if config.Mode == thinking.ModeLevel || (config.Mode == thinking.ModeNone && config.Level != "") {
+		return a.applyLevelFormat(body, config)
+	}
+	return a.applyBudgetFormat(body, config)
+}
+
+func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
+	// ModeNone semantics:
+	//   - ModeNone + Budget=0: completely disable thinking (not possible for Level-only models)
+	//   - ModeNone + Budget>0: forced to think but hide output (includeThoughts=false)
+	// ValidateConfig sets config.Level to the lowest level when ModeNone + Budget > 0.
+
+	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
+	result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingBudget")
+	// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
+	result, _ = sjson.DeleteBytes(result, "generationConfig.thinkingConfig.include_thoughts")
+
+	if config.Mode == thinking.ModeNone {
+		result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", false)
+		if config.Level != "" {
+			result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingLevel", string(config.Level))
+		}
+		return result, nil
+	}
+
+	// Only handle ModeLevel - budget conversion should be done by upper layer
+	if config.Mode != thinking.ModeLevel {
+		return body, nil
+	}
+
+	level := string(config.Level)
+	result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingLevel", level)
+	result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", true)
+	return result, nil
+}
+
+func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
+	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
+	result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingLevel")
+	// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
+	result, _ = sjson.DeleteBytes(result, "generationConfig.thinkingConfig.include_thoughts")
+
+	budget := config.Budget
+	// ModeNone semantics:
+	//   - ModeNone + Budget=0: completely disable thinking
+	//   - ModeNone + Budget>0: forced to think but hide output (includeThoughts=false)
+	// When ZeroAllowed=false, ValidateConfig clamps Budget to Min while preserving ModeNone.
+	includeThoughts := false
+	switch config.Mode {
+	case thinking.ModeNone:
+		includeThoughts = false
+	case thinking.ModeAuto:
+		includeThoughts = true
+	default:
+		includeThoughts = budget > 0
+	}
+
+	result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingBudget", budget)
+	result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", includeThoughts)
+	return result, nil
+}
--- a/internal/thinking/provider/geminicli/apply.go
+++ b/internal/thinking/provider/geminicli/apply.go
@@ -0,0 +1,126 @@
+// Package geminicli implements thinking configuration for Gemini CLI API format.
+//
+// Gemini CLI uses request.generationConfig.thinkingConfig.* path instead of
+// generationConfig.thinkingConfig.* used by standard Gemini API.
+package geminicli
+
+import (
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// Applier applies thinking configuration for Gemini CLI API format.
+type Applier struct{} // stateless: the type declares no fields
+
+var _ thinking.ProviderApplier = (*Applier)(nil) // compile-time check that *Applier satisfies the interface
+
+// NewApplier creates a new Gemini CLI thinking applier.
+func NewApplier() *Applier {
+	return &Applier{}
+}
+
+func init() {
+	thinking.RegisterProvider("gemini-cli", NewApplier()) // registered under the provider name "gemini-cli"
+}
+
+// Apply writes the thinking configuration into a Gemini CLI request body.
+//
+// User-defined models are delegated to applyCompatible. Models without thinking
+// metadata, and configs whose mode is not one of the four known modes, get the
+// body back untouched. Otherwise an empty or invalid JSON body becomes {} first.
+func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
+	if thinking.IsUserDefinedModel(modelInfo) {
+		return a.applyCompatible(body, config)
+	}
+	support := modelInfo.Thinking
+	if support == nil {
+		return body, nil
+	}
+
+	switch config.Mode {
+	case thinking.ModeBudget, thinking.ModeLevel, thinking.ModeNone, thinking.ModeAuto:
+	default:
+		return body, nil
+	}
+
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		body = []byte(`{}`)
+	}
+
+	// Auto and explicit budgets always use the budget form; level and none
+	// use the level form only when the model declares discrete levels.
+	wantsBudget := config.Mode == thinking.ModeAuto || config.Mode == thinking.ModeBudget
+	if wantsBudget || len(support.Levels) == 0 {
+		return a.applyBudgetFormat(body, config)
+	}
+	return a.applyLevelFormat(body, config)
+}
+
+// applyCompatible serves user-defined models: ModeLevel, and ModeNone that carries
+// a Level, use the level form; every other recognised mode uses the budget form.
+func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
+	switch config.Mode {
+	case thinking.ModeBudget, thinking.ModeLevel, thinking.ModeNone, thinking.ModeAuto:
+	default:
+		return body, nil // unrecognised mode: leave the request untouched
+	}
+
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		body = []byte(`{}`)
+	}
+
+	useLevel := config.Mode == thinking.ModeLevel || (config.Mode == thinking.ModeNone && config.Level != "")
+	if useLevel {
+		return a.applyLevelFormat(body, config)
+	}
+	return a.applyBudgetFormat(body, config)
+}
+
+// applyLevelFormat emits the level form; modes other than ModeLevel and ModeNone return body unchanged.
+func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
+	if config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone {
+		return body, nil // budget conversion is expected to happen in the caller's layer
+	}
+	const prefix = "request.generationConfig.thinkingConfig."
+
+	// Drop thinkingBudget (conflicts with thinkingLevel) and the snake_case include_thoughts spelling.
+	out, _ := sjson.DeleteBytes(body, prefix+"thinkingBudget")
+	out, _ = sjson.DeleteBytes(out, prefix+"include_thoughts")
+
+	if config.Mode == thinking.ModeLevel {
+		out, _ = sjson.SetBytes(out, prefix+"thinkingLevel", string(config.Level))
+		out, _ = sjson.SetBytes(out, prefix+"includeThoughts", true)
+		return out, nil
+	}
+
+	// ModeNone: hide thoughts; a level is written only when one was supplied.
+	out, _ = sjson.SetBytes(out, prefix+"includeThoughts", false)
+	if config.Level != "" {
+		out, _ = sjson.SetBytes(out, prefix+"thinkingLevel", string(config.Level))
+	}
+	return out, nil
+}
+
+// applyBudgetFormat rewrites the request into the budget form of
+// request.generationConfig.thinkingConfig: thinkingBudget plus includeThoughts.
+// Errors returned by sjson are discarded.
+func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
+	const prefix = "request.generationConfig.thinkingConfig."
+
+	// thinkingLevel must not coexist with thinkingBudget, and the snake_case
+	// include_thoughts spelling is dropped so only includeThoughts is sent.
+	out, _ := sjson.DeleteBytes(body, prefix+"thinkingLevel")
+	out, _ = sjson.DeleteBytes(out, prefix+"include_thoughts")
+
+	// ModeAuto always shows thoughts, ModeNone never does, and every other
+	// mode shows them only for a positive budget.
+	showThoughts := config.Mode == thinking.ModeAuto ||
+		(config.Mode != thinking.ModeNone && config.Budget > 0)
+
+	// The budget is written exactly as supplied in config.
+	out, _ = sjson.SetBytes(out, prefix+"thinkingBudget", config.Budget)
+	out, _ = sjson.SetBytes(out, prefix+"includeThoughts", showThoughts)
+	return out, nil
+}
--- a/internal/thinking/provider/iflow/apply.go
+++ b/internal/thinking/provider/iflow/apply.go
@@ -0,0 +1,173 @@
+// Package iflow implements thinking configuration for iFlow models.
+//
+// iFlow models use boolean toggle semantics:
+//   - Models using chat_template_kwargs.enable_thinking (boolean toggle)
+//   - MiniMax models: reasoning_split (boolean)
+//
+// Level values are converted to boolean: none=false, all others=true
+// See: _bmad-output/planning-artifacts/architecture.md#Epic-9
+package iflow
+
+import (
+	"strings"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// Applier implements thinking.ProviderApplier for iFlow models.
+//
+// iFlow-specific behavior:
+//   - enable_thinking toggle models: enable_thinking boolean
+//   - GLM models: enable_thinking boolean + clear_thinking=false (written only while thinking is enabled)
+//   - MiniMax models: reasoning_split boolean
+//   - Level to boolean: none=false, others=true
+//   - No graded effort: thinking is only switched on or off
+type Applier struct{}
+
+var _ thinking.ProviderApplier = (*Applier)(nil) // compile-time check that *Applier satisfies the interface
+
+// NewApplier creates a new iFlow thinking applier.
+func NewApplier() *Applier {
+	return &Applier{}
+}
+
+func init() {
+	thinking.RegisterProvider("iflow", NewApplier()) // registered under the provider name "iflow"
+}
+
+// Apply applies thinking configuration to iFlow request body.
+//
+// The body is returned unchanged for user-defined models, models without
+// thinking metadata, and model IDs that match neither family below.
+//
+// Output for enable_thinking models (clear_thinking: GLM only, thinking enabled only):
+//
+//	{
+//	  "chat_template_kwargs": {
+//	    "enable_thinking": true,
+//	    "clear_thinking": false
+//	  }
+//	}
+//
+// Output for MiniMax models:
+//
+//	{
+//	  "reasoning_split": true
+//	}
+func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
+	if thinking.IsUserDefinedModel(modelInfo) || modelInfo.Thinking == nil {
+		return body, nil
+	}
+
+	id := modelInfo.ID
+	switch {
+	case isEnableThinkingModel(id):
+		return applyEnableThinking(body, config, isGLMModel(id)), nil
+	case isMiniMaxModel(id):
+		return applyMiniMax(body, config), nil
+	default:
+		return body, nil
+	}
+}
+
+// configToBoolean collapses a ThinkingConfig into the on/off switch iFlow models accept.
+//
+// Result by mode:
+//   - ModeNone: false
+//   - ModeBudget: true only when Budget > 0
+//   - ModeLevel: true unless Level is LevelNone
+//   - ModeAuto: true
+//   - any other mode value: true
+//
+// Level is compared exactly against thinking.LevelNone; an empty Level under
+// ModeLevel therefore yields true.
+func configToBoolean(config thinking.ThinkingConfig) bool {
+	if config.Mode == thinking.ModeNone {
+		return false
+	}
+	if config.Mode == thinking.ModeBudget {
+		return config.Budget > 0
+	}
+	if config.Mode == thinking.ModeLevel {
+		return config.Level != thinking.LevelNone
+	}
+	// ModeAuto and any unrecognised mode both enable thinking.
+	return true
+}
+
+// applyEnableThinking sets chat_template_kwargs.enable_thinking from config.
+//
+// An empty or invalid JSON body is replaced by {} first. Any clear_thinking
+// already present in the request is removed; it is written back as false only
+// when thinking is enabled and setClearThinking is true (Apply passes true
+// for GLM models).
+//
+// Output when enabled with setClearThinking:
+//
+//	{"chat_template_kwargs": {"enable_thinking": true, "clear_thinking": false}}
+//
+// Output in every other case (enable_thinking carries the computed value):
+//
+//	{"chat_template_kwargs": {"enable_thinking": false}}
+func applyEnableThinking(body []byte, config thinking.ThinkingConfig, setClearThinking bool) []byte {
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		body = []byte(`{}`)
+	}
+	const kwargs = "chat_template_kwargs."
+	thinkingOn := configToBoolean(config)
+
+	out, _ := sjson.SetBytes(body, kwargs+"enable_thinking", thinkingOn)
+
+	// Any incoming clear_thinking is dropped first, so it survives only in the
+	// one case below where it is deliberately written back.
+	out, _ = sjson.DeleteBytes(out, kwargs+"clear_thinking")
+	if thinkingOn && setClearThinking {
+		out, _ = sjson.SetBytes(out, kwargs+"clear_thinking", false)
+	}
+	return out
+}
+
+// applyMiniMax sets the top-level reasoning_split flag used by MiniMax models.
+//
+// The flag is the boolean form of config (see configToBoolean). An empty or
+// invalid JSON body is replaced by {} before the flag is written.
+//
+// Output format:
+//
+//	{"reasoning_split": true/false}
+func applyMiniMax(body []byte, config thinking.ThinkingConfig) []byte {
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		body = []byte(`{}`)
+	}
+	split := configToBoolean(config)
+	out, _ := sjson.SetBytes(body, "reasoning_split", split)
+	return out
+}
+
+// isEnableThinkingModel reports whether the model is driven through
+// chat_template_kwargs.enable_thinking: every GLM model (see isGLMModel) plus
+// a fixed list of model IDs, compared case-insensitively.
+func isEnableThinkingModel(modelID string) bool {
+	if isGLMModel(modelID) {
+		return true
+	}
+	switch strings.ToLower(modelID) {
+	case "qwen3-max-preview", "deepseek-v3.2", "deepseek-v3.1":
+		return true
+	}
+	return false
+}
+
+// isGLMModel reports whether modelID begins with "glm", ignoring letter case.
+func isGLMModel(modelID string) bool {
+	return strings.HasPrefix(strings.ToLower(modelID), "glm")
+}
+
+// isMiniMaxModel reports whether modelID begins with "minimax", ignoring letter case.
+// Apply routes such models to the reasoning_split format.
+func isMiniMaxModel(modelID string) bool {
+	return strings.HasPrefix(strings.ToLower(modelID), "minimax")
+}
--- a/internal/thinking/provider/openai/apply.go
+++ b/internal/thinking/provider/openai/apply.go
@@ -0,0 +1,128 @@
+// Package openai implements thinking configuration for OpenAI/Codex models.
+//
+// OpenAI models use the reasoning_effort format with discrete levels
+// (low/medium/high). Some models support xhigh and none levels.
+// See: _bmad-output/planning-artifacts/architecture.md#Epic-8
+package openai
+
+import (
+	"strings"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// Applier implements thinking.ProviderApplier for OpenAI models.
+//
+// OpenAI-specific behavior:
+//   - Output format: reasoning_effort (string level; "none", and "auto" for user-defined models, may also be written)
+//   - Level-only mode: budgets are converted to a level only for user-defined models
+//   - Some models support ZeroAllowed (gpt-5.1, gpt-5.2)
+type Applier struct{}
+
+var _ thinking.ProviderApplier = (*Applier)(nil) // compile-time check that *Applier satisfies the interface
+
+// NewApplier creates a new OpenAI thinking applier.
+func NewApplier() *Applier {
+	return &Applier{}
+}
+
+func init() {
+	thinking.RegisterProvider("openai", NewApplier()) // registered under the provider name "openai"
+}
+
+// Apply applies thinking configuration to OpenAI request body by setting
+// reasoning_effort, e.g. {"reasoning_effort": "high"}.
+//
+// User-defined models go through applyCompatibleOpenAI. For registry models only
+// ModeLevel and ModeNone are handled; other modes, and models without thinking
+// metadata, return body unchanged.
+func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
+	if thinking.IsUserDefinedModel(modelInfo) {
+		return applyCompatibleOpenAI(body, config)
+	}
+	support := modelInfo.Thinking
+	if support == nil {
+		return body, nil
+	}
+
+	isLevel := config.Mode == thinking.ModeLevel
+	if !isLevel && config.Mode != thinking.ModeNone {
+		return body, nil
+	}
+
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		body = []byte(`{}`)
+	}
+
+	if isLevel {
+		out, _ := sjson.SetBytes(body, "reasoning_effort", string(config.Level))
+		return out, nil
+	}
+
+	// ModeNone: pick the effort to send, in order of preference:
+	// "none" when the budget is 0 and the model accepts it, else the level
+	// carried by config, else the first level the model lists.
+	noneAccepted := support.ZeroAllowed || hasLevel(support.Levels, string(thinking.LevelNone))
+	var effort string
+	switch {
+	case config.Budget == 0 && noneAccepted:
+		effort = string(thinking.LevelNone)
+	case config.Level != "":
+		effort = string(config.Level)
+	case len(support.Levels) > 0:
+		effort = support.Levels[0]
+	}
+	if effort == "" {
+		return body, nil
+	}
+
+	out, _ := sjson.SetBytes(body, "reasoning_effort", effort)
+	return out, nil
+}
+
+// applyCompatibleOpenAI sets reasoning_effort for user-defined models.
+//
+// ModeLevel writes the level, ModeNone writes the level if one is set and "none"
+// otherwise, ModeAuto writes "auto", and ModeBudget writes the level obtained from
+// thinking.ConvertBudgetToLevel. An empty ModeLevel level, a failed budget
+// conversion or an unknown mode returns the body without touching reasoning_effort.
+func applyCompatibleOpenAI(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		body = []byte(`{}`)
+	}
+
+	effort, ok := "", true
+	switch config.Mode {
+	case thinking.ModeLevel:
+		effort, ok = string(config.Level), config.Level != ""
+	case thinking.ModeNone:
+		// An explicit level wins over the plain "none".
+		if effort = string(config.Level); effort == "" {
+			effort = string(thinking.LevelNone)
+		}
+	case thinking.ModeAuto:
+		effort = string(thinking.LevelAuto)
+	case thinking.ModeBudget:
+		effort, ok = thinking.ConvertBudgetToLevel(config.Budget)
+	default:
+		ok = false
+	}
+	if !ok {
+		return body, nil
+	}
+
+	out, _ := sjson.SetBytes(body, "reasoning_effort", effort)
+	return out, nil
+}
+
+func hasLevel(levels []string, target string) bool {
+	for i := range levels { // each entry is trimmed, then compared with target ignoring case
+		if entry := strings.TrimSpace(levels[i]); strings.EqualFold(entry, target) {
+			return true
+		}
+	}
+	return false
+}
--- a/internal/thinking/strip.go
+++ b/internal/thinking/strip.go
@@ -0,0 +1,58 @@
+// Package thinking provides unified thinking configuration processing.
+package thinking
+
+import (
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// StripThinkingConfig removes thinking configuration fields from request body.
+//
+// This function is used when a model doesn't support thinking but the request
+// contains thinking configuration. The configuration is silently removed to
+// prevent upstream API errors.
+//
+// Parameters:
+//   - body: Original request body JSON
+//   - provider: Provider name (determines which fields to strip)
+//
+// Returns the request body with the provider's thinking fields deleted. The
+// body comes back unchanged when:
+//   - body is empty or invalid JSON
+//   - provider is unknown
+//   - no thinking configuration found
+//
+// Provider names are matched exactly (case-sensitive).
+func StripThinkingConfig(body []byte, provider string) []byte {
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		return body
+	}
+
+	// JSON paths holding thinking configuration, per provider. Providers
+	// missing from this table are left untouched.
+	pathsByProvider := map[string][]string{
+		"claude":      {"thinking"},
+		"gemini":      {"generationConfig.thinkingConfig"},
+		"gemini-cli":  {"request.generationConfig.thinkingConfig"},
+		"antigravity": {"request.generationConfig.thinkingConfig"},
+		"openai":      {"reasoning_effort"},
+		"codex":       {"reasoning.effort"},
+		"iflow": {
+			"chat_template_kwargs.enable_thinking",
+			"chat_template_kwargs.clear_thinking",
+			"reasoning_split",
+			"reasoning_effort",
+		},
+	}
+	paths, known := pathsByProvider[provider]
+	if !known {
+		return body
+	}
+
+	// Paths are deleted one after another; sjson errors are discarded.
+	stripped := body
+	for _, path := range paths {
+		stripped, _ = sjson.DeleteBytes(stripped, path)
+	}
+	return stripped
+}
--- a/internal/thinking/suffix.go
+++ b/internal/thinking/suffix.go
@@ -0,0 +1,146 @@
+// Package thinking provides unified thinking configuration processing.
+//
+// This file implements suffix parsing functionality for extracting
+// thinking configuration from model names in the format model(value).
+package thinking
+
+import (
+	"strconv"
+	"strings"
+)
+
+// ParseSuffix extracts thinking suffix from a model name.
+//
+// The suffix format is: model-name(value)
+// Examples:
+//   - "claude-sonnet-4-5(16384)" -> ModelName="claude-sonnet-4-5", RawSuffix="16384"
+//   - "gpt-5.2(high)" -> ModelName="gpt-5.2", RawSuffix="high"
+//   - "gemini-2.5-pro" -> ModelName="gemini-2.5-pro", HasSuffix=false
+//   - "model(a)(b)" -> ModelName="model(a)", RawSuffix="b" (last "(" wins)
+//   - "model()" -> ModelName="model", HasSuffix=true, RawSuffix=""
+//
+// A suffix is recognised whenever the name contains "(" and ends with ")";
+// nothing else about the name is checked.
+//
+// This function only extracts the suffix; it does not validate or interpret
+// the suffix content. Use ParseNumericSuffix, ParseLevelSuffix, etc. for
+// content interpretation.
+func ParseSuffix(model string) SuffixResult {
+	// The suffix starts at the last "(" and must run to a ")" that is the
+	// final character of the name.
+	open := strings.LastIndex(model, "(")
+	if open == -1 || !strings.HasSuffix(model, ")") {
+		return SuffixResult{ModelName: model, HasSuffix: false}
+	}
+
+	// Because the name ends in ")", open is at most len(model)-2 and the
+	// slice bounds below are always valid.
+	closing := len(model) - 1
+	return SuffixResult{
+		ModelName: model[:open],
+		HasSuffix: true,
+		RawSuffix: model[open+1 : closing],
+	}
+}
+
+// ParseNumericSuffix attempts to parse a raw suffix as a numeric budget value.
+//
+// This function parses the raw suffix content (from ParseSuffix.RawSuffix) as an integer
+// with strconv.Atoi. Negative results are rejected, so only values >= 0 are returned.
+//
+// Platform note: The budget value uses Go's int type, which is 32-bit on 32-bit
+// systems and 64-bit on 64-bit systems. Values exceeding the platform's int range
+// will return ok=false.
+//
+// Leading zeros are accepted: "08192" parses as 8192. An explicit plus sign is
+// accepted as well, since strconv.Atoi allows it.
+//
+// Examples:
+//   - "8192" -> budget=8192, ok=true
+//   - "0" -> budget=0, ok=true (represents ModeNone)
+//   - "08192" -> budget=8192, ok=true (leading zeros accepted)
+//   - "" -> budget=0, ok=false (empty suffix)
+//   - "-1" -> budget=0, ok=false (negative numbers are not valid numeric suffixes)
+//   - "high" -> budget=0, ok=false (not a number)
+//   - "9223372036854775808" -> budget=0, ok=false (overflow on 64-bit systems)
+//
+// For special handling of -1 as auto mode, use ParseSpecialSuffix instead.
+func ParseNumericSuffix(rawSuffix string) (budget int, ok bool) {
+	// strconv.Atoi already rejects the empty string, so no separate
+	// emptiness check is needed; it also rejects non-numeric text and
+	// values outside the platform's int range.
+	parsed, err := strconv.Atoi(rawSuffix)
+	if err != nil {
+		return 0, false
+	}
+
+	// Negative values are refused here; "-1" in particular is recognised
+	// by ParseSpecialSuffix as the auto mode.
+	if parsed < 0 {
+		return 0, false
+	}
+	return parsed, true
+}
+
+// ParseSpecialSuffix attempts to parse a raw suffix as a special thinking mode value.
+//
+// This function handles special strings that represent a change in thinking mode:
+//   - "none" -> ModeNone (disables thinking)
+//   - "auto" -> ModeAuto (automatic/dynamic thinking)
+//   - "-1"   -> ModeAuto (numeric representation of auto mode)
+//
+// String values are case-insensitive.
+//
+// For an empty or unrecognised suffix ok is false; the mode returned alongside
+// is ModeBudget and carries no meaning in that case.
+func ParseSpecialSuffix(rawSuffix string) (mode ThinkingMode, ok bool) {
+	lowered := strings.ToLower(rawSuffix)
+	if lowered == "none" {
+		return ModeNone, true
+	}
+	if lowered == "auto" || lowered == "-1" {
+		return ModeAuto, true
+	}
+	// The empty string falls through here together with every other
+	// unrecognised value.
+	return ModeBudget, false
+}
+
+// ParseLevelSuffix attempts to parse a raw suffix as a discrete thinking level.
+//
+// This function parses the raw suffix content (from ParseSuffix.RawSuffix) as a level.
+// Only discrete effort levels are valid: minimal, low, medium, high, xhigh.
+// Level matching is case-insensitive.
+//
+// Special values (none, auto) are NOT handled by this function; use ParseSpecialSuffix
+// instead. This separation allows callers to prioritize special value handling.
+//
+// The suffix is matched as given apart from letter case: surrounding whitespace
+// is not trimmed, so " high" is not recognised.
+//
+// Examples:
+//   - "high" -> level=LevelHigh, ok=true
+//   - "HIGH" -> level=LevelHigh, ok=true (case insensitive)
+//   - "medium" -> level=LevelMedium, ok=true
+//   - "minimal" -> level=LevelMinimal, ok=true
+//   - "xhigh" -> level=LevelXHigh, ok=true
+//   - "" -> level="", ok=false (empty suffix)
+//   - "none" -> level="", ok=false (special value, use ParseSpecialSuffix)
+//   - "auto" -> level="", ok=false (special value, use ParseSpecialSuffix)
+//   - "8192" -> level="", ok=false (numeric, use ParseNumericSuffix)
+//   - "ultra" -> level="", ok=false (unknown level)
+func ParseLevelSuffix(rawSuffix string) (level ThinkingLevel, ok bool) {
+	// Keys are the lowercase spellings accepted in a suffix.
+	levels := map[string]ThinkingLevel{
+		"minimal": LevelMinimal,
+		"low":     LevelLow,
+		"medium":  LevelMedium,
+		"high":    LevelHigh,
+		"xhigh":   LevelXHigh,
+	}
+
+	// A missing key (including the empty suffix, "none" and "auto") yields
+	// the zero ThinkingLevel "" together with ok=false.
+	level, ok = levels[strings.ToLower(rawSuffix)]
+	return level, ok
+}
--- a/internal/thinking/text.go
+++ b/internal/thinking/text.go
@@ -0,0 +1,41 @@
+package thinking
+
+import (
+	"github.com/tidwall/gjson"
+)
+
+// GetThinkingText extracts the thinking text from a content part. Lookup order:
+//  1. a string "text" on the part itself: { "thought": true, "text": "..." } or { "text": "..." }
+//  2. a string "thinking": { "thinking": "..." }
+//  3. an object "thinking", read from its string "text" and then its string "thinking"
+// Returns "" when none of these yields a string.
+func GetThinkingText(part gjson.Result) string {
+	// stringValue returns the value only when it exists and is a JSON string.
+	stringValue := func(r gjson.Result) (string, bool) {
+		if r.Exists() && r.Type == gjson.String {
+			return r.String(), true
+		}
+		return "", false
+	}
+
+	// A top-level string "text" wins over everything else.
+	if text, ok := stringValue(part.Get("text")); ok {
+		return text
+	}
+
+	thinkingField := part.Get("thinking")
+	if text, ok := stringValue(thinkingField); ok {
+		return text
+	}
+	if !thinkingField.IsObject() {
+		return ""
+	}
+
+	// Wrapped object: inner "text" is preferred over inner "thinking".
+	for _, key := range []string{"text", "thinking"} {
+		if text, ok := stringValue(thinkingField.Get(key)); ok {
+			return text
+		}
+	}
+	return ""
+}
--- a/internal/thinking/types.go
+++ b/internal/thinking/types.go
@@ -0,0 +1,116 @@
+// Package thinking provides unified thinking configuration processing.
+//
+// This package offers a unified interface for parsing, validating, and applying
+// thinking configurations across various AI providers (Claude, Gemini, OpenAI, iFlow).
+package thinking
+
+import "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+
+// ThinkingMode represents the type of thinking configuration mode.
+type ThinkingMode int
+
+const (
+	// ModeBudget indicates using a numeric budget (corresponds to suffix "(1000)" etc.)
+	ModeBudget ThinkingMode = iota
+	// ModeLevel indicates using a discrete level (corresponds to suffix "(high)" etc.)
+	ModeLevel
+	// ModeNone indicates thinking is disabled (corresponds to suffix "(none)" or budget=0)
+	ModeNone
+	// ModeAuto indicates automatic/dynamic thinking (corresponds to suffix "(auto)" or budget=-1)
+	ModeAuto
+)
+
+// String returns the lowercase name of the mode ("budget", "level", "none",
+// "auto"), or "unknown" for a value outside the declared constants.
+func (m ThinkingMode) String() string {
+	// Any value that is not one of the four constants is absent from the
+	// table and reported as "unknown".
+	names := map[ThinkingMode]string{
+		ModeBudget: "budget",
+		ModeLevel:  "level",
+		ModeNone:   "none",
+		ModeAuto:   "auto",
+	}
+	if name, found := names[m]; found {
+		return name
+	}
+	return "unknown"
+}
+
+// ThinkingLevel represents a discrete thinking level.
+type ThinkingLevel string
+
+const (
+	// LevelNone disables thinking (ValidateConfig rewrites ModeLevel+LevelNone to ModeNone)
+	LevelNone ThinkingLevel = "none"
+	// LevelAuto enables automatic/dynamic thinking (ValidateConfig rewrites ModeLevel+LevelAuto to ModeAuto)
+	LevelAuto ThinkingLevel = "auto"
+	// LevelMinimal sets minimal thinking effort
+	LevelMinimal ThinkingLevel = "minimal"
+	// LevelLow sets low thinking effort
+	LevelLow ThinkingLevel = "low"
+	// LevelMedium sets medium thinking effort
+	LevelMedium ThinkingLevel = "medium"
+	// LevelHigh sets high thinking effort
+	LevelHigh ThinkingLevel = "high"
+	// LevelXHigh sets extra-high thinking effort
+	LevelXHigh ThinkingLevel = "xhigh"
+)
+
+// ThinkingConfig represents a unified thinking configuration.
+//
+// This struct is used to pass thinking configuration information between components.
+// Which fields matter depends on Mode:
+//   - ModeNone: Budget is normally 0; after ValidateConfig it may be a clamped positive value, with Level set
+//   - ModeAuto: Budget=-1, Level is ignored
+//   - ModeBudget: Budget is a positive integer, Level is ignored
+//   - ModeLevel: Budget is ignored, Level is a valid level
+type ThinkingConfig struct {
+	// Mode specifies the configuration mode
+	Mode ThinkingMode
+	// Budget is the thinking budget (token count), used with ModeBudget and, when clamped above 0, ModeNone.
+	// Special values: 0 means disabled, -1 means automatic
+	Budget int
+	// Level is the thinking level, used with ModeLevel and optionally carried alongside ModeNone
+	Level ThinkingLevel
+}
+
+// SuffixResult represents the result of parsing a model name for thinking suffix.
+//
+// A thinking suffix is specified in the format model-name(value), where value
+// can be a numeric budget (e.g., "16384") or a level name (e.g., "high").
+type SuffixResult struct {
+	// ModelName is the model name with the suffix removed.
+	// If no suffix was found, this equals the original input.
+	ModelName string
+
+	// HasSuffix reports whether the name had the model-name(value) shape; the value is not validated.
+	HasSuffix bool
+
+	// RawSuffix is the content inside the parentheses, without the parentheses.
+	// Empty string if HasSuffix is false; may also be empty for a name ending in "()".
+	RawSuffix string
+}
+
+// ProviderApplier defines the interface for provider-specific thinking configuration application.
+//
+// Types implementing this interface are responsible for converting a unified ThinkingConfig
+// into provider-specific format and applying it to the request body.
+//
+// Implementation requirements:
+//   - Apply method must be idempotent
+//   - Must not modify the input config or modelInfo
+//   - Returns a modified copy of the request body
+//   - Returns appropriate ThinkingError for unsupported configurations
+type ProviderApplier interface {
+	// Apply applies the thinking configuration to the request body.
+	//
+	// Parameters:
+	//   - body: Original request body JSON
+	//   - config: Unified thinking configuration
+	//   - modelInfo: Model registry information containing ThinkingSupport properties
+	//
+	// Returns:
+	//   - Modified request body JSON
+	//   - ThinkingError if the configuration is invalid or unsupported
+	Apply(body []byte, config ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error)
+}
--- a/internal/thinking/validate.go
+++ b/internal/thinking/validate.go
@@ -0,0 +1,378 @@
+// Package thinking provides unified thinking configuration processing logic.
+package thinking
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	log "github.com/sirupsen/logrus"
+)
+
+// ValidateConfig validates a thinking configuration against model capabilities.
+//
+// This function performs comprehensive validation:
+//   - Checks if the model supports thinking
+//   - Auto-converts between Budget and Level formats based on model capability
+//   - Validates that requested level is in the model's supported levels list
+//   - Clamps budget values to model's allowed range
+//   - When converting Budget -> Level for level-only models, clamps the derived standard level to the nearest supported level
+//     (special values none/auto are preserved)
+//   - When config comes from a model suffix, strict budget validation is disabled (we clamp instead of error)
+//
+// Parameters:
+//   - config: The thinking configuration to validate
+//   - modelInfo: Model registry entry; nil, or a nil Thinking field, means no thinking support
+//   - fromFormat: Source provider format (used to determine strict validation rules)
+//   - toFormat: Target provider format
+//   - fromSuffix: Whether config was sourced from model suffix
+//
+// Returns:
+//   - Normalized ThinkingConfig with clamped values
+//   - ThinkingError if validation fails (ErrThinkingNotSupported, ErrLevelNotSupported, etc.)
+//
+// Auto-conversion behavior:
+//   - Budget-only model + Level config → Level converted to Budget
+//   - Level-only model + Budget config → Budget converted to Level
+//   - Hybrid model → preserve original format
+func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, fromFormat, toFormat string, fromSuffix bool) (*ThinkingConfig, error) {
+	fromFormat, toFormat = strings.ToLower(strings.TrimSpace(fromFormat)), strings.ToLower(strings.TrimSpace(toFormat))
+	model := "unknown" // used in error and log output when modelInfo carries no ID
+	support := (*registry.ThinkingSupport)(nil)
+	if modelInfo != nil {
+		if modelInfo.ID != "" {
+			model = modelInfo.ID
+		}
+		support = modelInfo.Thinking
+	}
+
+	if support == nil {
+		if config.Mode != ModeNone { // only a request to disable thinking is accepted without support
+			return nil, NewThinkingErrorWithModel(ErrThinkingNotSupported, "thinking not supported for this model", model)
+		}
+		return &config, nil
+	}
+
+	allowClampUnsupported := isBudgetBasedProvider(fromFormat) && isLevelBasedProvider(toFormat)
+	strictBudget := !fromSuffix && fromFormat != "" && isSameProviderFamily(fromFormat, toFormat) // gates the out-of-range error below
+	budgetDerivedFromLevel := false
+
+	capability := detectModelCapability(modelInfo)
+	switch capability {
+	case CapabilityBudgetOnly:
+		if config.Mode == ModeLevel {
+			if config.Level == LevelAuto {
+				break // leaves the switch; LevelAuto is turned into ModeAuto further down
+			}
+			budget, ok := ConvertLevelToBudget(string(config.Level))
+			if !ok {
+				return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("unknown level: %s", config.Level))
+			}
+			config.Mode = ModeBudget
+			config.Budget = budget
+			config.Level = ""
+			budgetDerivedFromLevel = true
+		}
+	case CapabilityLevelOnly:
+		if config.Mode == ModeBudget {
+			level, ok := ConvertBudgetToLevel(config.Budget)
+			if !ok {
+				return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("budget %d cannot be converted to a valid level", config.Budget))
+			}
+			// When converting Budget -> Level for level-only models, clamp the derived standard level
+			// to the nearest supported level. Special values (none/auto) are preserved.
+			config.Mode = ModeLevel
+			config.Level = clampLevel(ThinkingLevel(level), modelInfo, toFormat)
+			config.Budget = 0
+		}
+	case CapabilityHybrid: // nothing to convert: the config keeps the format it arrived in
+	}
+
+	if config.Mode == ModeLevel && config.Level == LevelNone {
+		config.Mode = ModeNone
+		config.Budget = 0
+		config.Level = ""
+	}
+	if config.Mode == ModeLevel && config.Level == LevelAuto {
+		config.Mode = ModeAuto
+		config.Budget = -1
+		config.Level = ""
+	}
+	if config.Mode == ModeBudget && config.Budget == 0 {
+		config.Mode = ModeNone
+		config.Level = ""
+	}
+
+	if len(support.Levels) > 0 && config.Mode == ModeLevel {
+		if !isLevelSupported(string(config.Level), support.Levels) {
+			if allowClampUnsupported {
+				config.Level = clampLevel(config.Level, modelInfo, toFormat)
+			}
+			if !isLevelSupported(string(config.Level), support.Levels) {
+				// User explicitly specified an unsupported level - return error
+				// (budget-derived levels may be clamped based on source format)
+				validLevels := normalizeLevels(support.Levels)
+				message := fmt.Sprintf("level %q not supported, valid levels: %s", strings.ToLower(string(config.Level)), strings.Join(validLevels, ", "))
+				return nil, NewThinkingError(ErrLevelNotSupported, message)
+			}
+		}
+	}
+
+	if strictBudget && config.Mode == ModeBudget && !budgetDerivedFromLevel {
+		min, max := support.Min, support.Max
+		if min != 0 || max != 0 { // a 0/0 range skips the range check entirely
+			if config.Budget < min || config.Budget > max || (config.Budget == 0 && !support.ZeroAllowed) { // Budget==0 became ModeNone above, so the zero test cannot fire here
+				message := fmt.Sprintf("budget %d out of range [%d,%d]", config.Budget, min, max)
+				return nil, NewThinkingError(ErrBudgetOutOfRange, message)
+			}
+		}
+	}
+
+	// Convert ModeAuto to mid-range if dynamic not allowed
+	if config.Mode == ModeAuto && !support.DynamicAllowed {
+		config = convertAutoToMidRange(config, support, toFormat, model)
+	}
+
+	if config.Mode == ModeNone && toFormat == "claude" {
+		// Claude supports explicit disable via thinking.type="disabled".
+		// Keep Budget=0 so applier can omit budget_tokens.
+		config.Budget = 0
+		config.Level = ""
+	} else {
+		switch config.Mode {
+		case ModeBudget, ModeAuto, ModeNone: // ModeLevel carries no budget, so it is not clamped
+			config.Budget = clampBudget(config.Budget, modelInfo, toFormat)
+		}
+
+		// ModeNone with clamped Budget > 0: set Level to lowest for Level-only/Hybrid models
+		// This ensures Apply layer doesn't need to access support.Levels
+		if config.Mode == ModeNone && config.Budget > 0 && len(support.Levels) > 0 {
+			config.Level = ThinkingLevel(support.Levels[0]) // takes the first listed level; presumably the list is ordered lowest-first (TODO confirm)
+		}
+	}
+
+	return &config, nil
+}
+
+// convertAutoToMidRange converts ModeAuto to a fixed value when dynamic is not allowed.
+//
+// This function handles the case where a model does not support dynamic/auto thinking.
+// The auto mode is silently converted based on model capability:
+//   - Level-only models (Levels set, Min and Max both 0): ModeLevel with LevelMedium
+//   - Budget models: mid = (Min + Max) / 2, then
+//       - mid > 0: ModeBudget with Budget=mid
+//       - mid <= 0 and ZeroAllowed: ModeNone with Budget=0
+//       - mid <= 0 otherwise: ModeBudget with Budget=Min
+//
+// Level is only rewritten in the level-only case; the budget cases leave it
+// as it was passed in.
+//
+// Logging:
+//   - Debug level when conversion occurs
+//   - Fields: provider, model, original_mode, clamped_to
+func convertAutoToMidRange(config ThinkingConfig, support *registry.ThinkingSupport, provider, model string) ThinkingConfig {
+	fields := log.Fields{
+		"provider":      provider,
+		"model":         model,
+		"original_mode": "auto",
+	}
+
+	// Level-only model: levels are declared but no Min/Max range is.
+	if len(support.Levels) > 0 && support.Min == 0 && support.Max == 0 {
+		config.Mode, config.Level, config.Budget = ModeLevel, LevelMedium, 0
+		fields["clamped_to"] = string(LevelMedium)
+		log.WithFields(fields).Debug("thinking: mode converted, dynamic not allowed, using medium level |")
+		return config
+	}
+
+	// Budget model: aim for the middle of the declared range.
+	mid := (support.Min + support.Max) / 2
+	switch {
+	case mid > 0:
+		config.Mode, config.Budget = ModeBudget, mid
+	case support.ZeroAllowed:
+		config.Mode, config.Budget = ModeNone, 0
+	default:
+		config.Mode, config.Budget = ModeBudget, support.Min
+	}
+	fields["clamped_to"] = config.Budget
+	log.WithFields(fields).Debug("thinking: mode converted, dynamic not allowed |")
+	return config
+}
+
+// standardLevelOrder lists the thinking levels from lowest to highest; levelIndex and clampLevel use an entry's index as its rank.
+var standardLevelOrder = []ThinkingLevel{LevelMinimal, LevelLow, LevelMedium, LevelHigh, LevelXHigh}
+
+// clampLevel returns the supported level closest to level, measured by position in standardLevelOrder.
+// When two supported levels are equally close, the lower one wins.
+func clampLevel(level ThinkingLevel, modelInfo *registry.ModelInfo, provider string) ThinkingLevel {
+	modelID := "unknown"
+	var allowed []string
+	if modelInfo != nil {
+		if modelInfo.ID != "" {
+			modelID = modelInfo.ID
+		}
+		if modelInfo.Thinking != nil {
+			allowed = modelInfo.Thinking.Levels
+		}
+	}
+
+	// Pass through when nothing constrains the level, it is already allowed, or it is not a standard level.
+	if len(allowed) == 0 || isLevelSupported(string(level), allowed) {
+		return level
+	}
+	want := levelIndex(string(level))
+	if want == -1 {
+		return level
+	}
+
+	// Scan the allowed levels for the smallest rank distance; on equal distance keep the lower rank.
+	nearest, gap := -1, len(standardLevelOrder)+1
+	for _, raw := range allowed {
+		cand := levelIndex(strings.TrimSpace(raw))
+		if cand == -1 {
+			continue // non-standard entries cannot be ranked
+		}
+		if d := abs(want - cand); d < gap || (d == gap && cand < nearest) {
+			nearest, gap = cand, d
+		}
+	}
+	// None of the allowed entries is a standard level: leave the request untouched.
+	if nearest < 0 {
+		return level
+	}
+
+	clamped := standardLevelOrder[nearest]
+	fields := log.Fields{"provider": provider, "model": modelID, "original_value": string(level), "clamped_to": string(clamped)}
+	log.WithFields(fields).Debug("thinking: level clamped |")
+	return clamped
+}
+
+// clampBudget forces a thinking budget into the range declared by the model's thinking support.
+//
+// The value is returned untouched when the model declares no thinking support or when it is
+// the auto marker (-1). A zero budget that is not allowed is raised to Min with a warning.
+// Otherwise a model without a numeric range (Min == Max == 0) keeps the value as is, and
+// any remaining value is limited to [Min, Max], except that an allowed zero below Min stays zero.
+func clampBudget(value int, modelInfo *registry.ModelInfo, provider string) int {
+	modelID := "unknown"
+	var support *registry.ThinkingSupport
+	if modelInfo != nil {
+		support = modelInfo.Thinking
+		if modelInfo.ID != "" {
+			modelID = modelInfo.ID
+		}
+	}
+	// No support data, or the auto marker (-1): nothing to clamp.
+	if support == nil || value == -1 {
+		return value
+	}
+
+	lo, hi := support.Min, support.Max
+	hasRange := lo != 0 || hi != 0
+	switch {
+	case value == 0 && !support.ZeroAllowed:
+		// Checked before the range test, so this also applies to level-only models.
+		log.WithFields(log.Fields{
+			"provider":       provider,
+			"model":          modelID,
+			"original_value": value,
+			"clamped_to":     lo,
+			"min":            lo,
+			"max":            hi,
+		}).Warn("thinking: budget zero not allowed |")
+		return lo
+	case !hasRange:
+		// Level-only models define no numeric budget range.
+		return value
+	case value == 0 && lo > 0:
+		// Zero sits below Min but is explicitly allowed (the disallowed case returned above).
+		return 0
+	case value < lo:
+		logClamp(provider, modelID, value, lo, lo, hi)
+		return lo
+	case value > hi:
+		logClamp(provider, modelID, value, hi, lo, hi)
+		return hi
+	default:
+		return value
+	}
+}
+
+// isLevelSupported reports whether level equals any entry of supported, ignoring case and whitespace around the entries.
+func isLevelSupported(level string, supported []string) bool {
+	found := false
+	for i := 0; i < len(supported) && !found; i++ {
+		found = strings.EqualFold(level, strings.TrimSpace(supported[i]))
+	}
+	return found
+}
+
+func levelIndex(level string) int {
+	for idx := range standardLevelOrder { // idx doubles as the level's rank
+		if name := string(standardLevelOrder[idx]); strings.EqualFold(name, level) {
+			return idx
+		}
+	}
+	return -1 // level is not one of the standard levels (comparison ignores case)
+}
+
+func normalizeLevels(levels []string) []string {
+	normalized := make([]string, 0, len(levels)) // fresh, non-nil slice even for nil input
+	for _, raw := range levels {
+		normalized = append(normalized, strings.ToLower(strings.TrimSpace(raw))) // trimmed, lower-case form
+	}
+	return normalized
+}
+
+// isBudgetBasedProvider reports whether provider is one of the formats matched here:
+// "gemini", "gemini-cli", "antigravity" or "claude" (exact, case-sensitive match).
+func isBudgetBasedProvider(provider string) bool {
+	return provider == "gemini" ||
+		provider == "gemini-cli" ||
+		provider == "antigravity" ||
+		provider == "claude"
+}
+
+// isLevelBasedProvider reports whether provider is one of the formats matched here:
+// "openai", "openai-response" or "codex" (exact, case-sensitive match).
+// Any other value, including the empty string, yields false.
+func isLevelBasedProvider(provider string) bool {
+	return provider == "openai" ||
+		provider == "openai-response" ||
+		provider == "codex"
+}
+
+// isGeminiFamily reports whether provider is one of the formats matched here:
+// "gemini", "gemini-cli" or "antigravity" (exact, case-sensitive match).
+// Any other value, including the empty string, yields false.
+func isGeminiFamily(provider string) bool {
+	return provider == "gemini" ||
+		provider == "gemini-cli" ||
+		provider == "antigravity"
+}
+
+// isSameProviderFamily reports whether from and to are the same provider
+// or are both members of the Gemini family (see isGeminiFamily).
+func isSameProviderFamily(from, to string) bool {
+	bothGemini := isGeminiFamily(from) && isGeminiFamily(to)
+	return from == to || bothGemini
+}
+
+func abs(x int) int {
+	if x >= 0 {
+		return x // already non-negative
+	}
+	return -x // negative input: flip the sign
+}
+
+// logClamp writes a Debug entry recording that a thinking budget was adjusted from original to clampedTo.
+func logClamp(provider, model string, original, clampedTo, min, max int) {
+	fields := log.Fields{
+		"provider": provider, "model": model,
+		"original_value": original, "clamped_to": clampedTo,
+		"min": min, "max": max,
+	}
+	entry := log.WithFields(fields)
+	entry.Debug("thinking: budget clamped |")
+}
--- a/internal/translator/antigravity/claude/antigravity_claude_request.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request.go
@@ -7,40 +7,16 @@ package claude

 import (
 	"bytes"
-	"crypto/sha256"
-	"encoding/hex"
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/cache"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )

-// deriveSessionID generates a stable session ID from the request.
-// Uses the hash of the first user message to identify the conversation.
-func deriveSessionID(rawJSON []byte) string {
-	messages := gjson.GetBytes(rawJSON, "messages")
-	if !messages.IsArray() {
-		return ""
-	}
-	for _, msg := range messages.Array() {
-		if msg.Get("role").String() == "user" {
-			content := msg.Get("content").String()
-			if content == "" {
-				// Try to get text from content array
-				content = msg.Get("content.0.text").String()
-			}
-			if content != "" {
-				h := sha256.Sum256([]byte(content))
-				return hex.EncodeToString(h[:16])
-			}
-		}
-	}
-	return ""
-}
-
 // ConvertClaudeRequestToAntigravity parses and transforms a Claude Code API request into Gemini CLI API format.
 // It extracts the model name, system instruction, message contents, and tool declarations
 // from the raw JSON request and returns them in the format expected by the Gemini CLI API.
@@ -60,11 +36,9 @@ func deriveSessionID(rawJSON []byte) string {
 // Returns:
 //   - []byte: The transformed request data in Gemini CLI API format
 func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _ bool) []byte {
+	enableThoughtTranslate := true
 	rawJSON := bytes.Clone(inputRawJSON)

-	// Derive session ID for signature caching
-	sessionID := deriveSessionID(rawJSON)
-
 	// system instruction
 	systemInstructionJSON := ""
 	hasSystemInstruction := false
@@ -122,42 +96,50 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 					contentTypeResult := contentResult.Get("type")
 					if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "thinking" {
 						// Use GetThinkingText to handle wrapped thinking objects
-						thinkingText := util.GetThinkingText(contentResult)
-						signatureResult := contentResult.Get("signature")
-						clientSignature := ""
-						if signatureResult.Exists() && signatureResult.String() != "" {
-							clientSignature = signatureResult.String()
-						}
+						thinkingText := thinking.GetThinkingText(contentResult)

 						// Always try cached signature first (more reliable than client-provided)
 						// Client may send stale or invalid signatures from different sessions
 						signature := ""
-						if sessionID != "" && thinkingText != "" {
-							if cachedSig := cache.GetCachedSignature(sessionID, thinkingText); cachedSig != "" {
+						if thinkingText != "" {
+							if cachedSig := cache.GetCachedSignature(modelName, thinkingText); cachedSig != "" {
 								signature = cachedSig
 								// log.Debugf("Using cached signature for thinking block")
 							}
 						}

 						// Fallback to client signature only if cache miss and client signature is valid
-						if signature == "" && cache.HasValidSignature(clientSignature) {
-							signature = clientSignature
+						if signature == "" {
+							signatureResult := contentResult.Get("signature")
+							clientSignature := ""
+							if signatureResult.Exists() && signatureResult.String() != "" {
+								// Client signatures arrive as "<model group>#<signature>", the form the response
+								// translator emits via cache.GetModelGroup, so the prefix is compared to the group.
+								arrayClientSignatures := strings.SplitN(signatureResult.String(), "#", 2)
+								if len(arrayClientSignatures) == 2 && cache.GetModelGroup(modelName) == arrayClientSignatures[0] {
+									clientSignature = arrayClientSignatures[1]
+								}
+							}
+							if cache.HasValidSignature(modelName, clientSignature) {
+								signature = clientSignature
+							}
 							// log.Debugf("Using client-provided signature for thinking block")
 						}

 						// Store for subsequent tool_use in the same message
-						if cache.HasValidSignature(signature) {
+						if cache.HasValidSignature(modelName, signature) {
 							currentMessageThinkingSignature = signature
 						}

 						// Skip trailing unsigned thinking blocks on last assistant message
-						isUnsigned := !cache.HasValidSignature(signature)
+						isUnsigned := !cache.HasValidSignature(modelName, signature)

 						// If unsigned, skip entirely (don't convert to text)
 						// Claude requires assistant messages to start with thinking blocks when thinking is enabled
 						// Converting to text would break this requirement
 						if isUnsigned {
 							// log.Debugf("Dropping unsigned thinking block (no valid signature)")
+							enableThoughtTranslate = false
 							continue
 						}

@@ -205,7 +187,7 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 							// This is the approach used in opencode-google-antigravity-auth for Gemini
 							// and also works for Claude through Antigravity API
 							const skipSentinel = "skip_thought_signature_validator"
-							if cache.HasValidSignature(currentMessageThinkingSignature) {
+							if cache.HasValidSignature(modelName, currentMessageThinkingSignature) {
 								partJSON, _ = sjson.Set(partJSON, "thoughtSignature", currentMessageThinkingSignature)
 							} else {
 								// No valid signature - use skip sentinel to bypass validation
@@ -385,12 +367,12 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 	}

 	// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
-	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
+	if t := gjson.GetBytes(rawJSON, "thinking"); enableThoughtTranslate && t.Exists() && t.IsObject() {
 		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
-				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
+				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
 			}
 		}
 	}
--- a/internal/translator/antigravity/claude/antigravity_claude_request_test.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request_test.go
@@ -4,6 +4,7 @@ import (
 	"strings"
 	"testing"

+	"github.com/router-for-me/CLIProxyAPI/v6/internal/cache"
 	"github.com/tidwall/gjson"
 )

@@ -73,30 +74,41 @@ func TestConvertClaudeRequestToAntigravity_RoleMapping(t *testing.T) {
 }

 func TestConvertClaudeRequestToAntigravity_ThinkingBlocks(t *testing.T) {
+	cache.ClearSignatureCache("")
+
 	// Valid signature must be at least 50 characters
 	validSignature := "abc123validSignature1234567890123456789012345678901234567890"
+	thinkingText := "Let me think..."
+
+	// Pre-cache the signature (simulating a previous response for the same thinking text)
 	inputJSON := []byte(`{
 		"model": "claude-sonnet-4-5-thinking",
 		"messages": [
+			{
+				"role": "user",
+				"content": [{"type": "text", "text": "Test user message"}]
+			},
 			{
 				"role": "assistant",
 				"content": [
-					{"type": "thinking", "thinking": "Let me think...", "signature": "` + validSignature + `"},
+					{"type": "thinking", "thinking": "` + thinkingText + `", "signature": "` + validSignature + `"},
 					{"type": "text", "text": "Answer"}
 				]
 			}
 		]
 	}`)

+	cache.CacheSignature("claude-sonnet-4-5-thinking", thinkingText, validSignature)
+
 	output := ConvertClaudeRequestToAntigravity("claude-sonnet-4-5-thinking", inputJSON, false)
 	outputStr := string(output)

-	// Check thinking block conversion
-	firstPart := gjson.Get(outputStr, "request.contents.0.parts.0")
+	// Check thinking block conversion (now in contents.1 due to user message)
+	firstPart := gjson.Get(outputStr, "request.contents.1.parts.0")
 	if !firstPart.Get("thought").Bool() {
 		t.Error("thinking block should have thought: true")
 	}
-	if firstPart.Get("text").String() != "Let me think..." {
+	if firstPart.Get("text").String() != thinkingText {
 		t.Error("thinking text mismatch")
 	}
 	if firstPart.Get("thoughtSignature").String() != validSignature {
@@ -105,6 +117,8 @@ func TestConvertClaudeRequestToAntigravity_ThinkingBlocks(t *testing.T) {
 }

 func TestConvertClaudeRequestToAntigravity_ThinkingBlockWithoutSignature(t *testing.T) {
+	cache.ClearSignatureCache("")
+
 	// Unsigned thinking blocks should be removed entirely (not converted to text)
 	inputJSON := []byte(`{
 		"model": "claude-sonnet-4-5-thinking",
@@ -226,14 +240,22 @@ func TestConvertClaudeRequestToAntigravity_ToolUse(t *testing.T) {
 }

 func TestConvertClaudeRequestToAntigravity_ToolUse_WithSignature(t *testing.T) {
+	cache.ClearSignatureCache("")
+
 	validSignature := "abc123validSignature1234567890123456789012345678901234567890"
+	thinkingText := "Let me think..."
+
 	inputJSON := []byte(`{
 		"model": "claude-sonnet-4-5-thinking",
 		"messages": [
+			{
+				"role": "user",
+				"content": [{"type": "text", "text": "Test user message"}]
+			},
 			{
 				"role": "assistant",
 				"content": [
-					{"type": "thinking", "thinking": "Let me think...", "signature": "` + validSignature + `"},
+					{"type": "thinking", "thinking": "` + thinkingText + `", "signature": "` + validSignature + `"},
 					{
 						"type": "tool_use",
 						"id": "call_123",
@@ -245,11 +267,13 @@ func TestConvertClaudeRequestToAntigravity_ToolUse_WithSignature(t *testing.T) {
 		]
 	}`)

+	cache.CacheSignature("claude-sonnet-4-5-thinking", thinkingText, validSignature)
+
 	output := ConvertClaudeRequestToAntigravity("claude-sonnet-4-5-thinking", inputJSON, false)
 	outputStr := string(output)

-	// Check function call has the signature from the preceding thinking block
-	part := gjson.Get(outputStr, "request.contents.0.parts.1")
+	// Check function call has the signature from the preceding thinking block (now in contents.1)
+	part := gjson.Get(outputStr, "request.contents.1.parts.1")
 	if part.Get("functionCall.name").String() != "get_weather" {
 		t.Errorf("Expected functionCall, got %s", part.Raw)
 	}
@@ -259,26 +283,36 @@ func TestConvertClaudeRequestToAntigravity_ToolUse_WithSignature(t *testing.T) {
 }

 func TestConvertClaudeRequestToAntigravity_ReorderThinking(t *testing.T) {
+	cache.ClearSignatureCache("")
+
 	// Case: text block followed by thinking block -> should be reordered to thinking first
 	validSignature := "abc123validSignature1234567890123456789012345678901234567890"
+	thinkingText := "Planning..."
+
 	inputJSON := []byte(`{
 		"model": "claude-sonnet-4-5-thinking",
 		"messages": [
+			{
+				"role": "user",
+				"content": [{"type": "text", "text": "Test user message"}]
+			},
 			{
 				"role": "assistant",
 				"content": [
 					{"type": "text", "text": "Here is the plan."},
-					{"type": "thinking", "thinking": "Planning...", "signature": "` + validSignature + `"}
+					{"type": "thinking", "thinking": "` + thinkingText + `", "signature": "` + validSignature + `"}
 				]
 			}
 		]
 	}`)

+	cache.CacheSignature("claude-sonnet-4-5-thinking", thinkingText, validSignature)
+
 	output := ConvertClaudeRequestToAntigravity("claude-sonnet-4-5-thinking", inputJSON, false)
 	outputStr := string(output)

-	// Verify order: Thinking block MUST be first
-	parts := gjson.Get(outputStr, "request.contents.0.parts").Array()
+	// Verify order: Thinking block MUST be first (now in contents.1 due to user message)
+	parts := gjson.Get(outputStr, "request.contents.1.parts").Array()
 	if len(parts) != 2 {
 		t.Fatalf("Expected 2 parts, got %d", len(parts))
 	}
@@ -343,8 +377,8 @@ func TestConvertClaudeRequestToAntigravity_ThinkingConfig(t *testing.T) {
 		if thinkingConfig.Get("thinkingBudget").Int() != 8000 {
 			t.Errorf("Expected thinkingBudget 8000, got %d", thinkingConfig.Get("thinkingBudget").Int())
 		}
-		if !thinkingConfig.Get("include_thoughts").Bool() {
-			t.Error("include_thoughts should be true")
+		if !thinkingConfig.Get("includeThoughts").Bool() {
+			t.Error("includeThoughts should be true")
 		}
 	} else {
 		t.Log("thinkingConfig not present - model may not be registered in test registry")
@@ -459,7 +493,12 @@ func TestConvertClaudeRequestToAntigravity_TrailingUnsignedThinking_Removed(t *t
 }

 func TestConvertClaudeRequestToAntigravity_TrailingSignedThinking_Kept(t *testing.T) {
+	cache.ClearSignatureCache("")
+
 	// Last assistant message ends with signed thinking block - should be kept
+	validSignature := "abc123validSignature1234567890123456789012345678901234567890"
+	thinkingText := "Valid thinking..."
+
 	inputJSON := []byte(`{
 		"model": "claude-sonnet-4-5-thinking",
 		"messages": [
@@ -471,12 +510,14 @@ func TestConvertClaudeRequestToAntigravity_TrailingSignedThinking_Kept(t *testin
 				"role": "assistant",
 				"content": [
 					{"type": "text", "text": "Here is my answer"},
-					{"type": "thinking", "thinking": "Valid thinking...", "signature": "abc123validSignature1234567890123456789012345678901234567890"}
+					{"type": "thinking", "thinking": "` + thinkingText + `", "signature": "` + validSignature + `"}
 				]
 			}
 		]
 	}`)

+	cache.CacheSignature("claude-sonnet-4-5-thinking", thinkingText, validSignature)
+
 	output := ConvertClaudeRequestToAntigravity("claude-sonnet-4-5-thinking", inputJSON, false)
 	outputStr := string(output)

--- a/internal/translator/antigravity/claude/antigravity_claude_response.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_response.go
@@ -41,7 +41,6 @@ type Params struct {
 	HasContent           bool   // Tracks whether any content (text, thinking, or tool use) has been output

 	// Signature caching support
-	SessionID           string          // Session ID derived from request for signature caching
 	CurrentThinkingText strings.Builder // Accumulates thinking text for signature caching
 }

@@ -70,9 +69,9 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
 			HasFirstResponse: false,
 			ResponseType:     0,
 			ResponseIndex:    0,
-			SessionID:        deriveSessionID(originalRequestRawJSON),
 		}
 	}
+	modelName := gjson.GetBytes(requestRawJSON, "model").String()

 	params := (*param).(*Params)

@@ -138,14 +137,14 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
 					if thoughtSignature := partResult.Get("thoughtSignature"); thoughtSignature.Exists() && thoughtSignature.String() != "" {
 						// log.Debug("Branch: signature_delta")

-						if params.SessionID != "" && params.CurrentThinkingText.Len() > 0 {
-							cache.CacheSignature(params.SessionID, params.CurrentThinkingText.String(), thoughtSignature.String())
-							// log.Debugf("Cached signature for thinking block (sessionID=%s, textLen=%d)", params.SessionID, params.CurrentThinkingText.Len())
+						if params.CurrentThinkingText.Len() > 0 {
+							cache.CacheSignature(modelName, params.CurrentThinkingText.String(), thoughtSignature.String())
+							// log.Debugf("Cached signature for thinking block (textLen=%d)", params.CurrentThinkingText.Len())
 							params.CurrentThinkingText.Reset()
 						}

 						output = output + "event: content_block_delta\n"
-						data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":""}}`, params.ResponseIndex), "delta.signature", thoughtSignature.String())
+						data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":""}}`, params.ResponseIndex), "delta.signature", fmt.Sprintf("%s#%s", cache.GetModelGroup(modelName), thoughtSignature.String()))
 						output = output + fmt.Sprintf("data: %s\n\n\n", data)
 						params.HasContent = true
 					} else if params.ResponseType == 2 { // Continue existing thinking block if already in thinking state
@@ -372,7 +371,7 @@ func resolveStopReason(params *Params) string {
 //   - string: A Claude-compatible JSON response.
 func ConvertAntigravityResponseToClaudeNonStream(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, _ *any) string {
 	_ = originalRequestRawJSON
-	_ = requestRawJSON
+	modelName := gjson.GetBytes(requestRawJSON, "model").String()

 	root := gjson.ParseBytes(rawJSON)
 	promptTokens := root.Get("response.usageMetadata.promptTokenCount").Int()
@@ -437,7 +436,7 @@ func ConvertAntigravityResponseToClaudeNonStream(_ context.Context, _ string, or
 		block := `{"type":"thinking","thinking":""}`
 		block, _ = sjson.Set(block, "thinking", thinkingBuilder.String())
 		if thinkingSignature != "" {
-			block, _ = sjson.Set(block, "signature", thinkingSignature)
+			block, _ = sjson.Set(block, "signature", fmt.Sprintf("%s#%s", cache.GetModelGroup(modelName), thinkingSignature))
 		}
 		responseJSON, _ = sjson.SetRaw(responseJSON, "content.-1", block)
 		thinkingBuilder.Reset()
--- a/internal/translator/antigravity/claude/antigravity_claude_response_test.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_response_test.go
@@ -12,10 +12,10 @@ import (
 // Signature Caching Tests
 // ============================================================================

-func TestConvertAntigravityResponseToClaude_SessionIDDerived(t *testing.T) {
+func TestConvertAntigravityResponseToClaude_ParamsInitialized(t *testing.T) {
 	cache.ClearSignatureCache("")

-	// Request with user message - should derive session ID
+	// Request with user message - should initialize params
 	requestJSON := []byte(`{
 		"messages": [
 			{"role": "user", "content": [{"type": "text", "text": "Hello world"}]}
@@ -37,10 +37,12 @@ func TestConvertAntigravityResponseToClaude_SessionIDDerived(t *testing.T) {
 	ctx := context.Background()
 	ConvertAntigravityResponseToClaude(ctx, "claude-sonnet-4-5-thinking", requestJSON, requestJSON, responseJSON, &param)

-	// Verify session ID was set
 	params := param.(*Params)
-	if params.SessionID == "" {
-		t.Error("SessionID should be derived from request")
+	if !params.HasFirstResponse {
+		t.Error("HasFirstResponse should be set after first chunk")
+	}
+	if params.CurrentThinkingText.Len() == 0 {
+		t.Error("Thinking text should be accumulated")
 	}
 }

@@ -97,6 +99,7 @@ func TestConvertAntigravityResponseToClaude_SignatureCached(t *testing.T) {
 	cache.ClearSignatureCache("")

 	requestJSON := []byte(`{
+		"model": "claude-sonnet-4-5-thinking",
 		"messages": [{"role": "user", "content": [{"type": "text", "text": "Cache test"}]}]
 	}`)

@@ -129,12 +132,8 @@ func TestConvertAntigravityResponseToClaude_SignatureCached(t *testing.T) {
 	// Process thinking chunk
 	ConvertAntigravityResponseToClaude(ctx, "claude-sonnet-4-5-thinking", requestJSON, requestJSON, thinkingChunk, &param)
 	params := param.(*Params)
-	sessionID := params.SessionID
 	thinkingText := params.CurrentThinkingText.String()

-	if sessionID == "" {
-		t.Fatal("SessionID should be set")
-	}
 	if thinkingText == "" {
 		t.Fatal("Thinking text should be accumulated")
 	}
@@ -143,7 +142,7 @@ func TestConvertAntigravityResponseToClaude_SignatureCached(t *testing.T) {
 	ConvertAntigravityResponseToClaude(ctx, "claude-sonnet-4-5-thinking", requestJSON, requestJSON, signatureChunk, &param)

 	// Verify signature was cached
-	cachedSig := cache.GetCachedSignature(sessionID, thinkingText)
+	cachedSig := cache.GetCachedSignature("claude-sonnet-4-5-thinking", thinkingText)
 	if cachedSig != validSignature {
 		t.Errorf("Expected cached signature '%s', got '%s'", validSignature, cachedSig)
 	}
@@ -158,6 +157,7 @@ func TestConvertAntigravityResponseToClaude_MultipleThinkingBlocks(t *testing.T)
 	cache.ClearSignatureCache("")

 	requestJSON := []byte(`{
+		"model": "claude-sonnet-4-5-thinking",
 		"messages": [{"role": "user", "content": [{"type": "text", "text": "Multi block test"}]}]
 	}`)

@@ -221,13 +221,12 @@ func TestConvertAntigravityResponseToClaude_MultipleThinkingBlocks(t *testing.T)
 	// Process first thinking block
 	ConvertAntigravityResponseToClaude(ctx, "claude-sonnet-4-5-thinking", requestJSON, requestJSON, block1Thinking, &param)
 	params := param.(*Params)
-	sessionID := params.SessionID
 	firstThinkingText := params.CurrentThinkingText.String()

 	ConvertAntigravityResponseToClaude(ctx, "claude-sonnet-4-5-thinking", requestJSON, requestJSON, block1Sig, &param)

 	// Verify first signature cached
-	if cache.GetCachedSignature(sessionID, firstThinkingText) != validSig1 {
+	if cache.GetCachedSignature("claude-sonnet-4-5-thinking", firstThinkingText) != validSig1 {
 		t.Error("First thinking block signature should be cached")
 	}

@@ -241,76 +240,7 @@ func TestConvertAntigravityResponseToClaude_MultipleThinkingBlocks(t *testing.T)
 	ConvertAntigravityResponseToClaude(ctx, "claude-sonnet-4-5-thinking", requestJSON, requestJSON, block2Sig, &param)

 	// Verify second signature cached
-	if cache.GetCachedSignature(sessionID, secondThinkingText) != validSig2 {
+	if cache.GetCachedSignature("claude-sonnet-4-5-thinking", secondThinkingText) != validSig2 {
 		t.Error("Second thinking block signature should be cached")
 	}
 }
-
-func TestDeriveSessionIDFromRequest(t *testing.T) {
-	tests := []struct {
-		name      string
-		input     []byte
-		wantEmpty bool
-	}{
-		{
-			name:      "valid user message",
-			input:     []byte(`{"messages": [{"role": "user", "content": "Hello"}]}`),
-			wantEmpty: false,
-		},
-		{
-			name:      "user message with content array",
-			input:     []byte(`{"messages": [{"role": "user", "content": [{"type": "text", "text": "Hello"}]}]}`),
-			wantEmpty: false,
-		},
-		{
-			name:      "no user message",
-			input:     []byte(`{"messages": [{"role": "assistant", "content": "Hi"}]}`),
-			wantEmpty: true,
-		},
-		{
-			name:      "empty messages",
-			input:     []byte(`{"messages": []}`),
-			wantEmpty: true,
-		},
-		{
-			name:      "no messages field",
-			input:     []byte(`{}`),
-			wantEmpty: true,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result := deriveSessionID(tt.input)
-			if tt.wantEmpty && result != "" {
-				t.Errorf("Expected empty session ID, got '%s'", result)
-			}
-			if !tt.wantEmpty && result == "" {
-				t.Error("Expected non-empty session ID")
-			}
-		})
-	}
-}
-
-func TestDeriveSessionIDFromRequest_Deterministic(t *testing.T) {
-	input := []byte(`{"messages": [{"role": "user", "content": "Same message"}]}`)
-
-	id1 := deriveSessionID(input)
-	id2 := deriveSessionID(input)
-
-	if id1 != id2 {
-		t.Errorf("Session ID should be deterministic: '%s' != '%s'", id1, id2)
-	}
-}
-
-func TestDeriveSessionIDFromRequest_DifferentMessages(t *testing.T) {
-	input1 := []byte(`{"messages": [{"role": "user", "content": "Message A"}]}`)
-	input2 := []byte(`{"messages": [{"role": "user", "content": "Message B"}]}`)
-
-	id1 := deriveSessionID(input1)
-	id2 := deriveSessionID(input2)
-
-	if id1 == id2 {
-		t.Error("Different messages should produce different session IDs")
-	}
-}
--- a/internal/translator/antigravity/gemini/antigravity_gemini_request.go
+++ b/internal/translator/antigravity/gemini/antigravity_gemini_request.go
@@ -8,6 +8,7 @@ package gemini
 import (
 	"bytes"
 	"fmt"
+	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
@@ -32,12 +33,12 @@ import (
 //
 // Returns:
 //   - []byte: The transformed request data in Gemini API format
-func ConvertGeminiRequestToAntigravity(_ string, inputRawJSON []byte, _ bool) []byte {
+func ConvertGeminiRequestToAntigravity(modelName string, inputRawJSON []byte, _ bool) []byte {
 	rawJSON := bytes.Clone(inputRawJSON)
 	template := ""
 	template = `{"project":"","request":{},"model":""}`
 	template, _ = sjson.SetRaw(template, "request", string(rawJSON))
-	template, _ = sjson.Set(template, "model", gjson.Get(template, "request.model").String())
+	template, _ = sjson.Set(template, "model", modelName)
 	template, _ = sjson.Delete(template, "request.model")

 	template, errFixCLIToolResponse := fixCLIToolResponse(template)
@@ -97,37 +98,40 @@ func ConvertGeminiRequestToAntigravity(_ string, inputRawJSON []byte, _ bool) []
 		}
 	}

-	// Gemini-specific handling: add skip_thought_signature_validator to functionCall parts
-	// and remove thinking blocks entirely (Gemini doesn't need to preserve them)
-	const skipSentinel = "skip_thought_signature_validator"
+	// Gemini-specific handling for non-Claude models:
+	// - Add skip_thought_signature_validator to functionCall parts so upstream can bypass signature validation.
+	// - Also mark thinking parts with the same sentinel when present (we keep the parts; we only annotate them).
+	if !strings.Contains(modelName, "claude") {
+		const skipSentinel = "skip_thought_signature_validator"

-	gjson.GetBytes(rawJSON, "request.contents").ForEach(func(contentIdx, content gjson.Result) bool {
-		if content.Get("role").String() == "model" {
-			// First pass: collect indices of thinking parts to remove
-			var thinkingIndicesToRemove []int64
-			content.Get("parts").ForEach(func(partIdx, part gjson.Result) bool {
-				// Mark thinking blocks for removal
-				if part.Get("thought").Bool() {
-					thinkingIndicesToRemove = append(thinkingIndicesToRemove, partIdx.Int())
-				}
-				// Add skip sentinel to functionCall parts
-				if part.Get("functionCall").Exists() {
-					existingSig := part.Get("thoughtSignature").String()
-					if existingSig == "" || len(existingSig) < 50 {
-						rawJSON, _ = sjson.SetBytes(rawJSON, fmt.Sprintf("request.contents.%d.parts.%d.thoughtSignature", contentIdx.Int(), partIdx.Int()), skipSentinel)
+		gjson.GetBytes(rawJSON, "request.contents").ForEach(func(contentIdx, content gjson.Result) bool {
+			if content.Get("role").String() == "model" {
+				// First pass: collect indices of thinking parts to mark with skip sentinel
+				var thinkingIndicesToSkipSignature []int64
+				content.Get("parts").ForEach(func(partIdx, part gjson.Result) bool {
+					// Collect indices of thinking blocks to mark with skip sentinel
+					if part.Get("thought").Bool() {
+						thinkingIndicesToSkipSignature = append(thinkingIndicesToSkipSignature, partIdx.Int())
 					}
-				}
-				return true
-			})
+					// Add skip sentinel to functionCall parts
+					if part.Get("functionCall").Exists() {
+						existingSig := part.Get("thoughtSignature").String()
+						if existingSig == "" || len(existingSig) < 50 {
+							rawJSON, _ = sjson.SetBytes(rawJSON, fmt.Sprintf("request.contents.%d.parts.%d.thoughtSignature", contentIdx.Int(), partIdx.Int()), skipSentinel)
+						}
+					}
+					return true
+				})

-			// Remove thinking blocks in reverse order to preserve indices
-			for i := len(thinkingIndicesToRemove) - 1; i >= 0; i-- {
-				idx := thinkingIndicesToRemove[i]
-				rawJSON, _ = sjson.DeleteBytes(rawJSON, fmt.Sprintf("request.contents.%d.parts.%d", contentIdx.Int(), idx))
+				// Mark each collected thinking part with the skip_thought_signature_validator sentinel. Setting a field does not shift part indices, so the reverse iteration order is not required.
+				for i := len(thinkingIndicesToSkipSignature) - 1; i >= 0; i-- {
+					idx := thinkingIndicesToSkipSignature[i]
+					rawJSON, _ = sjson.SetBytes(rawJSON, fmt.Sprintf("request.contents.%d.parts.%d.thoughtSignature", contentIdx.Int(), idx), skipSentinel)
+				}
 			}
-		}
-		return true
-	})
+			return true
+		})
+	}

 	return common.AttachDefaultSafetySettings(rawJSON, "request.safetySettings")
 }
--- a/internal/translator/antigravity/gemini/antigravity_gemini_request_test.go
+++ b/internal/translator/antigravity/gemini/antigravity_gemini_request_test.go
@@ -62,40 +62,6 @@ func TestConvertGeminiRequestToAntigravity_AddSkipSentinelToFunctionCall(t *test
 	}
 }

-func TestConvertGeminiRequestToAntigravity_RemoveThinkingBlocks(t *testing.T) {
-	// Thinking blocks should be removed entirely for Gemini
-	validSignature := "abc123validSignature1234567890123456789012345678901234567890"
-	inputJSON := []byte(fmt.Sprintf(`{
-		"model": "gemini-3-pro-preview",
-		"contents": [
-			{
-				"role": "model",
-				"parts": [
-					{"thought": true, "text": "Thinking...", "thoughtSignature": "%s"},
-					{"text": "Here is my response"}
-				]
-			}
-		]
-	}`, validSignature))
-
-	output := ConvertGeminiRequestToAntigravity("gemini-3-pro-preview", inputJSON, false)
-	outputStr := string(output)
-
-	// Check that thinking block is removed
-	parts := gjson.Get(outputStr, "request.contents.0.parts").Array()
-	if len(parts) != 1 {
-		t.Fatalf("Expected 1 part (thinking removed), got %d", len(parts))
-	}
-
-	// Only text part should remain
-	if parts[0].Get("thought").Bool() {
-		t.Error("Thinking block should be removed for Gemini")
-	}
-	if parts[0].Get("text").String() != "Here is my response" {
-		t.Errorf("Expected text 'Here is my response', got '%s'", parts[0].Get("text").String())
-	}
-}
-
 func TestConvertGeminiRequestToAntigravity_ParallelFunctionCalls(t *testing.T) {
 	// Multiple functionCalls should all get skip_thought_signature_validator
 	inputJSON := []byte(`{
--- a/internal/translator/antigravity/gemini/antigravity_gemini_response.go
+++ b/internal/translator/antigravity/gemini/antigravity_gemini_response.go
@@ -41,6 +41,7 @@ func ConvertAntigravityResponseToGemini(ctx context.Context, _ string, originalR
 			responseResult := gjson.GetBytes(rawJSON, "response")
 			if responseResult.Exists() {
 				chunk = []byte(responseResult.Raw)
+				chunk = restoreUsageMetadata(chunk)
 			}
 		} else {
 			chunkTemplate := "[]"
@@ -76,7 +77,8 @@ func ConvertAntigravityResponseToGemini(ctx context.Context, _ string, originalR
 func ConvertAntigravityResponseToGeminiNonStream(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, _ *any) string {
 	responseResult := gjson.GetBytes(rawJSON, "response")
 	if responseResult.Exists() {
-		return responseResult.Raw
+		chunk := restoreUsageMetadata([]byte(responseResult.Raw))
+		return string(chunk)
 	}
 	return string(rawJSON)
 }
@@ -84,3 +86,15 @@ func ConvertAntigravityResponseToGeminiNonStream(_ context.Context, _ string, or
 func GeminiTokenCount(ctx context.Context, count int64) string {
 	return fmt.Sprintf(`{"totalTokens":%d,"promptTokensDetails":[{"modality":"TEXT","tokenCount":%d}]}`, count, count)
 }
+
+// restoreUsageMetadata returns chunk with its top-level "cpaUsageMetadata" field
+// moved to "usageMetadata"; a chunk without "cpaUsageMetadata" is returned as is.
+// The executor renames usageMetadata to cpaUsageMetadata in non-terminal chunks
+// to keep usage data hidden from clients; Gemini-format output needs the original name.
+func restoreUsageMetadata(chunk []byte) []byte {
+	if cpaUsage := gjson.GetBytes(chunk, "cpaUsageMetadata"); cpaUsage.Exists() {
+		chunk, _ = sjson.SetRawBytes(chunk, "usageMetadata", []byte(cpaUsage.Raw))
+		chunk, _ = sjson.DeleteBytes(chunk, "cpaUsageMetadata")
+	}
+	return chunk
+}
--- a/internal/translator/antigravity/gemini/antigravity_gemini_response_test.go
+++ b/internal/translator/antigravity/gemini/antigravity_gemini_response_test.go
@@ -0,0 +1,95 @@
+package gemini
+
+import (
+	"context"
+	"testing"
+)
+
+func TestRestoreUsageMetadata(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    []byte
+		expected string
+	}{
+		{
+			name:     "cpaUsageMetadata renamed to usageMetadata",
+			input:    []byte(`{"modelVersion":"gemini-3-pro","cpaUsageMetadata":{"promptTokenCount":100,"candidatesTokenCount":200}}`),
+			expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":200}}`,
+		},
+		{
+			name:     "no cpaUsageMetadata unchanged",
+			input:    []byte(`{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`),
+			expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`,
+		},
+		{
+			name:     "empty input",
+			input:    []byte(`{}`),
+			expected: `{}`,
+		},
+	}
+	// Each case runs as a subtest; the output is compared as an exact string, so key order matters.
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := restoreUsageMetadata(tt.input)
+			if string(result) != tt.expected {
+				t.Errorf("restoreUsageMetadata() = %s, want %s", string(result), tt.expected)
+			}
+		})
+	}
+}
+
+func TestConvertAntigravityResponseToGeminiNonStream(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    []byte
+		expected string
+	}{
+		{
+			name:     "cpaUsageMetadata restored in response",
+			input:    []byte(`{"response":{"modelVersion":"gemini-3-pro","cpaUsageMetadata":{"promptTokenCount":100}}}`),
+			expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`,
+		},
+		{
+			name:     "usageMetadata preserved",
+			input:    []byte(`{"response":{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}}`),
+			expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`,
+		},
+	}
+	// Only rawJSON (tt.input) is supplied; every other argument is empty or nil.
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := ConvertAntigravityResponseToGeminiNonStream(context.Background(), "", nil, nil, tt.input, nil)
+			if result != tt.expected {
+				t.Errorf("ConvertAntigravityResponseToGeminiNonStream() = %s, want %s", result, tt.expected)
+			}
+		})
+	}
+}
+
+func TestConvertAntigravityResponseToGeminiStream(t *testing.T) {
+	ctx := context.WithValue(context.Background(), "alt", "") // NOTE(review): presumably the converter reads the "alt" key from ctx — confirm
+
+	tests := []struct {
+		name     string
+		input    []byte
+		expected string
+	}{
+		{
+			name:     "cpaUsageMetadata restored in streaming response",
+			input:    []byte(`data: {"response":{"modelVersion":"gemini-3-pro","cpaUsageMetadata":{"promptTokenCount":100}}}`),
+			expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`,
+		},
+	}
+	// Each input line must yield exactly one chunk holding the unwrapped "response" object.
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			results := ConvertAntigravityResponseToGemini(ctx, "", nil, nil, tt.input, nil)
+			if len(results) != 1 {
+				t.Fatalf("expected 1 result, got %d", len(results))
+			}
+			if results[0] != tt.expected {
+				t.Errorf("ConvertAntigravityResponseToGemini() = %s, want %s", results[0], tt.expected)
+			}
+		})
+	}
+}
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
@@ -35,66 +35,19 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 	// Model
 	out, _ = sjson.SetBytes(out, "model", modelName)

-	// Reasoning effort -> thinkingBudget/include_thoughts
-	// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
+	// Apply thinking configuration: convert OpenAI reasoning_effort to Gemini CLI thinkingConfig.
+	// Inline translation-only mapping; capability checks happen later in ApplyThinking.
 	re := gjson.GetBytes(rawJSON, "reasoning_effort")
-	hasOfficialThinking := re.Exists()
-	if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
+	if re.Exists() {
 		effort := strings.ToLower(strings.TrimSpace(re.String()))
-		if util.IsGemini3Model(modelName) {
-			switch effort {
-			case "none":
-				out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig")
-			case "auto":
-				includeThoughts := true
-				out = util.ApplyGeminiCLIThinkingLevel(out, "", &includeThoughts)
-			default:
-				if level, ok := util.ValidateGemini3ThinkingLevel(modelName, effort); ok {
-					out = util.ApplyGeminiCLIThinkingLevel(out, level, nil)
-				}
-			}
-		} else if !util.ModelUsesThinkingLevels(modelName) {
-			out = util.ApplyReasoningEffortToGeminiCLI(out, effort)
-		}
-	}
-
-	// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
-	// Only apply for models that use numeric budgets, not discrete levels.
-	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
-		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
-			var setBudget bool
-			var budget int
-
-			if v := tc.Get("thinkingBudget"); v.Exists() {
-				budget = int(v.Int())
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
-				setBudget = true
-			} else if v := tc.Get("thinking_budget"); v.Exists() {
-				budget = int(v.Int())
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
-				setBudget = true
-			}
-
-			if v := tc.Get("includeThoughts"); v.Exists() {
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
-			} else if v := tc.Get("include_thoughts"); v.Exists() {
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
-			} else if setBudget && budget != 0 {
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-			}
-		}
-	}
-
-	// Claude/Anthropic API format: thinking.type == "enabled" with budget_tokens
-	// This allows Claude Code and other Claude API clients to pass thinking configuration
-	if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && util.ModelSupportsThinking(modelName) {
-		if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
-			if t.Get("type").String() == "enabled" {
-				if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
-					budget := int(b.Int())
-					out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
-					out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-				}
+		if effort != "" {
+			thinkingPath := "request.generationConfig.thinkingConfig"
+			if effort == "auto" {
+				out, _ = sjson.SetBytes(out, thinkingPath+".thinkingBudget", -1)
+				out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", true)
+			} else {
+				out, _ = sjson.SetBytes(out, thinkingPath+".thinkingLevel", effort)
+				out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", effort != "none")
 			}
 		}
 	}
@@ -113,6 +66,13 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 		out, _ = sjson.SetBytes(out, "request.generationConfig.maxOutputTokens", maxTok.Num)
 	}

+	// Candidate count (OpenAI 'n' parameter)
+	if n := gjson.GetBytes(rawJSON, "n"); n.Exists() && n.Type == gjson.Number {
+		if val := n.Int(); val > 1 {
+			out, _ = sjson.SetBytes(out, "request.generationConfig.candidateCount", val)
+		}
+	}
+
 	// Map OpenAI modalities -> Gemini CLI request.generationConfig.responseModalities
 	// e.g. "modalities": ["image", "text"] -> ["IMAGE", "TEXT"]
 	if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() {
@@ -179,6 +139,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 			}
 		}

+		systemPartIndex := 0
 		for i := 0; i < len(arr); i++ {
 			m := arr[i]
 			role := m.Get("role").String()
@@ -188,16 +149,19 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 				// system -> request.systemInstruction as a user message style
 				if content.Type == gjson.String {
 					out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
-					out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.String())
+					out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.String())
+					systemPartIndex++
 				} else if content.IsObject() && content.Get("type").String() == "text" {
 					out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
-					out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.Get("text").String())
+					out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.Get("text").String())
+					systemPartIndex++
 				} else if content.IsArray() {
 					contents := content.Array()
 					if len(contents) > 0 {
 						out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
 						for j := 0; j < len(contents); j++ {
-							out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", j), contents[j].Get("text").String())
+							out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), contents[j].Get("text").String())
+							systemPartIndex++
 						}
 					}
 				}
@@ -212,7 +176,10 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 					for _, item := range items {
 						switch item.Get("type").String() {
 						case "text":
-							node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String())
+							text := item.Get("text").String()
+							if text != "" {
+								node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text)
+							}
 							p++
 						case "image_url":
 							imageURL := item.Get("image_url.url").String()
@@ -256,6 +223,10 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 					for _, item := range content.Array() {
 						switch item.Get("type").String() {
 						case "text":
+							text := item.Get("text").String()
+							if text != "" {
+								node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text)
+							}
 							p++
 						case "image_url":
 							// If the assistant returned an inline data URL, preserve it for history fidelity.
@@ -334,12 +305,12 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 		}
 	}

-	// tools -> request.tools[0].functionDeclarations + request.tools[0].googleSearch passthrough
+	// tools -> request.tools[].functionDeclarations + request.tools[].googleSearch passthrough
 	tools := gjson.GetBytes(rawJSON, "tools")
 	if tools.IsArray() && len(tools.Array()) > 0 {
-		toolNode := []byte(`{}`)
-		hasTool := false
+		functionToolNode := []byte(`{}`)
 		hasFunction := false
+		googleSearchNodes := make([][]byte, 0)
 		for _, t := range tools.Array() {
 			if t.Get("type").String() == "function" {
 				fn := t.Get("function")
@@ -378,31 +349,37 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 					}
 					fnRaw, _ = sjson.Delete(fnRaw, "strict")
 					if !hasFunction {
-						toolNode, _ = sjson.SetRawBytes(toolNode, "functionDeclarations", []byte("[]"))
+						functionToolNode, _ = sjson.SetRawBytes(functionToolNode, "functionDeclarations", []byte("[]"))
 					}
-					tmp, errSet := sjson.SetRawBytes(toolNode, "functionDeclarations.-1", []byte(fnRaw))
+					tmp, errSet := sjson.SetRawBytes(functionToolNode, "functionDeclarations.-1", []byte(fnRaw))
 					if errSet != nil {
 						log.Warnf("Failed to append tool declaration for '%s': %v", fn.Get("name").String(), errSet)
 						continue
 					}
-					toolNode = tmp
+					functionToolNode = tmp
 					hasFunction = true
-					hasTool = true
 				}
 			}
 			if gs := t.Get("google_search"); gs.Exists() {
+				googleToolNode := []byte(`{}`)
 				var errSet error
-				toolNode, errSet = sjson.SetRawBytes(toolNode, "googleSearch", []byte(gs.Raw))
+				googleToolNode, errSet = sjson.SetRawBytes(googleToolNode, "googleSearch", []byte(gs.Raw))
 				if errSet != nil {
 					log.Warnf("Failed to set googleSearch tool: %v", errSet)
 					continue
 				}
-				hasTool = true
+				googleSearchNodes = append(googleSearchNodes, googleToolNode)
 			}
 		}
-		if hasTool {
-			out, _ = sjson.SetRawBytes(out, "request.tools", []byte("[]"))
-			out, _ = sjson.SetRawBytes(out, "request.tools.0", toolNode)
+		if hasFunction || len(googleSearchNodes) > 0 {
+			toolsNode := []byte("[]")
+			if hasFunction {
+				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", functionToolNode)
+			}
+			for _, googleNode := range googleSearchNodes {
+				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", googleNode)
+			}
+			out, _ = sjson.SetRawBytes(out, "request.tools", toolsNode)
 		}
 	}

--- a/internal/translator/claude/gemini/claude_gemini_request.go
+++ b/internal/translator/claude/gemini/claude_gemini_request.go
@@ -15,6 +15,7 @@ import (
 	"strings"

 	"github.com/google/uuid"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -97,9 +98,8 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream
 		// Temperature setting for controlling response randomness
 		if temp := genConfig.Get("temperature"); temp.Exists() {
 			out, _ = sjson.Set(out, "temperature", temp.Float())
-		}
-		// Top P setting for nucleus sampling
-		if topP := genConfig.Get("topP"); topP.Exists() {
+		} else if topP := genConfig.Get("topP"); topP.Exists() {
+			// Top P setting for nucleus sampling (filtered out if temperature is set)
 			out, _ = sjson.Set(out, "top_p", topP.Float())
 		}
 		// Stop sequences configuration for custom termination conditions
@@ -114,15 +114,40 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream
 			}
 		}
 		// Include thoughts configuration for reasoning process visibility
-		// Only apply for models that support thinking and use numeric budgets, not discrete levels.
-		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
-			// Check for thinkingBudget first - if present, enable thinking with budget
-			if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() && thinkingBudget.Int() > 0 {
+		// Translator only does format conversion, ApplyThinking handles model capability validation.
+		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
+			if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() {
+				level := strings.ToLower(strings.TrimSpace(thinkingLevel.String()))
+				switch level {
+				case "":
+				case "none":
+					out, _ = sjson.Set(out, "thinking.type", "disabled")
+					out, _ = sjson.Delete(out, "thinking.budget_tokens")
+				case "auto":
+					out, _ = sjson.Set(out, "thinking.type", "enabled")
+					out, _ = sjson.Delete(out, "thinking.budget_tokens")
+				default:
+					if budget, ok := thinking.ConvertLevelToBudget(level); ok {
+						out, _ = sjson.Set(out, "thinking.type", "enabled")
+						out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
+					}
+				}
+			} else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
+				budget := int(thinkingBudget.Int())
+				switch budget {
+				case 0:
+					out, _ = sjson.Set(out, "thinking.type", "disabled")
+					out, _ = sjson.Delete(out, "thinking.budget_tokens")
+				case -1:
+					out, _ = sjson.Set(out, "thinking.type", "enabled")
+					out, _ = sjson.Delete(out, "thinking.budget_tokens")
+				default:
+					out, _ = sjson.Set(out, "thinking.type", "enabled")
+					out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
+				}
+			} else if includeThoughts := thinkingConfig.Get("includeThoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
 				out, _ = sjson.Set(out, "thinking.type", "enabled")
-				normalizedBudget := util.NormalizeThinkingBudget(modelName, int(thinkingBudget.Int()))
-				out, _ = sjson.Set(out, "thinking.budget_tokens", normalizedBudget)
 			} else if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
-				// Fallback to include_thoughts if no budget specified
 				out, _ = sjson.Set(out, "thinking.type", "enabled")
 			}
 		}
--- a/internal/translator/claude/openai/chat-completions/claude_openai_request.go
+++ b/internal/translator/claude/openai/chat-completions/claude_openai_request.go
@@ -15,7 +15,7 @@ import (
 	"strings"

 	"github.com/google/uuid"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -65,10 +65,11 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream

 	root := gjson.ParseBytes(rawJSON)

-	if v := root.Get("reasoning_effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+	// Convert OpenAI reasoning_effort to Claude thinking config.
+	if v := root.Get("reasoning_effort"); v.Exists() {
 		effort := strings.ToLower(strings.TrimSpace(v.String()))
 		if effort != "" {
-			budget, ok := util.ThinkingEffortToBudget(modelName, effort)
+			budget, ok := thinking.ConvertLevelToBudget(effort)
 			if ok {
 				switch budget {
 				case 0:
@@ -109,10 +110,8 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream
 	// Temperature setting for controlling response randomness
 	if temp := root.Get("temperature"); temp.Exists() {
 		out, _ = sjson.Set(out, "temperature", temp.Float())
-	}
-
-	// Top P setting for nucleus sampling
-	if topP := root.Get("top_p"); topP.Exists() {
+	} else if topP := root.Get("top_p"); topP.Exists() {
+		// Top P setting for nucleus sampling (filtered out if temperature is set)
 		out, _ = sjson.Set(out, "top_p", topP.Float())
 	}

@@ -137,17 +136,35 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream

 	// Process messages and transform them to Claude Code format
 	if messages := root.Get("messages"); messages.Exists() && messages.IsArray() {
+		messageIndex := 0
+		systemMessageIndex := -1
 		messages.ForEach(func(_, message gjson.Result) bool {
 			role := message.Get("role").String()
 			contentResult := message.Get("content")

 			switch role {
-			case "system", "user", "assistant":
-				// Create Claude Code message with appropriate role mapping
-				if role == "system" {
-					role = "user"
+			case "system":
+				if systemMessageIndex == -1 {
+					systemMsg := `{"role":"user","content":[]}`
+					out, _ = sjson.SetRaw(out, "messages.-1", systemMsg)
+					systemMessageIndex = messageIndex
+					messageIndex++
 				}
-
+				if contentResult.Exists() && contentResult.Type == gjson.String && contentResult.String() != "" {
+					textPart := `{"type":"text","text":""}`
+					textPart, _ = sjson.Set(textPart, "text", contentResult.String())
+					out, _ = sjson.SetRaw(out, fmt.Sprintf("messages.%d.content.-1", systemMessageIndex), textPart)
+				} else if contentResult.Exists() && contentResult.IsArray() {
+					contentResult.ForEach(func(_, part gjson.Result) bool {
+						if part.Get("type").String() == "text" {
+							textPart := `{"type":"text","text":""}`
+							textPart, _ = sjson.Set(textPart, "text", part.Get("text").String())
+							out, _ = sjson.SetRaw(out, fmt.Sprintf("messages.%d.content.-1", systemMessageIndex), textPart)
+						}
+						return true
+					})
+				}
+			case "user", "assistant":
 				msg := `{"role":"","content":[]}`
 				msg, _ = sjson.Set(msg, "role", role)

@@ -226,6 +243,7 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream
 				}

 				out, _ = sjson.SetRaw(out, "messages.-1", msg)
+				messageIndex++

 			case "tool":
 				// Handle tool result messages conversion
@@ -236,6 +254,7 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream
 				msg, _ = sjson.Set(msg, "content.0.tool_use_id", toolCallID)
 				msg, _ = sjson.Set(msg, "content.0.content", content)
 				out, _ = sjson.SetRaw(out, "messages.-1", msg)
+				messageIndex++
 			}
 			return true
 		})
--- a/internal/translator/claude/openai/responses/claude_openai-responses_request.go
+++ b/internal/translator/claude/openai/responses/claude_openai-responses_request.go
@@ -10,7 +10,7 @@ import (
 	"strings"

 	"github.com/google/uuid"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -53,10 +53,11 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte

 	root := gjson.ParseBytes(rawJSON)

-	if v := root.Get("reasoning.effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+	// Convert OpenAI Responses reasoning.effort to Claude thinking config.
+	if v := root.Get("reasoning.effort"); v.Exists() {
 		effort := strings.ToLower(strings.TrimSpace(v.String()))
 		if effort != "" {
-			budget, ok := util.ThinkingEffortToBudget(modelName, effort)
+			budget, ok := thinking.ConvertLevelToBudget(effort)
 			if ok {
 				switch budget {
 				case 0:
--- a/internal/translator/codex/claude/codex_claude_request.go
+++ b/internal/translator/codex/claude/codex_claude_request.go
@@ -12,7 +12,7 @@ import (
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -51,7 +51,7 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 	systemsResult := rootResult.Get("system")
 	if systemsResult.IsArray() {
 		systemResults := systemsResult.Array()
-		message := `{"type":"message","role":"user","content":[]}`
+		message := `{"type":"message","role":"developer","content":[]}`
 		for i := 0; i < len(systemResults); i++ {
 			systemResult := systemResults[i]
 			systemTypeResult := systemResult.Get("type")
@@ -217,21 +217,19 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 	// Add additional configuration parameters for the Codex API.
 	template, _ = sjson.Set(template, "parallel_tool_calls", true)

-	// Convert thinking.budget_tokens to reasoning.effort for level-based models
-	reasoningEffort := "medium" // default
-	if thinking := rootResult.Get("thinking"); thinking.Exists() && thinking.IsObject() {
-		switch thinking.Get("type").String() {
+	// Convert thinking.budget_tokens to reasoning.effort.
+	reasoningEffort := "medium"
+	if thinkingConfig := rootResult.Get("thinking"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
+		switch thinkingConfig.Get("type").String() {
 		case "enabled":
-			if util.ModelUsesThinkingLevels(modelName) {
-				if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() {
-					budget := int(budgetTokens.Int())
-					if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
-						reasoningEffort = effort
-					}
+			if budgetTokens := thinkingConfig.Get("budget_tokens"); budgetTokens.Exists() {
+				budget := int(budgetTokens.Int())
+				if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" {
+					reasoningEffort = effort
 				}
 			}
 		case "disabled":
-			if effort, ok := util.ThinkingBudgetToEffort(modelName, 0); ok && effort != "" {
+			if effort, ok := thinking.ConvertBudgetToLevel(0); ok && effort != "" {
 				reasoningEffort = effort
 			}
 		}
@@ -243,21 +241,23 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 	template, _ = sjson.Set(template, "include", []string{"reasoning.encrypted_content"})

 	// Add a first message to ignore system instructions and ensure proper execution.
-	inputResult := gjson.Get(template, "input")
-	if inputResult.Exists() && inputResult.IsArray() {
-		inputResults := inputResult.Array()
-		newInput := "[]"
-		for i := 0; i < len(inputResults); i++ {
-			if i == 0 {
-				firstText := inputResults[i].Get("content.0.text")
-				firstInstructions := "EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"
-				if firstText.Exists() && firstText.String() != firstInstructions {
-					newInput, _ = sjson.SetRaw(newInput, "-1", `{"type":"message","role":"user","content":[{"type":"input_text","text":"EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"}]}`)
+	if misc.GetCodexInstructionsEnabled() {
+		inputResult := gjson.Get(template, "input")
+		if inputResult.Exists() && inputResult.IsArray() {
+			inputResults := inputResult.Array()
+			newInput := "[]"
+			for i := 0; i < len(inputResults); i++ {
+				if i == 0 {
+					firstText := inputResults[i].Get("content.0.text")
+					firstInstructions := "EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"
+					if firstText.Exists() && firstText.String() != firstInstructions {
+						newInput, _ = sjson.SetRaw(newInput, "-1", `{"type":"message","role":"user","content":[{"type":"input_text","text":"EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"}]}`)
+					}
 				}
+				newInput, _ = sjson.SetRaw(newInput, "-1", inputResults[i].Raw)
 			}
-			newInput, _ = sjson.SetRaw(newInput, "-1", inputResults[i].Raw)
+			template, _ = sjson.SetRaw(template, "input", newInput)
 		}
-		template, _ = sjson.SetRaw(template, "input", newInput)
 	}

 	return []byte(template)
--- a/internal/translator/codex/claude/codex_claude_response.go
+++ b/internal/translator/codex/claude/codex_claude_response.go
@@ -117,8 +117,12 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa
 		} else {
 			template, _ = sjson.Set(template, "delta.stop_reason", "end_turn")
 		}
-		template, _ = sjson.Set(template, "usage.input_tokens", rootResult.Get("response.usage.input_tokens").Int())
-		template, _ = sjson.Set(template, "usage.output_tokens", rootResult.Get("response.usage.output_tokens").Int())
+		inputTokens, outputTokens, cachedTokens := extractResponsesUsage(rootResult.Get("response.usage"))
+		template, _ = sjson.Set(template, "usage.input_tokens", inputTokens)
+		template, _ = sjson.Set(template, "usage.output_tokens", outputTokens)
+		if cachedTokens > 0 {
+			template, _ = sjson.Set(template, "usage.cache_read_input_tokens", cachedTokens)
+		}

 		output = "event: message_delta\n"
 		output += fmt.Sprintf("data: %s\n\n", template)
@@ -204,8 +208,12 @@ func ConvertCodexResponseToClaudeNonStream(_ context.Context, _ string, original
 	out := `{"id":"","type":"message","role":"assistant","model":"","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":0,"output_tokens":0}}`
 	out, _ = sjson.Set(out, "id", responseData.Get("id").String())
 	out, _ = sjson.Set(out, "model", responseData.Get("model").String())
-	out, _ = sjson.Set(out, "usage.input_tokens", responseData.Get("usage.input_tokens").Int())
-	out, _ = sjson.Set(out, "usage.output_tokens", responseData.Get("usage.output_tokens").Int())
+	inputTokens, outputTokens, cachedTokens := extractResponsesUsage(responseData.Get("usage"))
+	out, _ = sjson.Set(out, "usage.input_tokens", inputTokens)
+	out, _ = sjson.Set(out, "usage.output_tokens", outputTokens)
+	if cachedTokens > 0 {
+		out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens)
+	}

 	hasToolCall := false

@@ -308,12 +316,27 @@ func ConvertCodexResponseToClaudeNonStream(_ context.Context, _ string, original
 		out, _ = sjson.SetRaw(out, "stop_sequence", stopSequence.Raw)
 	}

-	if responseData.Get("usage.input_tokens").Exists() || responseData.Get("usage.output_tokens").Exists() {
-		out, _ = sjson.Set(out, "usage.input_tokens", responseData.Get("usage.input_tokens").Int())
-		out, _ = sjson.Set(out, "usage.output_tokens", responseData.Get("usage.output_tokens").Int())
+	return out
+}
+
+func extractResponsesUsage(usage gjson.Result) (int64, int64, int64) {
+	if !usage.Exists() || usage.Type == gjson.Null {
+		return 0, 0, 0
 	}

-	return out
+	inputTokens := usage.Get("input_tokens").Int()
+	outputTokens := usage.Get("output_tokens").Int()
+	cachedTokens := usage.Get("input_tokens_details.cached_tokens").Int()
+
+	if cachedTokens > 0 {
+		if inputTokens >= cachedTokens {
+			inputTokens -= cachedTokens
+		} else {
+			inputTokens = 0
+		}
+	}
+
+	return inputTokens, outputTokens, cachedTokens
 }

 // buildReverseMapFromClaudeOriginalShortToOriginal builds a map[short]original from original Claude request tools.
--- a/internal/translator/codex/gemini/codex_gemini_request.go
+++ b/internal/translator/codex/gemini/codex_gemini_request.go
@@ -14,6 +14,7 @@ import (
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -93,7 +94,7 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 	// System instruction -> as a user message with input_text parts
 	sysParts := root.Get("system_instruction.parts")
 	if sysParts.IsArray() {
-		msg := `{"type":"message","role":"user","content":[]}`
+		msg := `{"type":"message","role":"developer","content":[]}`
 		arr := sysParts.Array()
 		for i := 0; i < len(arr); i++ {
 			p := arr[i]
@@ -247,21 +248,28 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 	// Fixed flags aligning with Codex expectations
 	out, _ = sjson.Set(out, "parallel_tool_calls", true)

-	// Convert thinkingBudget to reasoning.effort for level-based models
-	reasoningEffort := "medium" // default
+	// Convert Gemini thinkingConfig to Codex reasoning.effort.
+	effortSet := false
 	if genConfig := root.Get("generationConfig"); genConfig.Exists() {
 		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
-			if util.ModelUsesThinkingLevels(modelName) {
-				if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
-					budget := int(thinkingBudget.Int())
-					if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
-						reasoningEffort = effort
-					}
+			if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() {
+				effort := strings.ToLower(strings.TrimSpace(thinkingLevel.String()))
+				if effort != "" {
+					out, _ = sjson.Set(out, "reasoning.effort", effort)
+					effortSet = true
+				}
+			} else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
+				if effort, ok := thinking.ConvertBudgetToLevel(int(thinkingBudget.Int())); ok {
+					out, _ = sjson.Set(out, "reasoning.effort", effort)
+					effortSet = true
 				}
 			}
 		}
 	}
-	out, _ = sjson.Set(out, "reasoning.effort", reasoningEffort)
+	if !effortSet {
+		// No effort was derived from thinkingConfig (absent, empty level, or unconvertible budget); use the default.
+		out, _ = sjson.Set(out, "reasoning.effort", "medium")
+	}
 	out, _ = sjson.Set(out, "reasoning.summary", "auto")
 	out, _ = sjson.Set(out, "stream", true)
 	out, _ = sjson.Set(out, "store", false)
--- a/internal/translator/codex/openai/chat-completions/codex_openai_request.go
+++ b/internal/translator/codex/openai/chat-completions/codex_openai_request.go
@@ -33,7 +33,7 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
 	rawJSON := bytes.Clone(inputRawJSON)
 	userAgent := misc.ExtractCodexUserAgent(rawJSON)
 	// Start with empty JSON object
-	out := `{}`
+	out := `{"instructions":""}`

 	// Stream must be set to true
 	out, _ = sjson.Set(out, "stream", stream)
@@ -98,7 +98,9 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
 	// Extract system instructions from first system message (string or text object)
 	messages := gjson.GetBytes(rawJSON, "messages")
 	_, instructions := misc.CodexInstructionsForModel(modelName, "", userAgent)
-	out, _ = sjson.Set(out, "instructions", instructions)
+	if misc.GetCodexInstructionsEnabled() {
+		out, _ = sjson.Set(out, "instructions", instructions)
+	}
 	// if messages.IsArray() {
 	// 	arr := messages.Array()
 	// 	for i := 0; i < len(arr); i++ {
@@ -141,7 +143,7 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
 				msg := `{}`
 				msg, _ = sjson.Set(msg, "type", "message")
 				if role == "system" {
-					msg, _ = sjson.Set(msg, "role", "user")
+					msg, _ = sjson.Set(msg, "role", "developer")
 				} else {
 					msg, _ = sjson.Set(msg, "role", role)
 				}
--- a/internal/translator/codex/openai/responses/codex_openai-responses_request.go
+++ b/internal/translator/codex/openai/responses/codex_openai-responses_request.go
@@ -74,6 +74,11 @@ func ConvertOpenAIResponsesRequestToCodex(modelName string, inputRawJSON []byte,
 	}

 	if hasOfficialInstructions {
+		newInput := "[]"
+		for _, item := range inputResults {
+			newInput, _ = sjson.SetRaw(newInput, "-1", item.Raw)
+		}
+		rawJSON, _ = sjson.SetRawBytes(rawJSON, "input", []byte(newInput))
 		return rawJSON
 	}
 	// log.Debugf("instructions not matched, %s\n", originalInstructions)
--- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
+++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
@@ -10,7 +10,6 @@ import (
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -160,12 +159,12 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
 	}

 	// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
-	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
+	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
 		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
-				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
+				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
 			}
 		}
 	}
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -35,37 +35,19 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 	// Model
 	out, _ = sjson.SetBytes(out, "model", modelName)

-	// Reasoning effort -> thinkingBudget/include_thoughts
-	// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
+	// Apply thinking configuration: convert OpenAI reasoning_effort to Gemini CLI thinkingConfig.
+	// Inline translation-only mapping; capability checks happen later in ApplyThinking.
 	re := gjson.GetBytes(rawJSON, "reasoning_effort")
-	hasOfficialThinking := re.Exists()
-	if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
-		out = util.ApplyReasoningEffortToGeminiCLI(out, re.String())
-	}
-
-	// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
-	// Only apply for models that use numeric budgets, not discrete levels.
-	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
-		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
-			var setBudget bool
-			var budget int
-
-			if v := tc.Get("thinkingBudget"); v.Exists() {
-				budget = int(v.Int())
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
-				setBudget = true
-			} else if v := tc.Get("thinking_budget"); v.Exists() {
-				budget = int(v.Int())
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
-				setBudget = true
-			}
-
-			if v := tc.Get("includeThoughts"); v.Exists() {
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
-			} else if v := tc.Get("include_thoughts"); v.Exists() {
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
-			} else if setBudget && budget != 0 {
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
+	if re.Exists() {
+		effort := strings.ToLower(strings.TrimSpace(re.String()))
+		if effort != "" {
+			thinkingPath := "request.generationConfig.thinkingConfig"
+			if effort == "auto" {
+				out, _ = sjson.SetBytes(out, thinkingPath+".thinkingBudget", -1)
+				out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", true)
+			} else {
+				out, _ = sjson.SetBytes(out, thinkingPath+".thinkingLevel", effort)
+				out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", effort != "none")
 			}
 		}
 	}
@@ -81,6 +63,13 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 		out, _ = sjson.SetBytes(out, "request.generationConfig.topK", tkr.Num)
 	}

+	// Candidate count (OpenAI 'n' parameter)
+	if n := gjson.GetBytes(rawJSON, "n"); n.Exists() && n.Type == gjson.Number {
+		if val := n.Int(); val > 1 {
+			out, _ = sjson.SetBytes(out, "request.generationConfig.candidateCount", val)
+		}
+	}
+
 	// Map OpenAI modalities -> Gemini CLI request.generationConfig.responseModalities
 	// e.g. "modalities": ["image", "text"] -> ["IMAGE", "TEXT"]
 	if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() {
@@ -147,6 +136,7 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 			}
 		}

+		systemPartIndex := 0
 		for i := 0; i < len(arr); i++ {
 			m := arr[i]
 			role := m.Get("role").String()
@@ -156,16 +146,19 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 				// system -> request.systemInstruction as a user message style
 				if content.Type == gjson.String {
 					out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
-					out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.String())
+					out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.String())
+					systemPartIndex++
 				} else if content.IsObject() && content.Get("type").String() == "text" {
 					out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
-					out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.Get("text").String())
+					out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.Get("text").String())
+					systemPartIndex++
 				} else if content.IsArray() {
 					contents := content.Array()
 					if len(contents) > 0 {
 						out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
 						for j := 0; j < len(contents); j++ {
-							out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", j), contents[j].Get("text").String())
+							out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), contents[j].Get("text").String())
+							systemPartIndex++
 						}
 					}
 				}
@@ -290,12 +283,12 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 		}
 	}

-	// tools -> request.tools[0].functionDeclarations + request.tools[0].googleSearch passthrough
+	// tools -> request.tools[].functionDeclarations + request.tools[].googleSearch passthrough
 	tools := gjson.GetBytes(rawJSON, "tools")
 	if tools.IsArray() && len(tools.Array()) > 0 {
-		toolNode := []byte(`{}`)
-		hasTool := false
+		functionToolNode := []byte(`{}`)
 		hasFunction := false
+		googleSearchNodes := make([][]byte, 0)
 		for _, t := range tools.Array() {
 			if t.Get("type").String() == "function" {
 				fn := t.Get("function")
@@ -334,31 +327,37 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 					}
 					fnRaw, _ = sjson.Delete(fnRaw, "strict")
 					if !hasFunction {
-						toolNode, _ = sjson.SetRawBytes(toolNode, "functionDeclarations", []byte("[]"))
+						functionToolNode, _ = sjson.SetRawBytes(functionToolNode, "functionDeclarations", []byte("[]"))
 					}
-					tmp, errSet := sjson.SetRawBytes(toolNode, "functionDeclarations.-1", []byte(fnRaw))
+					tmp, errSet := sjson.SetRawBytes(functionToolNode, "functionDeclarations.-1", []byte(fnRaw))
 					if errSet != nil {
 						log.Warnf("Failed to append tool declaration for '%s': %v", fn.Get("name").String(), errSet)
 						continue
 					}
-					toolNode = tmp
+					functionToolNode = tmp
 					hasFunction = true
-					hasTool = true
 				}
 			}
 			if gs := t.Get("google_search"); gs.Exists() {
+				googleToolNode := []byte(`{}`)
 				var errSet error
-				toolNode, errSet = sjson.SetRawBytes(toolNode, "googleSearch", []byte(gs.Raw))
+				googleToolNode, errSet = sjson.SetRawBytes(googleToolNode, "googleSearch", []byte(gs.Raw))
 				if errSet != nil {
 					log.Warnf("Failed to set googleSearch tool: %v", errSet)
 					continue
 				}
-				hasTool = true
+				googleSearchNodes = append(googleSearchNodes, googleToolNode)
 			}
 		}
-		if hasTool {
-			out, _ = sjson.SetRawBytes(out, "request.tools", []byte("[]"))
-			out, _ = sjson.SetRawBytes(out, "request.tools.0", toolNode)
+		if hasFunction || len(googleSearchNodes) > 0 {
+			toolsNode := []byte("[]")
+			if hasFunction {
+				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", functionToolNode)
+			}
+			for _, googleNode := range googleSearchNodes {
+				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", googleNode)
+			}
+			out, _ = sjson.SetRawBytes(out, "request.tools", toolsNode)
 		}
 	}

--- a/internal/translator/gemini/claude/gemini_claude_request.go
+++ b/internal/translator/gemini/claude/gemini_claude_request.go
@@ -10,7 +10,6 @@ import (
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -153,13 +152,13 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 	}

 	// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when enabled
-	// Only apply for models that use numeric budgets, not discrete levels.
-	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+	// Translator only does format conversion, ApplyThinking handles model capability validation.
+	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
 		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
 				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
-				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
+				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.includeThoughts", true)
 			}
 		}
 	}
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
@@ -35,55 +35,19 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 	// Model
 	out, _ = sjson.SetBytes(out, "model", modelName)

-	// Reasoning effort -> thinkingBudget/include_thoughts
-	// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
-	// Only apply numeric budgets for models that use budgets (not discrete levels) to avoid
-	// incorrectly applying thinkingBudget for level-based models like gpt-5. Gemini 3 models
-	// use thinkingLevel/includeThoughts instead.
+	// Apply thinking configuration: convert OpenAI reasoning_effort to Gemini thinkingConfig.
+	// Inline translation-only mapping; capability checks happen later in ApplyThinking.
 	re := gjson.GetBytes(rawJSON, "reasoning_effort")
-	hasOfficialThinking := re.Exists()
-	if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
+	if re.Exists() {
 		effort := strings.ToLower(strings.TrimSpace(re.String()))
-		if util.IsGemini3Model(modelName) {
-			switch effort {
-			case "none":
-				out, _ = sjson.DeleteBytes(out, "generationConfig.thinkingConfig")
-			case "auto":
-				includeThoughts := true
-				out = util.ApplyGeminiThinkingLevel(out, "", &includeThoughts)
-			default:
-				if level, ok := util.ValidateGemini3ThinkingLevel(modelName, effort); ok {
-					out = util.ApplyGeminiThinkingLevel(out, level, nil)
-				}
-			}
-		} else if !util.ModelUsesThinkingLevels(modelName) {
-			out = util.ApplyReasoningEffortToGemini(out, effort)
-		}
-	}
-
-	// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
-	// Only apply for models that use numeric budgets, not discrete levels.
-	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
-		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
-			var setBudget bool
-			var budget int
-
-			if v := tc.Get("thinkingBudget"); v.Exists() {
-				budget = int(v.Int())
-				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
-				setBudget = true
-			} else if v := tc.Get("thinking_budget"); v.Exists() {
-				budget = int(v.Int())
-				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
-				setBudget = true
-			}
-
-			if v := tc.Get("includeThoughts"); v.Exists() {
-				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
-			} else if v := tc.Get("include_thoughts"); v.Exists() {
-				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
-			} else if setBudget && budget != 0 {
-				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
+		if effort != "" {
+			thinkingPath := "generationConfig.thinkingConfig"
+			if effort == "auto" {
+				out, _ = sjson.SetBytes(out, thinkingPath+".thinkingBudget", -1)
+				out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", true)
+			} else {
+				out, _ = sjson.SetBytes(out, thinkingPath+".thinkingLevel", effort)
+				out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", effort != "none")
 			}
 		}
 	}
@@ -99,6 +63,13 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 		out, _ = sjson.SetBytes(out, "generationConfig.topK", tkr.Num)
 	}

+	// Candidate count (OpenAI 'n' parameter)
+	if n := gjson.GetBytes(rawJSON, "n"); n.Exists() && n.Type == gjson.Number {
+		if val := n.Int(); val > 1 {
+			out, _ = sjson.SetBytes(out, "generationConfig.candidateCount", val)
+		}
+	}
+
 	// Map OpenAI modalities -> Gemini generationConfig.responseModalities
 	// e.g. "modalities": ["image", "text"] -> ["IMAGE", "TEXT"]
 	if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() {
@@ -165,6 +136,7 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 			}
 		}

+		systemPartIndex := 0
 		for i := 0; i < len(arr); i++ {
 			m := arr[i]
 			role := m.Get("role").String()
@@ -174,16 +146,19 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 				// system -> system_instruction as a user message style
 				if content.Type == gjson.String {
 					out, _ = sjson.SetBytes(out, "system_instruction.role", "user")
-					out, _ = sjson.SetBytes(out, "system_instruction.parts.0.text", content.String())
+					out, _ = sjson.SetBytes(out, fmt.Sprintf("system_instruction.parts.%d.text", systemPartIndex), content.String())
+					systemPartIndex++
 				} else if content.IsObject() && content.Get("type").String() == "text" {
 					out, _ = sjson.SetBytes(out, "system_instruction.role", "user")
-					out, _ = sjson.SetBytes(out, "system_instruction.parts.0.text", content.Get("text").String())
+					out, _ = sjson.SetBytes(out, fmt.Sprintf("system_instruction.parts.%d.text", systemPartIndex), content.Get("text").String())
+					systemPartIndex++
 				} else if content.IsArray() {
 					contents := content.Array()
 					if len(contents) > 0 {
-						out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
+						out, _ = sjson.SetBytes(out, "system_instruction.role", "user")
 						for j := 0; j < len(contents); j++ {
-							out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", j), contents[j].Get("text").String())
+							out, _ = sjson.SetBytes(out, fmt.Sprintf("system_instruction.parts.%d.text", systemPartIndex), contents[j].Get("text").String())
+							systemPartIndex++
 						}
 					}
 				}
@@ -198,7 +173,10 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 					for _, item := range items {
 						switch item.Get("type").String() {
 						case "text":
-							node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String())
+							text := item.Get("text").String()
+							if text != "" {
+								node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text)
+							}
 							p++
 						case "image_url":
 							imageURL := item.Get("image_url.url").String()
@@ -243,6 +221,10 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 					for _, item := range content.Array() {
 						switch item.Get("type").String() {
 						case "text":
+							text := item.Get("text").String()
+							if text != "" {
+								node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text)
+							}
 							p++
 						case "image_url":
 							// If the assistant returned an inline data URL, preserve it for history fidelity.
@@ -307,12 +289,12 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 		}
 	}

-	// tools -> tools[0].functionDeclarations + tools[0].googleSearch passthrough
+	// tools -> tools[].functionDeclarations + tools[].googleSearch passthrough
 	tools := gjson.GetBytes(rawJSON, "tools")
 	if tools.IsArray() && len(tools.Array()) > 0 {
-		toolNode := []byte(`{}`)
-		hasTool := false
+		functionToolNode := []byte(`{}`)
 		hasFunction := false
+		googleSearchNodes := make([][]byte, 0)
 		for _, t := range tools.Array() {
 			if t.Get("type").String() == "function" {
 				fn := t.Get("function")
@@ -351,31 +333,37 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 					}
 					fnRaw, _ = sjson.Delete(fnRaw, "strict")
 					if !hasFunction {
-						toolNode, _ = sjson.SetRawBytes(toolNode, "functionDeclarations", []byte("[]"))
+						functionToolNode, _ = sjson.SetRawBytes(functionToolNode, "functionDeclarations", []byte("[]"))
 					}
-					tmp, errSet := sjson.SetRawBytes(toolNode, "functionDeclarations.-1", []byte(fnRaw))
+					tmp, errSet := sjson.SetRawBytes(functionToolNode, "functionDeclarations.-1", []byte(fnRaw))
 					if errSet != nil {
 						log.Warnf("Failed to append tool declaration for '%s': %v", fn.Get("name").String(), errSet)
 						continue
 					}
-					toolNode = tmp
+					functionToolNode = tmp
 					hasFunction = true
-					hasTool = true
 				}
 			}
 			if gs := t.Get("google_search"); gs.Exists() {
+				googleToolNode := []byte(`{}`)
 				var errSet error
-				toolNode, errSet = sjson.SetRawBytes(toolNode, "googleSearch", []byte(gs.Raw))
+				googleToolNode, errSet = sjson.SetRawBytes(googleToolNode, "googleSearch", []byte(gs.Raw))
 				if errSet != nil {
 					log.Warnf("Failed to set googleSearch tool: %v", errSet)
 					continue
 				}
-				hasTool = true
+				googleSearchNodes = append(googleSearchNodes, googleToolNode)
 			}
 		}
-		if hasTool {
-			out, _ = sjson.SetRawBytes(out, "tools", []byte("[]"))
-			out, _ = sjson.SetRawBytes(out, "tools.0", toolNode)
+		if hasFunction || len(googleSearchNodes) > 0 {
+			toolsNode := []byte("[]")
+			if hasFunction {
+				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", functionToolNode)
+			}
+			for _, googleNode := range googleSearchNodes {
+				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", googleNode)
+			}
+			out, _ = sjson.SetRawBytes(out, "tools", toolsNode)
 		}
 	}

--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
@@ -21,7 +21,8 @@ import (
 // convertGeminiResponseToOpenAIChatParams holds parameters for response conversion.
 type convertGeminiResponseToOpenAIChatParams struct {
 	UnixTimestamp int64
-	FunctionIndex int
+	// FunctionIndex tracks tool call indices per candidate index to support multiple candidates.
+	FunctionIndex map[int]int
 }

 // functionCallIDCounter provides a process-wide unique counter for function call identifiers.
@@ -42,13 +43,20 @@ var functionCallIDCounter uint64
 // Returns:
 //   - []string: A slice of strings, each containing an OpenAI-compatible JSON response
 func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, param *any) []string {
+	// Initialize parameters if nil.
 	if *param == nil {
 		*param = &convertGeminiResponseToOpenAIChatParams{
 			UnixTimestamp: 0,
-			FunctionIndex: 0,
+			FunctionIndex: make(map[int]int),
 		}
 	}

+	// Ensure FunctionIndex is non-nil even when the caller supplied an existing params value with a nil map.
+	p := (*param).(*convertGeminiResponseToOpenAIChatParams)
+	if p.FunctionIndex == nil {
+		p.FunctionIndex = make(map[int]int)
+	}
+
 	if bytes.HasPrefix(rawJSON, []byte("data:")) {
 		rawJSON = bytes.TrimSpace(rawJSON[5:])
 	}
@@ -57,151 +65,179 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
 		return []string{}
 	}

-	// Initialize the OpenAI SSE template.
-	template := `{"id":"","object":"chat.completion.chunk","created":12345,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":null,"native_finish_reason":null}]}`
+	// Initialize the OpenAI SSE base template.
+	// We use a base template and clone it for each candidate to support multiple candidates.
+	baseTemplate := `{"id":"","object":"chat.completion.chunk","created":12345,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":null,"native_finish_reason":null}]}`

 	// Extract and set the model version.
 	if modelVersionResult := gjson.GetBytes(rawJSON, "modelVersion"); modelVersionResult.Exists() {
-		template, _ = sjson.Set(template, "model", modelVersionResult.String())
+		baseTemplate, _ = sjson.Set(baseTemplate, "model", modelVersionResult.String())
 	}

 	// Extract and set the creation timestamp.
 	if createTimeResult := gjson.GetBytes(rawJSON, "createTime"); createTimeResult.Exists() {
 		t, err := time.Parse(time.RFC3339Nano, createTimeResult.String())
 		if err == nil {
-			(*param).(*convertGeminiResponseToOpenAIChatParams).UnixTimestamp = t.Unix()
+			p.UnixTimestamp = t.Unix()
 		}
-		template, _ = sjson.Set(template, "created", (*param).(*convertGeminiResponseToOpenAIChatParams).UnixTimestamp)
+		baseTemplate, _ = sjson.Set(baseTemplate, "created", p.UnixTimestamp)
 	} else {
-		template, _ = sjson.Set(template, "created", (*param).(*convertGeminiResponseToOpenAIChatParams).UnixTimestamp)
+		baseTemplate, _ = sjson.Set(baseTemplate, "created", p.UnixTimestamp)
 	}

 	// Extract and set the response ID.
 	if responseIDResult := gjson.GetBytes(rawJSON, "responseId"); responseIDResult.Exists() {
-		template, _ = sjson.Set(template, "id", responseIDResult.String())
-	}
-
-	// Extract and set the finish reason.
-	if finishReasonResult := gjson.GetBytes(rawJSON, "candidates.0.finishReason"); finishReasonResult.Exists() {
-		template, _ = sjson.Set(template, "choices.0.finish_reason", strings.ToLower(finishReasonResult.String()))
-		template, _ = sjson.Set(template, "choices.0.native_finish_reason", strings.ToLower(finishReasonResult.String()))
+		baseTemplate, _ = sjson.Set(baseTemplate, "id", responseIDResult.String())
 	}

 	// Extract and set usage metadata (token counts).
+	// Usage is applied to the base template so it appears in the chunks.
 	if usageResult := gjson.GetBytes(rawJSON, "usageMetadata"); usageResult.Exists() {
 		cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 		if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
-			template, _ = sjson.Set(template, "usage.completion_tokens", candidatesTokenCountResult.Int())
+			baseTemplate, _ = sjson.Set(baseTemplate, "usage.completion_tokens", candidatesTokenCountResult.Int())
 		}
 		if totalTokenCountResult := usageResult.Get("totalTokenCount"); totalTokenCountResult.Exists() {
-			template, _ = sjson.Set(template, "usage.total_tokens", totalTokenCountResult.Int())
+			baseTemplate, _ = sjson.Set(baseTemplate, "usage.total_tokens", totalTokenCountResult.Int())
 		}
 		promptTokenCount := usageResult.Get("promptTokenCount").Int() - cachedTokenCount
 		thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
-		template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
+		baseTemplate, _ = sjson.Set(baseTemplate, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
 		if thoughtsTokenCount > 0 {
-			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
+			baseTemplate, _ = sjson.Set(baseTemplate, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
 		// Include cached token count if present (indicates prompt caching is working)
 		if cachedTokenCount > 0 {
 			var err error
-			template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+			baseTemplate, err = sjson.Set(baseTemplate, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
 			if err != nil {
 				log.Warnf("gemini openai response: failed to set cached_tokens in streaming: %v", err)
 			}
 		}
 	}

-	// Process the main content part of the response.
-	partsResult := gjson.GetBytes(rawJSON, "candidates.0.content.parts")
-	hasFunctionCall := false
-	if partsResult.IsArray() {
-		partResults := partsResult.Array()
-		for i := 0; i < len(partResults); i++ {
-			partResult := partResults[i]
-			partTextResult := partResult.Get("text")
-			functionCallResult := partResult.Get("functionCall")
-			inlineDataResult := partResult.Get("inlineData")
-			if !inlineDataResult.Exists() {
-				inlineDataResult = partResult.Get("inline_data")
-			}
-			thoughtSignatureResult := partResult.Get("thoughtSignature")
-			if !thoughtSignatureResult.Exists() {
-				thoughtSignatureResult = partResult.Get("thought_signature")
+	var responseStrings []string
+	candidates := gjson.GetBytes(rawJSON, "candidates")
+
+	// Iterate over all candidates to support candidate_count > 1.
+	if candidates.IsArray() {
+		candidates.ForEach(func(_, candidate gjson.Result) bool {
+			// Clone the template for the current candidate.
+			template := baseTemplate
+
+			// Set the specific index for this candidate.
+			candidateIndex := int(candidate.Get("index").Int())
+			template, _ = sjson.Set(template, "choices.0.index", candidateIndex)
+
+			// Extract and set the finish reason.
+			if finishReasonResult := candidate.Get("finishReason"); finishReasonResult.Exists() {
+				template, _ = sjson.Set(template, "choices.0.finish_reason", strings.ToLower(finishReasonResult.String()))
+				template, _ = sjson.Set(template, "choices.0.native_finish_reason", strings.ToLower(finishReasonResult.String()))
 			}

-			hasThoughtSignature := thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != ""
-			hasContentPayload := partTextResult.Exists() || functionCallResult.Exists() || inlineDataResult.Exists()
+			partsResult := candidate.Get("content.parts")
+			hasFunctionCall := false

-			// Skip pure thoughtSignature parts but keep any actual payload in the same part.
-			if hasThoughtSignature && !hasContentPayload {
-				continue
+			if partsResult.IsArray() {
+				partResults := partsResult.Array()
+				for i := 0; i < len(partResults); i++ {
+					partResult := partResults[i]
+					partTextResult := partResult.Get("text")
+					functionCallResult := partResult.Get("functionCall")
+					inlineDataResult := partResult.Get("inlineData")
+					if !inlineDataResult.Exists() {
+						inlineDataResult = partResult.Get("inline_data")
+					}
+					thoughtSignatureResult := partResult.Get("thoughtSignature")
+					if !thoughtSignatureResult.Exists() {
+						thoughtSignatureResult = partResult.Get("thought_signature")
+					}
+
+					hasThoughtSignature := thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != ""
+					hasContentPayload := partTextResult.Exists() || functionCallResult.Exists() || inlineDataResult.Exists()
+
+					// Skip pure thoughtSignature parts but keep any actual payload in the same part.
+					if hasThoughtSignature && !hasContentPayload {
+						continue
+					}
+
+					if partTextResult.Exists() {
+						text := partTextResult.String()
+						// Handle text content, distinguishing between regular content and reasoning/thoughts.
+						if partResult.Get("thought").Bool() {
+							template, _ = sjson.Set(template, "choices.0.delta.reasoning_content", text)
+						} else {
+							template, _ = sjson.Set(template, "choices.0.delta.content", text)
+						}
+						template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
+					} else if functionCallResult.Exists() {
+						// Handle function call content.
+						hasFunctionCall = true
+						toolCallsResult := gjson.Get(template, "choices.0.delta.tool_calls")
+
+						// Retrieve the function index for this specific candidate.
+						functionCallIndex := p.FunctionIndex[candidateIndex]
+						p.FunctionIndex[candidateIndex]++
+
+						if toolCallsResult.Exists() && toolCallsResult.IsArray() {
+							functionCallIndex = len(toolCallsResult.Array())
+						} else {
+							template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls", `[]`)
+						}
+
+						functionCallTemplate := `{"id": "","index": 0,"type": "function","function": {"name": "","arguments": ""}}`
+						fcName := functionCallResult.Get("name").String()
+						functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
+						functionCallTemplate, _ = sjson.Set(functionCallTemplate, "index", functionCallIndex)
+						functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.name", fcName)
+						if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
+							functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.arguments", fcArgsResult.Raw)
+						}
+						template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
+						template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls.-1", functionCallTemplate)
+					} else if inlineDataResult.Exists() {
+						data := inlineDataResult.Get("data").String()
+						if data == "" {
+							continue
+						}
+						mimeType := inlineDataResult.Get("mimeType").String()
+						if mimeType == "" {
+							mimeType = inlineDataResult.Get("mime_type").String()
+						}
+						if mimeType == "" {
+							mimeType = "image/png"
+						}
+						imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data)
+						imagesResult := gjson.Get(template, "choices.0.delta.images")
+						if !imagesResult.Exists() || !imagesResult.IsArray() {
+							template, _ = sjson.SetRaw(template, "choices.0.delta.images", `[]`)
+						}
+						imageIndex := len(gjson.Get(template, "choices.0.delta.images").Array())
+						imagePayload := `{"type":"image_url","image_url":{"url":""}}`
+						imagePayload, _ = sjson.Set(imagePayload, "index", imageIndex)
+						imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL)
+						template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
+						template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", imagePayload)
+					}
+				}
 			}

-			if partTextResult.Exists() {
-				text := partTextResult.String()
-				// Handle text content, distinguishing between regular content and reasoning/thoughts.
-				if partResult.Get("thought").Bool() {
-					template, _ = sjson.Set(template, "choices.0.delta.reasoning_content", text)
-				} else {
-					template, _ = sjson.Set(template, "choices.0.delta.content", text)
-				}
-				template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
-			} else if functionCallResult.Exists() {
-				// Handle function call content.
-				hasFunctionCall = true
-				toolCallsResult := gjson.Get(template, "choices.0.delta.tool_calls")
-				functionCallIndex := (*param).(*convertGeminiResponseToOpenAIChatParams).FunctionIndex
-				(*param).(*convertGeminiResponseToOpenAIChatParams).FunctionIndex++
-				if toolCallsResult.Exists() && toolCallsResult.IsArray() {
-					functionCallIndex = len(toolCallsResult.Array())
-				} else {
-					template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls", `[]`)
-				}
-
-				functionCallTemplate := `{"id": "","index": 0,"type": "function","function": {"name": "","arguments": ""}}`
-				fcName := functionCallResult.Get("name").String()
-				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
-				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "index", functionCallIndex)
-				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.name", fcName)
-				if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
-					functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.arguments", fcArgsResult.Raw)
-				}
-				template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
-				template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls.-1", functionCallTemplate)
-			} else if inlineDataResult.Exists() {
-				data := inlineDataResult.Get("data").String()
-				if data == "" {
-					continue
-				}
-				mimeType := inlineDataResult.Get("mimeType").String()
-				if mimeType == "" {
-					mimeType = inlineDataResult.Get("mime_type").String()
-				}
-				if mimeType == "" {
-					mimeType = "image/png"
-				}
-				imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data)
-				imagesResult := gjson.Get(template, "choices.0.delta.images")
-				if !imagesResult.Exists() || !imagesResult.IsArray() {
-					template, _ = sjson.SetRaw(template, "choices.0.delta.images", `[]`)
-				}
-				imageIndex := len(gjson.Get(template, "choices.0.delta.images").Array())
-				imagePayload := `{"type":"image_url","image_url":{"url":""}}`
-				imagePayload, _ = sjson.Set(imagePayload, "index", imageIndex)
-				imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL)
-				template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
-				template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", imagePayload)
+			if hasFunctionCall {
+				template, _ = sjson.Set(template, "choices.0.finish_reason", "tool_calls")
+				template, _ = sjson.Set(template, "choices.0.native_finish_reason", "tool_calls")
 			}
+
+			responseStrings = append(responseStrings, template)
+			return true // continue loop
+		})
+	} else {
+		// If there are no candidates (e.g., a pure usageMetadata chunk), return the usage chunk if present.
+		if gjson.GetBytes(rawJSON, "usageMetadata").Exists() && len(responseStrings) == 0 {
+			responseStrings = append(responseStrings, baseTemplate)
 		}
 	}

-	if hasFunctionCall {
-		template, _ = sjson.Set(template, "choices.0.finish_reason", "tool_calls")
-		template, _ = sjson.Set(template, "choices.0.native_finish_reason", "tool_calls")
-	}
-
-	return []string{template}
+	return responseStrings
 }

 // ConvertGeminiResponseToOpenAINonStream converts a non-streaming Gemini response to a non-streaming OpenAI response.
@@ -219,7 +255,9 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
 //   - string: An OpenAI-compatible JSON response containing all message content and metadata
 func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, _ *any) string {
 	var unixTimestamp int64
-	template := `{"id":"","object":"chat.completion","created":123456,"model":"model","choices":[{"index":0,"message":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":null,"native_finish_reason":null}]}`
+	// Initialize template with an empty choices array to support multiple candidates.
+	template := `{"id":"","object":"chat.completion","created":123456,"model":"model","choices":[]}`
+
 	if modelVersionResult := gjson.GetBytes(rawJSON, "modelVersion"); modelVersionResult.Exists() {
 		template, _ = sjson.Set(template, "model", modelVersionResult.String())
 	}
@@ -238,11 +276,6 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina
 		template, _ = sjson.Set(template, "id", responseIDResult.String())
 	}

-	if finishReasonResult := gjson.GetBytes(rawJSON, "candidates.0.finishReason"); finishReasonResult.Exists() {
-		template, _ = sjson.Set(template, "choices.0.finish_reason", strings.ToLower(finishReasonResult.String()))
-		template, _ = sjson.Set(template, "choices.0.native_finish_reason", strings.ToLower(finishReasonResult.String()))
-	}
-
 	if usageResult := gjson.GetBytes(rawJSON, "usageMetadata"); usageResult.Exists() {
 		if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
 			template, _ = sjson.Set(template, "usage.completion_tokens", candidatesTokenCountResult.Int())
@@ -267,74 +300,96 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina
 		}
 	}

-	// Process the main content part of the response.
-	partsResult := gjson.GetBytes(rawJSON, "candidates.0.content.parts")
-	hasFunctionCall := false
-	if partsResult.IsArray() {
-		partsResults := partsResult.Array()
-		for i := 0; i < len(partsResults); i++ {
-			partResult := partsResults[i]
-			partTextResult := partResult.Get("text")
-			functionCallResult := partResult.Get("functionCall")
-			inlineDataResult := partResult.Get("inlineData")
-			if !inlineDataResult.Exists() {
-				inlineDataResult = partResult.Get("inline_data")
+	// Process the main content part of the response for all candidates.
+	candidates := gjson.GetBytes(rawJSON, "candidates")
+	if candidates.IsArray() {
+		candidates.ForEach(func(_, candidate gjson.Result) bool {
+			// Construct a single Choice object.
+			choiceTemplate := `{"index":0,"message":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":null,"native_finish_reason":null}`
+
+			// Set the index for this choice.
+			choiceTemplate, _ = sjson.Set(choiceTemplate, "index", candidate.Get("index").Int())
+
+			// Set finish reason.
+			if finishReasonResult := candidate.Get("finishReason"); finishReasonResult.Exists() {
+				choiceTemplate, _ = sjson.Set(choiceTemplate, "finish_reason", strings.ToLower(finishReasonResult.String()))
+				choiceTemplate, _ = sjson.Set(choiceTemplate, "native_finish_reason", strings.ToLower(finishReasonResult.String()))
 			}

-			if partTextResult.Exists() {
-				// Append text content, distinguishing between regular content and reasoning.
-				if partResult.Get("thought").Bool() {
-					template, _ = sjson.Set(template, "choices.0.message.reasoning_content", partTextResult.String())
-				} else {
-					template, _ = sjson.Set(template, "choices.0.message.content", partTextResult.String())
-				}
-				template, _ = sjson.Set(template, "choices.0.message.role", "assistant")
-			} else if functionCallResult.Exists() {
-				// Append function call content to the tool_calls array.
-				hasFunctionCall = true
-				toolCallsResult := gjson.Get(template, "choices.0.message.tool_calls")
-				if !toolCallsResult.Exists() || !toolCallsResult.IsArray() {
-					template, _ = sjson.SetRaw(template, "choices.0.message.tool_calls", `[]`)
-				}
-				functionCallItemTemplate := `{"id": "","type": "function","function": {"name": "","arguments": ""}}`
-				fcName := functionCallResult.Get("name").String()
-				functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
-				functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.name", fcName)
-				if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
-					functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.arguments", fcArgsResult.Raw)
-				}
-				template, _ = sjson.Set(template, "choices.0.message.role", "assistant")
-				template, _ = sjson.SetRaw(template, "choices.0.message.tool_calls.-1", functionCallItemTemplate)
-			} else if inlineDataResult.Exists() {
-				data := inlineDataResult.Get("data").String()
-				if data == "" {
-					continue
-				}
-				mimeType := inlineDataResult.Get("mimeType").String()
-				if mimeType == "" {
-					mimeType = inlineDataResult.Get("mime_type").String()
-				}
-				if mimeType == "" {
-					mimeType = "image/png"
-				}
-				imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data)
-				imagesResult := gjson.Get(template, "choices.0.message.images")
-				if !imagesResult.Exists() || !imagesResult.IsArray() {
-					template, _ = sjson.SetRaw(template, "choices.0.message.images", `[]`)
-				}
-				imageIndex := len(gjson.Get(template, "choices.0.message.images").Array())
-				imagePayload := `{"type":"image_url","image_url":{"url":""}}`
-				imagePayload, _ = sjson.Set(imagePayload, "index", imageIndex)
-				imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL)
-				template, _ = sjson.Set(template, "choices.0.message.role", "assistant")
-				template, _ = sjson.SetRaw(template, "choices.0.message.images.-1", imagePayload)
-			}
-		}
-	}
+			partsResult := candidate.Get("content.parts")
+			hasFunctionCall := false
+			if partsResult.IsArray() {
+				partsResults := partsResult.Array()
+				for i := 0; i < len(partsResults); i++ {
+					partResult := partsResults[i]
+					partTextResult := partResult.Get("text")
+					functionCallResult := partResult.Get("functionCall")
+					inlineDataResult := partResult.Get("inlineData")
+					if !inlineDataResult.Exists() {
+						inlineDataResult = partResult.Get("inline_data")
+					}

-	if hasFunctionCall {
-		template, _ = sjson.Set(template, "choices.0.finish_reason", "tool_calls")
-		template, _ = sjson.Set(template, "choices.0.native_finish_reason", "tool_calls")
+					if partTextResult.Exists() {
+						// Append text content, distinguishing between regular content and reasoning.
+						if partResult.Get("thought").Bool() {
+							oldVal := gjson.Get(choiceTemplate, "message.reasoning_content").String()
+							choiceTemplate, _ = sjson.Set(choiceTemplate, "message.reasoning_content", oldVal+partTextResult.String())
+						} else {
+							oldVal := gjson.Get(choiceTemplate, "message.content").String()
+							choiceTemplate, _ = sjson.Set(choiceTemplate, "message.content", oldVal+partTextResult.String())
+						}
+						choiceTemplate, _ = sjson.Set(choiceTemplate, "message.role", "assistant")
+					} else if functionCallResult.Exists() {
+						// Append function call content to the tool_calls array.
+						hasFunctionCall = true
+						toolCallsResult := gjson.Get(choiceTemplate, "message.tool_calls")
+						if !toolCallsResult.Exists() || !toolCallsResult.IsArray() {
+							choiceTemplate, _ = sjson.SetRaw(choiceTemplate, "message.tool_calls", `[]`)
+						}
+						functionCallItemTemplate := `{"id": "","type": "function","function": {"name": "","arguments": ""}}`
+						fcName := functionCallResult.Get("name").String()
+						functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
+						functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.name", fcName)
+						if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
+							functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.arguments", fcArgsResult.Raw)
+						}
+						choiceTemplate, _ = sjson.Set(choiceTemplate, "message.role", "assistant")
+						choiceTemplate, _ = sjson.SetRaw(choiceTemplate, "message.tool_calls.-1", functionCallItemTemplate)
+					} else if inlineDataResult.Exists() {
+						data := inlineDataResult.Get("data").String()
+						if data != "" {
+							mimeType := inlineDataResult.Get("mimeType").String()
+							if mimeType == "" {
+								mimeType = inlineDataResult.Get("mime_type").String()
+							}
+							if mimeType == "" {
+								mimeType = "image/png"
+							}
+							imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data)
+							imagesResult := gjson.Get(choiceTemplate, "message.images")
+							if !imagesResult.Exists() || !imagesResult.IsArray() {
+								choiceTemplate, _ = sjson.SetRaw(choiceTemplate, "message.images", `[]`)
+							}
+							imageIndex := len(gjson.Get(choiceTemplate, "message.images").Array())
+							imagePayload := `{"type":"image_url","image_url":{"url":""}}`
+							imagePayload, _ = sjson.Set(imagePayload, "index", imageIndex)
+							imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL)
+							choiceTemplate, _ = sjson.Set(choiceTemplate, "message.role", "assistant")
+							choiceTemplate, _ = sjson.SetRaw(choiceTemplate, "message.images.-1", imagePayload)
+						}
+					}
+				}
+			}
+
+			if hasFunctionCall {
+				choiceTemplate, _ = sjson.Set(choiceTemplate, "finish_reason", "tool_calls")
+				choiceTemplate, _ = sjson.Set(choiceTemplate, "native_finish_reason", "tool_calls")
+			}
+
+			// Append the constructed choice to the main choices array.
+			template, _ = sjson.SetRaw(template, "choices.-1", choiceTemplate)
+			return true
+		})
 	}

 	return template
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
@@ -5,7 +5,6 @@ import (
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -299,6 +298,15 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 				}
 				functionContent, _ = sjson.SetRaw(functionContent, "parts.-1", functionResponse)
 				out, _ = sjson.SetRaw(out, "contents.-1", functionContent)
+
+			case "reasoning":
+				thoughtContent := `{"role":"model","parts":[]}`
+				thought := `{"text":"","thoughtSignature":"","thought":true}`
+				thought, _ = sjson.Set(thought, "text", item.Get("summary.0.text").String())
+				thought, _ = sjson.Set(thought, "thoughtSignature", item.Get("encrypted_content").String())
+
+				thoughtContent, _ = sjson.SetRaw(thoughtContent, "parts.-1", thought)
+				out, _ = sjson.SetRaw(out, "contents.-1", thoughtContent)
 			}
 		}
 	} else if input.Exists() && input.Type == gjson.String {
@@ -388,31 +396,19 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 		out, _ = sjson.Set(out, "generationConfig.stopSequences", sequences)
 	}

-	// OpenAI official reasoning fields take precedence
-	// Only convert for models that use numeric budgets (not discrete levels).
-	hasOfficialThinking := root.Get("reasoning.effort").Exists()
-	if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
-		reasoningEffort := root.Get("reasoning.effort")
-		out = string(util.ApplyReasoningEffortToGemini([]byte(out), reasoningEffort.String()))
-	}
-
-	// Cherry Studio extension (applies only when official fields are missing)
-	// Only apply for models that use numeric budgets, not discrete levels.
-	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
-		if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
-			var setBudget bool
-			var budget int
-			if v := tc.Get("thinking_budget"); v.Exists() {
-				budget = int(v.Int())
-				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
-				setBudget = true
-			}
-			if v := tc.Get("include_thoughts"); v.Exists() {
-				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
-			} else if setBudget {
-				if budget != 0 {
-					out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
-				}
+	// Apply thinking configuration: convert OpenAI Responses API reasoning.effort to Gemini thinkingConfig.
+	// Inline translation-only mapping; capability checks happen later in ApplyThinking.
+	re := root.Get("reasoning.effort")
+	if re.Exists() {
+		effort := strings.ToLower(strings.TrimSpace(re.String()))
+		if effort != "" {
+			thinkingPath := "generationConfig.thinkingConfig"
+			if effort == "auto" {
+				out, _ = sjson.Set(out, thinkingPath+".thinkingBudget", -1)
+				out, _ = sjson.Set(out, thinkingPath+".includeThoughts", true)
+			} else {
+				out, _ = sjson.Set(out, thinkingPath+".thinkingLevel", effort)
+				out, _ = sjson.Set(out, thinkingPath+".includeThoughts", effort != "none")
 			}
 		}
 	}
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_response.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_response.go
@@ -20,6 +20,7 @@ type geminiToResponsesState struct {

 	// message aggregation
 	MsgOpened    bool
+	MsgClosed    bool
 	MsgIndex     int
 	CurrentMsgID string
 	TextBuf      strings.Builder
@@ -29,6 +30,7 @@ type geminiToResponsesState struct {
 	ReasoningOpened bool
 	ReasoningIndex  int
 	ReasoningItemID string
+	ReasoningEnc    string
 	ReasoningBuf    strings.Builder
 	ReasoningClosed bool

@@ -37,6 +39,7 @@ type geminiToResponsesState struct {
 	FuncArgsBuf map[int]*strings.Builder
 	FuncNames   map[int]string
 	FuncCallIDs map[int]string
+	FuncDone    map[int]bool
 }

 // responseIDCounter provides a process-wide unique counter for synthesized response identifiers.
@@ -45,6 +48,39 @@ var responseIDCounter uint64
 // funcCallIDCounter provides a process-wide unique counter for function call identifiers.
 var funcCallIDCounter uint64

+// pickRequestJSON selects which request payload to use.
+//
+// It returns originalRequestRawJSON when that slice is non-empty and valid
+// JSON; otherwise it returns requestRawJSON under the same condition; and it
+// returns nil when neither slice qualifies.
+func pickRequestJSON(originalRequestRawJSON, requestRawJSON []byte) []byte {
+	if len(originalRequestRawJSON) > 0 && gjson.ValidBytes(originalRequestRawJSON) {
+		return originalRequestRawJSON
+	}
+	if len(requestRawJSON) > 0 && gjson.ValidBytes(requestRawJSON) {
+		return requestRawJSON
+	}
+	return nil
+}
+
+// unwrapRequestRoot returns the nested "request" object when root has one and
+// that object carries at least one of "model", "input" or "instructions".
+// In every other case root is returned unchanged.
+func unwrapRequestRoot(root gjson.Result) gjson.Result {
+	req := root.Get("request")
+	if !req.Exists() {
+		return root
+	}
+	// Only unwrap when the nested object has one of the expected request keys.
+	// NOTE(review): presumably this guards against payloads that merely have an
+	// unrelated "request" field - confirm against callers.
+	if req.Get("model").Exists() || req.Get("input").Exists() || req.Get("instructions").Exists() {
+		return req
+	}
+	return root
+}
+
+// unwrapGeminiResponseRoot returns the nested "response" object when root has
+// one and that object carries at least one of "candidates", "responseId" or
+// "usageMetadata". In every other case root is returned unchanged.
+func unwrapGeminiResponseRoot(root gjson.Result) gjson.Result {
+	resp := root.Get("response")
+	if !resp.Exists() {
+		return root
+	}
+	// Vertex-style Gemini responses wrap the actual payload in a "response" object.
+	if resp.Get("candidates").Exists() || resp.Get("responseId").Exists() || resp.Get("usageMetadata").Exists() {
+		return resp
+	}
+	return root
+}
+
+// emitEvent formats one event as two lines: "event: <event>" followed by
+// "data: <payload>". No trailing newline is appended.
 func emitEvent(event string, payload string) string {
 	return fmt.Sprintf("event: %s\ndata: %s", event, payload)
 }
@@ -56,18 +92,37 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
 			FuncArgsBuf: make(map[int]*strings.Builder),
 			FuncNames:   make(map[int]string),
 			FuncCallIDs: make(map[int]string),
+			FuncDone:    make(map[int]bool),
 		}
 	}
 	st := (*param).(*geminiToResponsesState)
+	if st.FuncArgsBuf == nil {
+		st.FuncArgsBuf = make(map[int]*strings.Builder)
+	}
+	if st.FuncNames == nil {
+		st.FuncNames = make(map[int]string)
+	}
+	if st.FuncCallIDs == nil {
+		st.FuncCallIDs = make(map[int]string)
+	}
+	if st.FuncDone == nil {
+		st.FuncDone = make(map[int]bool)
+	}

 	if bytes.HasPrefix(rawJSON, []byte("data:")) {
 		rawJSON = bytes.TrimSpace(rawJSON[5:])
 	}

+	rawJSON = bytes.TrimSpace(rawJSON)
+	if len(rawJSON) == 0 || bytes.Equal(rawJSON, []byte("[DONE]")) {
+		return []string{}
+	}
+
 	root := gjson.ParseBytes(rawJSON)
 	if !root.Exists() {
 		return []string{}
 	}
+	root = unwrapGeminiResponseRoot(root)

 	var out []string
 	nextSeq := func() int { st.Seq++; return st.Seq }
@@ -98,19 +153,54 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
 		itemDone, _ = sjson.Set(itemDone, "sequence_number", nextSeq())
 		itemDone, _ = sjson.Set(itemDone, "item.id", st.ReasoningItemID)
 		itemDone, _ = sjson.Set(itemDone, "output_index", st.ReasoningIndex)
+		itemDone, _ = sjson.Set(itemDone, "item.encrypted_content", st.ReasoningEnc)
 		itemDone, _ = sjson.Set(itemDone, "item.summary.0.text", full)
 		out = append(out, emitEvent("response.output_item.done", itemDone))

 		st.ReasoningClosed = true
 	}

+	// Helper to finalize the assistant message in correct order.
+	// It emits response.output_text.done, response.content_part.done,
+	// and response.output_item.done exactly once.
+	finalizeMessage := func() {
+		if !st.MsgOpened || st.MsgClosed {
+			return
+		}
+		fullText := st.ItemTextBuf.String()
+		done := `{"type":"response.output_text.done","sequence_number":0,"item_id":"","output_index":0,"content_index":0,"text":"","logprobs":[]}`
+		done, _ = sjson.Set(done, "sequence_number", nextSeq())
+		done, _ = sjson.Set(done, "item_id", st.CurrentMsgID)
+		done, _ = sjson.Set(done, "output_index", st.MsgIndex)
+		done, _ = sjson.Set(done, "text", fullText)
+		out = append(out, emitEvent("response.output_text.done", done))
+		partDone := `{"type":"response.content_part.done","sequence_number":0,"item_id":"","output_index":0,"content_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""}}`
+		partDone, _ = sjson.Set(partDone, "sequence_number", nextSeq())
+		partDone, _ = sjson.Set(partDone, "item_id", st.CurrentMsgID)
+		partDone, _ = sjson.Set(partDone, "output_index", st.MsgIndex)
+		partDone, _ = sjson.Set(partDone, "part.text", fullText)
+		out = append(out, emitEvent("response.content_part.done", partDone))
+		final := `{"type":"response.output_item.done","sequence_number":0,"output_index":0,"item":{"id":"","type":"message","status":"completed","content":[{"type":"output_text","text":""}],"role":"assistant"}}`
+		final, _ = sjson.Set(final, "sequence_number", nextSeq())
+		final, _ = sjson.Set(final, "output_index", st.MsgIndex)
+		final, _ = sjson.Set(final, "item.id", st.CurrentMsgID)
+		final, _ = sjson.Set(final, "item.content.0.text", fullText)
+		out = append(out, emitEvent("response.output_item.done", final))
+
+		st.MsgClosed = true
+	}
+
 	// Initialize per-response fields and emit created/in_progress once
 	if !st.Started {
-		if v := root.Get("responseId"); v.Exists() {
-			st.ResponseID = v.String()
+		st.ResponseID = root.Get("responseId").String()
+		if st.ResponseID == "" {
+			st.ResponseID = fmt.Sprintf("resp_%x_%d", time.Now().UnixNano(), atomic.AddUint64(&responseIDCounter, 1))
+		}
+		if !strings.HasPrefix(st.ResponseID, "resp_") {
+			st.ResponseID = fmt.Sprintf("resp_%s", st.ResponseID)
 		}
 		if v := root.Get("createTime"); v.Exists() {
-			if t, err := time.Parse(time.RFC3339Nano, v.String()); err == nil {
+			if t, errParseCreateTime := time.Parse(time.RFC3339Nano, v.String()); errParseCreateTime == nil {
 				st.CreatedAt = t.Unix()
 			}
 		}
@@ -143,15 +233,21 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
 					// Ignore any late thought chunks after reasoning is finalized.
 					return true
 				}
+				if sig := part.Get("thoughtSignature"); sig.Exists() && sig.String() != "" && sig.String() != geminiResponsesThoughtSignature {
+					st.ReasoningEnc = sig.String()
+				} else if sig = part.Get("thought_signature"); sig.Exists() && sig.String() != "" && sig.String() != geminiResponsesThoughtSignature {
+					st.ReasoningEnc = sig.String()
+				}
 				if !st.ReasoningOpened {
 					st.ReasoningOpened = true
 					st.ReasoningIndex = st.NextIndex
 					st.NextIndex++
 					st.ReasoningItemID = fmt.Sprintf("rs_%s_%d", st.ResponseID, st.ReasoningIndex)
-					item := `{"type":"response.output_item.added","sequence_number":0,"output_index":0,"item":{"id":"","type":"reasoning","status":"in_progress","summary":[]}}`
+					item := `{"type":"response.output_item.added","sequence_number":0,"output_index":0,"item":{"id":"","type":"reasoning","status":"in_progress","encrypted_content":"","summary":[]}}`
 					item, _ = sjson.Set(item, "sequence_number", nextSeq())
 					item, _ = sjson.Set(item, "output_index", st.ReasoningIndex)
 					item, _ = sjson.Set(item, "item.id", st.ReasoningItemID)
+					item, _ = sjson.Set(item, "item.encrypted_content", st.ReasoningEnc)
 					out = append(out, emitEvent("response.output_item.added", item))
 					partAdded := `{"type":"response.reasoning_summary_part.added","sequence_number":0,"item_id":"","output_index":0,"summary_index":0,"part":{"type":"summary_text","text":""}}`
 					partAdded, _ = sjson.Set(partAdded, "sequence_number", nextSeq())
@@ -191,9 +287,9 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
 					partAdded, _ = sjson.Set(partAdded, "output_index", st.MsgIndex)
 					out = append(out, emitEvent("response.content_part.added", partAdded))
 					st.ItemTextBuf.Reset()
-					st.ItemTextBuf.WriteString(t.String())
 				}
 				st.TextBuf.WriteString(t.String())
+				st.ItemTextBuf.WriteString(t.String())
 				msg := `{"type":"response.output_text.delta","sequence_number":0,"item_id":"","output_index":0,"content_index":0,"delta":"","logprobs":[]}`
 				msg, _ = sjson.Set(msg, "sequence_number", nextSeq())
 				msg, _ = sjson.Set(msg, "item_id", st.CurrentMsgID)
@@ -205,8 +301,10 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,

 			// Function call
 			if fc := part.Get("functionCall"); fc.Exists() {
-				// Before emitting function-call outputs, finalize reasoning if open.
+				// Before emitting function-call outputs, finalize reasoning and the message (if open).
+				// Responses streaming requires message done events before the next output_item.added.
 				finalizeReasoning()
+				finalizeMessage()
 				name := fc.Get("name").String()
 				idx := st.NextIndex
 				st.NextIndex++
@@ -219,6 +317,14 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
 				}
 				st.FuncNames[idx] = name

+				argsJSON := "{}"
+				if args := fc.Get("args"); args.Exists() {
+					argsJSON = args.Raw
+				}
+				if st.FuncArgsBuf[idx].Len() == 0 && argsJSON != "" {
+					st.FuncArgsBuf[idx].WriteString(argsJSON)
+				}
+
 				// Emit item.added for function call
 				item := `{"type":"response.output_item.added","sequence_number":0,"output_index":0,"item":{"id":"","type":"function_call","status":"in_progress","arguments":"","call_id":"","name":""}}`
 				item, _ = sjson.Set(item, "sequence_number", nextSeq())
@@ -228,10 +334,9 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
 				item, _ = sjson.Set(item, "item.name", name)
 				out = append(out, emitEvent("response.output_item.added", item))

-				// Emit arguments delta (full args in one chunk)
-				if args := fc.Get("args"); args.Exists() {
-					argsJSON := args.Raw
-					st.FuncArgsBuf[idx].WriteString(argsJSON)
+				// Emit arguments delta (full args in one chunk).
+				// When Gemini omits args, emit "{}" to keep Responses streaming event order consistent.
+				if argsJSON != "" {
 					ad := `{"type":"response.function_call_arguments.delta","sequence_number":0,"item_id":"","output_index":0,"delta":""}`
 					ad, _ = sjson.Set(ad, "sequence_number", nextSeq())
 					ad, _ = sjson.Set(ad, "item_id", fmt.Sprintf("fc_%s", st.FuncCallIDs[idx]))
@@ -240,6 +345,27 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
 					out = append(out, emitEvent("response.function_call_arguments.delta", ad))
 				}

+				// Gemini emits the full function call payload at once, so we can finalize it immediately.
+				if !st.FuncDone[idx] {
+					fcDone := `{"type":"response.function_call_arguments.done","sequence_number":0,"item_id":"","output_index":0,"arguments":""}`
+					fcDone, _ = sjson.Set(fcDone, "sequence_number", nextSeq())
+					fcDone, _ = sjson.Set(fcDone, "item_id", fmt.Sprintf("fc_%s", st.FuncCallIDs[idx]))
+					fcDone, _ = sjson.Set(fcDone, "output_index", idx)
+					fcDone, _ = sjson.Set(fcDone, "arguments", argsJSON)
+					out = append(out, emitEvent("response.function_call_arguments.done", fcDone))
+
+					itemDone := `{"type":"response.output_item.done","sequence_number":0,"output_index":0,"item":{"id":"","type":"function_call","status":"completed","arguments":"","call_id":"","name":""}}`
+					itemDone, _ = sjson.Set(itemDone, "sequence_number", nextSeq())
+					itemDone, _ = sjson.Set(itemDone, "output_index", idx)
+					itemDone, _ = sjson.Set(itemDone, "item.id", fmt.Sprintf("fc_%s", st.FuncCallIDs[idx]))
+					itemDone, _ = sjson.Set(itemDone, "item.arguments", argsJSON)
+					itemDone, _ = sjson.Set(itemDone, "item.call_id", st.FuncCallIDs[idx])
+					itemDone, _ = sjson.Set(itemDone, "item.name", st.FuncNames[idx])
+					out = append(out, emitEvent("response.output_item.done", itemDone))
+
+					st.FuncDone[idx] = true
+				}
+
 				return true
 			}

@@ -251,28 +377,7 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
 	if fr := root.Get("candidates.0.finishReason"); fr.Exists() && fr.String() != "" {
 		// Finalize reasoning first to keep ordering tight with last delta
 		finalizeReasoning()
-		// Close message output if opened
-		if st.MsgOpened {
-			fullText := st.ItemTextBuf.String()
-			done := `{"type":"response.output_text.done","sequence_number":0,"item_id":"","output_index":0,"content_index":0,"text":"","logprobs":[]}`
-			done, _ = sjson.Set(done, "sequence_number", nextSeq())
-			done, _ = sjson.Set(done, "item_id", st.CurrentMsgID)
-			done, _ = sjson.Set(done, "output_index", st.MsgIndex)
-			done, _ = sjson.Set(done, "text", fullText)
-			out = append(out, emitEvent("response.output_text.done", done))
-			partDone := `{"type":"response.content_part.done","sequence_number":0,"item_id":"","output_index":0,"content_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""}}`
-			partDone, _ = sjson.Set(partDone, "sequence_number", nextSeq())
-			partDone, _ = sjson.Set(partDone, "item_id", st.CurrentMsgID)
-			partDone, _ = sjson.Set(partDone, "output_index", st.MsgIndex)
-			partDone, _ = sjson.Set(partDone, "part.text", fullText)
-			out = append(out, emitEvent("response.content_part.done", partDone))
-			final := `{"type":"response.output_item.done","sequence_number":0,"output_index":0,"item":{"id":"","type":"message","status":"completed","content":[{"type":"output_text","text":""}],"role":"assistant"}}`
-			final, _ = sjson.Set(final, "sequence_number", nextSeq())
-			final, _ = sjson.Set(final, "output_index", st.MsgIndex)
-			final, _ = sjson.Set(final, "item.id", st.CurrentMsgID)
-			final, _ = sjson.Set(final, "item.content.0.text", fullText)
-			out = append(out, emitEvent("response.output_item.done", final))
-		}
+		finalizeMessage()

 		// Close function calls
 		if len(st.FuncArgsBuf) > 0 {
@@ -289,6 +394,9 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
 				}
 			}
 			for _, idx := range idxs {
+				if st.FuncDone[idx] {
+					continue
+				}
 				args := "{}"
 				if b := st.FuncArgsBuf[idx]; b != nil && b.Len() > 0 {
 					args = b.String()
@@ -308,6 +416,8 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
 				itemDone, _ = sjson.Set(itemDone, "item.call_id", st.FuncCallIDs[idx])
 				itemDone, _ = sjson.Set(itemDone, "item.name", st.FuncNames[idx])
 				out = append(out, emitEvent("response.output_item.done", itemDone))
+
+				st.FuncDone[idx] = true
 			}
 		}

@@ -319,8 +429,8 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
 		completed, _ = sjson.Set(completed, "response.id", st.ResponseID)
 		completed, _ = sjson.Set(completed, "response.created_at", st.CreatedAt)

-		if requestRawJSON != nil {
-			req := gjson.ParseBytes(requestRawJSON)
+		if reqJSON := pickRequestJSON(originalRequestRawJSON, requestRawJSON); len(reqJSON) > 0 {
+			req := unwrapRequestRoot(gjson.ParseBytes(reqJSON))
 			if v := req.Get("instructions"); v.Exists() {
 				completed, _ = sjson.Set(completed, "response.instructions", v.String())
 			}
@@ -383,41 +493,34 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
 			}
 		}

-		// Compose outputs in encountered order: reasoning, message, function_calls
+		// Compose outputs in output_index order.
 		outputsWrapper := `{"arr":[]}`
-		if st.ReasoningOpened {
-			item := `{"id":"","type":"reasoning","summary":[{"type":"summary_text","text":""}]}`
-			item, _ = sjson.Set(item, "id", st.ReasoningItemID)
-			item, _ = sjson.Set(item, "summary.0.text", st.ReasoningBuf.String())
-			outputsWrapper, _ = sjson.SetRaw(outputsWrapper, "arr.-1", item)
-		}
-		if st.MsgOpened {
-			item := `{"id":"","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":""}],"role":"assistant"}`
-			item, _ = sjson.Set(item, "id", st.CurrentMsgID)
-			item, _ = sjson.Set(item, "content.0.text", st.TextBuf.String())
-			outputsWrapper, _ = sjson.SetRaw(outputsWrapper, "arr.-1", item)
-		}
-		if len(st.FuncArgsBuf) > 0 {
-			idxs := make([]int, 0, len(st.FuncArgsBuf))
-			for idx := range st.FuncArgsBuf {
-				idxs = append(idxs, idx)
+		for idx := 0; idx < st.NextIndex; idx++ {
+			if st.ReasoningOpened && idx == st.ReasoningIndex {
+				item := `{"id":"","type":"reasoning","encrypted_content":"","summary":[{"type":"summary_text","text":""}]}`
+				item, _ = sjson.Set(item, "id", st.ReasoningItemID)
+				item, _ = sjson.Set(item, "encrypted_content", st.ReasoningEnc)
+				item, _ = sjson.Set(item, "summary.0.text", st.ReasoningBuf.String())
+				outputsWrapper, _ = sjson.SetRaw(outputsWrapper, "arr.-1", item)
+				continue
 			}
-			for i := 0; i < len(idxs); i++ {
-				for j := i + 1; j < len(idxs); j++ {
-					if idxs[j] < idxs[i] {
-						idxs[i], idxs[j] = idxs[j], idxs[i]
-					}
-				}
+			if st.MsgOpened && idx == st.MsgIndex {
+				item := `{"id":"","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":""}],"role":"assistant"}`
+				item, _ = sjson.Set(item, "id", st.CurrentMsgID)
+				item, _ = sjson.Set(item, "content.0.text", st.TextBuf.String())
+				outputsWrapper, _ = sjson.SetRaw(outputsWrapper, "arr.-1", item)
+				continue
 			}
-			for _, idx := range idxs {
-				args := ""
-				if b := st.FuncArgsBuf[idx]; b != nil {
+
+			if callID, ok := st.FuncCallIDs[idx]; ok && callID != "" {
+				args := "{}"
+				if b := st.FuncArgsBuf[idx]; b != nil && b.Len() > 0 {
 					args = b.String()
 				}
 				item := `{"id":"","type":"function_call","status":"completed","arguments":"","call_id":"","name":""}`
-				item, _ = sjson.Set(item, "id", fmt.Sprintf("fc_%s", st.FuncCallIDs[idx]))
+				item, _ = sjson.Set(item, "id", fmt.Sprintf("fc_%s", callID))
 				item, _ = sjson.Set(item, "arguments", args)
-				item, _ = sjson.Set(item, "call_id", st.FuncCallIDs[idx])
+				item, _ = sjson.Set(item, "call_id", callID)
 				item, _ = sjson.Set(item, "name", st.FuncNames[idx])
 				outputsWrapper, _ = sjson.SetRaw(outputsWrapper, "arr.-1", item)
 			}
@@ -431,8 +534,8 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
 			// input tokens = prompt + thoughts
 			input := um.Get("promptTokenCount").Int() + um.Get("thoughtsTokenCount").Int()
 			completed, _ = sjson.Set(completed, "response.usage.input_tokens", input)
-			// cached_tokens not provided by Gemini; default to 0 for structure compatibility
-			completed, _ = sjson.Set(completed, "response.usage.input_tokens_details.cached_tokens", 0)
+			// cached token details: align with OpenAI "cached_tokens" semantics.
+			completed, _ = sjson.Set(completed, "response.usage.input_tokens_details.cached_tokens", um.Get("cachedContentTokenCount").Int())
 			// output tokens
 			if v := um.Get("candidatesTokenCount"); v.Exists() {
 				completed, _ = sjson.Set(completed, "response.usage.output_tokens", v.Int())
@@ -460,6 +563,7 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
 // ConvertGeminiResponseToOpenAIResponsesNonStream aggregates Gemini response JSON into a single OpenAI Responses JSON object.
 func ConvertGeminiResponseToOpenAIResponsesNonStream(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, _ *any) string {
 	root := gjson.ParseBytes(rawJSON)
+	root = unwrapGeminiResponseRoot(root)

 	// Base response scaffold
 	resp := `{"id":"","object":"response","created_at":0,"status":"completed","background":false,"error":null,"incomplete_details":null}`
@@ -478,15 +582,15 @@ func ConvertGeminiResponseToOpenAIResponsesNonStream(_ context.Context, _ string
 	// created_at: map from createTime if available
 	createdAt := time.Now().Unix()
 	if v := root.Get("createTime"); v.Exists() {
-		if t, err := time.Parse(time.RFC3339Nano, v.String()); err == nil {
+		if t, errParseCreateTime := time.Parse(time.RFC3339Nano, v.String()); errParseCreateTime == nil {
 			createdAt = t.Unix()
 		}
 	}
 	resp, _ = sjson.Set(resp, "created_at", createdAt)

 	// Echo request fields when present; fallback model from response modelVersion
-	if len(requestRawJSON) > 0 {
-		req := gjson.ParseBytes(requestRawJSON)
+	if reqJSON := pickRequestJSON(originalRequestRawJSON, requestRawJSON); len(reqJSON) > 0 {
+		req := unwrapRequestRoot(gjson.ParseBytes(reqJSON))
 		if v := req.Get("instructions"); v.Exists() {
 			resp, _ = sjson.Set(resp, "instructions", v.String())
 		}
@@ -636,8 +740,8 @@ func ConvertGeminiResponseToOpenAIResponsesNonStream(_ context.Context, _ string
 		// input tokens = prompt + thoughts
 		input := um.Get("promptTokenCount").Int() + um.Get("thoughtsTokenCount").Int()
 		resp, _ = sjson.Set(resp, "usage.input_tokens", input)
-		// cached_tokens not provided by Gemini; default to 0 for structure compatibility
-		resp, _ = sjson.Set(resp, "usage.input_tokens_details.cached_tokens", 0)
+		// cached token details: align with OpenAI "cached_tokens" semantics.
+		resp, _ = sjson.Set(resp, "usage.input_tokens_details.cached_tokens", um.Get("cachedContentTokenCount").Int())
 		// output tokens
 		if v := um.Get("candidatesTokenCount"); v.Exists() {
 			resp, _ = sjson.Set(resp, "usage.output_tokens", v.Int())
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_response_test.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_response_test.go
@@ -0,0 +1,353 @@
+package responses
+
+import (
+	"context"
+	"strings"
+	"testing"
+
+	"github.com/tidwall/gjson"
+)
+
+// parseSSEEvent splits one emitted chunk of the form "event: <name>\ndata: <json>"
+// into the event name and the parsed data payload. It fails the test
+// immediately when the chunk has fewer than two lines or when the data line is
+// not valid JSON.
+func parseSSEEvent(t *testing.T, chunk string) (string, gjson.Result) {
+	t.Helper()
+
+	// Only the first two lines are inspected; anything after them is ignored.
+	parts := strings.SplitN(chunk, "\n", 3)
+	if len(parts) < 2 {
+		t.Fatalf("unexpected SSE chunk: %q", chunk)
+	}
+	name, payload := parts[0], parts[1]
+	name = strings.TrimSpace(strings.TrimPrefix(name, "event:"))
+	payload = strings.TrimSpace(strings.TrimPrefix(payload, "data:"))
+
+	if !gjson.Valid(payload) {
+		t.Fatalf("invalid SSE data JSON: %q", payload)
+	}
+	return name, gjson.Parse(payload)
+}
+
+// TestConvertGeminiResponseToOpenAIResponses_UnwrapAndAggregateText feeds a
+// Vertex-style stream (payload wrapped under "response") made of text deltas, a
+// function call and a final STOP chunk, and checks that:
+//   - the message is closed (output_text.done, content_part.done,
+//     output_item.done) before the function call's output_item.added;
+//   - the done events carry the concatenation of every text delta;
+//   - response.completed echoes the request instructions, reports the
+//     "resp_"-prefixed response id and the cached token count of the final chunk;
+//   - the function call's name and arguments are preserved.
+func TestConvertGeminiResponseToOpenAIResponses_UnwrapAndAggregateText(t *testing.T) {
+	// Vertex-style Gemini stream wraps the actual response payload under "response".
+	// This test ensures we unwrap and that output_text.done contains the full text.
+	in := []string{
+		`data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"text":""}]}}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2,"cachedContentTokenCount":0},"modelVersion":"test-model","responseId":"req_vrtx_1"},"traceId":"t1"}`,
+		`data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"text":"让"}]}}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2,"cachedContentTokenCount":0},"modelVersion":"test-model","responseId":"req_vrtx_1"},"traceId":"t1"}`,
+		`data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"text":"我先"}]}}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2,"cachedContentTokenCount":0},"modelVersion":"test-model","responseId":"req_vrtx_1"},"traceId":"t1"}`,
+		`data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"text":"了解"}]}}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2,"cachedContentTokenCount":0},"modelVersion":"test-model","responseId":"req_vrtx_1"},"traceId":"t1"}`,
+		`data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"functionCall":{"name":"mcp__serena__list_dir","args":{"recursive":false,"relative_path":"internal"},"id":"toolu_1"}}]}}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2,"cachedContentTokenCount":0},"modelVersion":"test-model","responseId":"req_vrtx_1"},"traceId":"t1"}`,
+		`data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"text":""}]},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":10,"candidatesTokenCount":5,"totalTokenCount":15,"cachedContentTokenCount":2},"modelVersion":"test-model","responseId":"req_vrtx_1"},"traceId":"t1"}`,
+	}
+
+	originalReq := []byte(`{"instructions":"test instructions","model":"gpt-5","max_output_tokens":123}`)
+
+	var param any
+	var out []string
+	for _, line := range in {
+		out = append(out, ConvertGeminiResponseToOpenAIResponses(context.Background(), "test-model", originalReq, nil, []byte(line), &param)...)
+	}
+
+	var (
+		gotTextDone     bool
+		gotMessageDone  bool
+		gotResponseDone bool
+		gotFuncDone     bool
+
+		textDone     string
+		messageText  string
+		responseID   string
+		instructions string
+		cachedTokens int64
+
+		funcName string
+		funcArgs string
+
+		// Index of the first occurrence of each event in out; -1 means not seen.
+		posTextDone    = -1
+		posPartDone    = -1
+		posMessageDone = -1
+		posFuncAdded   = -1
+	)
+
+	for i, chunk := range out {
+		ev, data := parseSSEEvent(t, chunk)
+		switch ev {
+		case "response.output_text.done":
+			gotTextDone = true
+			if posTextDone == -1 {
+				posTextDone = i
+			}
+			textDone = data.Get("text").String()
+		case "response.content_part.done":
+			if posPartDone == -1 {
+				posPartDone = i
+			}
+		case "response.output_item.done":
+			switch data.Get("item.type").String() {
+			case "message":
+				gotMessageDone = true
+				if posMessageDone == -1 {
+					posMessageDone = i
+				}
+				messageText = data.Get("item.content.0.text").String()
+			case "function_call":
+				gotFuncDone = true
+				funcName = data.Get("item.name").String()
+				funcArgs = data.Get("item.arguments").String()
+			}
+		case "response.output_item.added":
+			if data.Get("item.type").String() == "function_call" && posFuncAdded == -1 {
+				posFuncAdded = i
+			}
+		case "response.completed":
+			gotResponseDone = true
+			responseID = data.Get("response.id").String()
+			instructions = data.Get("response.instructions").String()
+			cachedTokens = data.Get("response.usage.input_tokens_details.cached_tokens").Int()
+		}
+	}
+
+	if !gotTextDone {
+		t.Fatalf("missing response.output_text.done event")
+	}
+	if posTextDone == -1 || posPartDone == -1 || posMessageDone == -1 || posFuncAdded == -1 {
+		t.Fatalf("missing ordering events: textDone=%d partDone=%d messageDone=%d funcAdded=%d", posTextDone, posPartDone, posMessageDone, posFuncAdded)
+	}
+	if !(posTextDone < posPartDone && posPartDone < posMessageDone && posMessageDone < posFuncAdded) {
+		t.Fatalf("unexpected message/function ordering: textDone=%d partDone=%d messageDone=%d funcAdded=%d", posTextDone, posPartDone, posMessageDone, posFuncAdded)
+	}
+	if !gotMessageDone {
+		t.Fatalf("missing message response.output_item.done event")
+	}
+	if !gotFuncDone {
+		t.Fatalf("missing function_call response.output_item.done event")
+	}
+	if !gotResponseDone {
+		t.Fatalf("missing response.completed event")
+	}
+
+	if textDone != "让我先了解" {
+		t.Fatalf("unexpected output_text.done text: got %q", textDone)
+	}
+	if messageText != "让我先了解" {
+		t.Fatalf("unexpected message done text: got %q", messageText)
+	}
+
+	if responseID != "resp_req_vrtx_1" {
+		t.Fatalf("unexpected response id: got %q", responseID)
+	}
+	if instructions != "test instructions" {
+		t.Fatalf("unexpected instructions echo: got %q", instructions)
+	}
+	if cachedTokens != 2 {
+		t.Fatalf("unexpected cached token count: got %d", cachedTokens)
+	}
+
+	if funcName != "mcp__serena__list_dir" {
+		t.Fatalf("unexpected function name: got %q", funcName)
+	}
+	if !gjson.Valid(funcArgs) {
+		t.Fatalf("invalid function arguments JSON: %q", funcArgs)
+	}
+	// Require a literal JSON false: Bool() also returns false for a missing key,
+	// so comparing its result alone could not detect a dropped argument.
+	if recursive := gjson.Get(funcArgs, "recursive"); recursive.Type != gjson.False {
+		t.Fatalf("unexpected recursive arg: %v", recursive.Value())
+	}
+	if gjson.Get(funcArgs, "relative_path").String() != "internal" {
+		t.Fatalf("unexpected relative_path arg: %q", gjson.Get(funcArgs, "relative_path").String())
+	}
+}
+
+// TestConvertGeminiResponseToOpenAIResponses_ReasoningEncryptedContent streams a
+// thought part whose first chunk carries a thoughtSignature and checks that the
+// signature is surfaced as item.encrypted_content on both the reasoning item's
+// response.output_item.added and response.output_item.done events.
+func TestConvertGeminiResponseToOpenAIResponses_ReasoningEncryptedContent(t *testing.T) {
+	const sig = "RXE0RENrZ0lDeEFDR0FJcVFOZDdjUzlleGFuRktRdFcvSzNyZ2MvWDNCcDQ4RmxSbGxOWUlOVU5kR1l1UHMrMGdkMVp0Vkg3ekdKU0g4YVljc2JjN3lNK0FrdGpTNUdqamI4T3Z0VVNETzdQd3pmcFhUOGl3U3hXUEJvTVFRQ09mWTFyMEtTWGZxUUlJakFqdmFGWk83RW1XRlBKckJVOVpkYzdDKw=="
+	chunks := []string{
+		`data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"thought":true,"thoughtSignature":"` + sig + `","text":""}]}}],"modelVersion":"test-model","responseId":"req_vrtx_sig"},"traceId":"t1"}`,
+		`data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"thought":true,"text":"a"}]}}],"modelVersion":"test-model","responseId":"req_vrtx_sig"},"traceId":"t1"}`,
+		`data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"text":"hello"}]}}],"modelVersion":"test-model","responseId":"req_vrtx_sig"},"traceId":"t1"}`,
+		`data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"text":""}]},"finishReason":"STOP"}],"modelVersion":"test-model","responseId":"req_vrtx_sig"},"traceId":"t1"}`,
+	}
+
+	// The converter keeps its streaming state behind this pointer across calls.
+	var state any
+	var events []string
+	for i := range chunks {
+		events = append(events, ConvertGeminiResponseToOpenAIResponses(context.Background(), "test-model", nil, nil, []byte(chunks[i]), &state)...)
+	}
+
+	var addedEnc, doneEnc string
+	for _, raw := range events {
+		name, data := parseSSEEvent(t, raw)
+		if data.Get("item.type").String() != "reasoning" {
+			continue
+		}
+		enc := data.Get("item.encrypted_content").String()
+		switch name {
+		case "response.output_item.added":
+			addedEnc = enc
+		case "response.output_item.done":
+			doneEnc = enc
+		}
+	}
+
+	if addedEnc != sig {
+		t.Fatalf("unexpected encrypted_content in response.output_item.added: got %q", addedEnc)
+	}
+	if doneEnc != sig {
+		t.Fatalf("unexpected encrypted_content in response.output_item.done: got %q", doneEnc)
+	}
+}
+
+// TestConvertGeminiResponseToOpenAIResponses_FunctionCallEventOrder streams three
+// function calls (the first two without args) followed by a STOP chunk. For each
+// call it checks that output_item.added, function_call_arguments.delta,
+// function_call_arguments.done and output_item.done are emitted in that order,
+// that one call is fully closed before the next is added, that missing args are
+// reported as "{}", and that response.completed comes last and lists the three
+// calls in order.
+func TestConvertGeminiResponseToOpenAIResponses_FunctionCallEventOrder(t *testing.T) {
+	const calls = 3
+	chunks := []string{
+		`data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"functionCall":{"name":"tool0"}}]}}],"modelVersion":"test-model","responseId":"req_vrtx_1"},"traceId":"t1"}`,
+		`data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"functionCall":{"name":"tool1"}}]}}],"modelVersion":"test-model","responseId":"req_vrtx_1"},"traceId":"t1"}`,
+		`data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"functionCall":{"name":"tool2","args":{"a":1}}}]}}],"modelVersion":"test-model","responseId":"req_vrtx_1"},"traceId":"t1"}`,
+		`data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"text":""}]},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":10,"candidatesTokenCount":5,"totalTokenCount":15,"cachedContentTokenCount":0},"modelVersion":"test-model","responseId":"req_vrtx_1"},"traceId":"t1"}`,
+	}
+
+	var state any
+	var events []string
+	for i := range chunks {
+		events = append(events, ConvertGeminiResponseToOpenAIResponses(context.Background(), "test-model", nil, nil, []byte(chunks[i]), &state)...)
+	}
+
+	// Position of each event kind in the emitted stream, keyed by output_index.
+	// -1 means the event was never seen for that index.
+	addedAt := []int{-1, -1, -1}
+	argsDeltaAt := []int{-1, -1, -1}
+	argsDoneAt := []int{-1, -1, -1}
+	itemDoneAt := []int{-1, -1, -1}
+	completedAt := -1
+	deltas := map[int]string{}
+
+	// outputIndex reads the event's output_index and reports whether it addresses
+	// one of the expected calls.
+	outputIndex := func(data gjson.Result) (int, bool) {
+		idx := int(data.Get("output_index").Int())
+		return idx, idx >= 0 && idx < calls
+	}
+
+	for pos, raw := range events {
+		name, data := parseSSEEvent(t, raw)
+		isFuncItem := data.Get("item.type").String() == "function_call"
+		switch name {
+		case "response.output_item.added":
+			if idx, ok := outputIndex(data); ok && isFuncItem {
+				addedAt[idx] = pos
+			}
+		case "response.function_call_arguments.delta":
+			if idx, ok := outputIndex(data); ok {
+				argsDeltaAt[idx] = pos
+				deltas[idx] = data.Get("delta").String()
+			}
+		case "response.function_call_arguments.done":
+			if idx, ok := outputIndex(data); ok {
+				argsDoneAt[idx] = pos
+			}
+		case "response.output_item.done":
+			if idx, ok := outputIndex(data); ok && isFuncItem {
+				itemDoneAt[idx] = pos
+			}
+		case "response.completed":
+			completedAt = pos
+
+			items := data.Get("response.output")
+			if !items.Exists() || !items.IsArray() {
+				t.Fatalf("missing response.output in response.completed")
+			}
+			if n := len(items.Array()); n != 3 {
+				t.Fatalf("unexpected response.output length: got %d", n)
+			}
+			if data.Get("response.output.0.name").String() != "tool0" || data.Get("response.output.0.arguments").String() != "{}" {
+				t.Fatalf("unexpected output[0]: %s", data.Get("response.output.0").Raw)
+			}
+			if data.Get("response.output.1.name").String() != "tool1" || data.Get("response.output.1.arguments").String() != "{}" {
+				t.Fatalf("unexpected output[1]: %s", data.Get("response.output.1").Raw)
+			}
+			if data.Get("response.output.2.name").String() != "tool2" {
+				t.Fatalf("unexpected output[2] name: %s", data.Get("response.output.2").Raw)
+			}
+			if lastArgs := data.Get("response.output.2.arguments").String(); !gjson.Valid(lastArgs) {
+				t.Fatalf("unexpected output[2] arguments: %q", lastArgs)
+			}
+		}
+	}
+
+	if completedAt == -1 {
+		t.Fatalf("missing response.completed event")
+	}
+	for idx := 0; idx < calls; idx++ {
+		a, d, ad, done := addedAt[idx], argsDeltaAt[idx], argsDoneAt[idx], itemDoneAt[idx]
+		if a == -1 || d == -1 || ad == -1 || done == -1 {
+			t.Fatalf("missing function call events for output_index %d: added=%d argsDelta=%d argsDone=%d itemDone=%d", idx, a, d, ad, done)
+		}
+		if !(a < d && d < ad && ad < done) {
+			t.Fatalf("unexpected ordering for output_index %d: added=%d argsDelta=%d argsDone=%d itemDone=%d", idx, a, d, ad, done)
+		}
+		// The previous call must be fully closed before this one is opened.
+		if idx > 0 && !(itemDoneAt[idx-1] < a) {
+			t.Fatalf("function call events overlap between %d and %d: prevDone=%d nextAdded=%d", idx-1, idx, itemDoneAt[idx-1], a)
+		}
+	}
+
+	if deltas[0] != "{}" {
+		t.Fatalf("unexpected delta for output_index 0: got %q", deltas[0])
+	}
+	if deltas[1] != "{}" {
+		t.Fatalf("unexpected delta for output_index 1: got %q", deltas[1])
+	}
+	if deltas[2] == "" || !gjson.Valid(deltas[2]) || gjson.Get(deltas[2], "a").Int() != 1 {
+		t.Fatalf("unexpected delta for output_index 2: got %q", deltas[2])
+	}
+	if !(itemDoneAt[2] < completedAt) {
+		t.Fatalf("response.completed should be after last output_item.done: last=%d completed=%d", itemDoneAt[2], completedAt)
+	}
+}
+
+// TestConvertGeminiResponseToOpenAIResponses_ResponseOutputOrdering streams a
+// function-call chunk, a text chunk, and a final STOP chunk through the
+// converter. It then checks that the function_call item at output_index 0 is
+// reported done before the message item at output_index 1 is added, and that
+// response.completed comes afterwards with response.output listing the
+// function call first and the "hi" message second.
+func TestConvertGeminiResponseToOpenAIResponses_ResponseOutputOrdering(t *testing.T) {
+	chunks := []string{
+		`data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"functionCall":{"name":"tool0","args":{"x":"y"}}}]}}],"modelVersion":"test-model","responseId":"req_vrtx_2"},"traceId":"t2"}`,
+		`data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"text":"hi"}]}}],"modelVersion":"test-model","responseId":"req_vrtx_2"},"traceId":"t2"}`,
+		`data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"text":""}]},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2,"cachedContentTokenCount":0},"modelVersion":"test-model","responseId":"req_vrtx_2"},"traceId":"t2"}`,
+	}
+
+	// The same state pointer is handed to every call so the converter can
+	// carry whatever it needs from one chunk to the next.
+	var state any
+	var events []string
+	for _, raw := range chunks {
+		converted := ConvertGeminiResponseToOpenAIResponses(context.Background(), "test-model", nil, nil, []byte(raw), &state)
+		events = append(events, converted...)
+	}
+
+	// Index within events of each milestone; -1 means it was never observed.
+	funcDoneAt, msgAddedAt, completedAt := -1, -1, -1
+
+	for at, event := range events {
+		name, payload := parseSSEEvent(t, event)
+
+		if name == "response.output_item.done" {
+			isFunctionCall := payload.Get("item.type").String() == "function_call"
+			if isFunctionCall && payload.Get("output_index").Int() == 0 {
+				funcDoneAt = at
+			}
+			continue
+		}
+
+		if name == "response.output_item.added" {
+			isMessage := payload.Get("item.type").String() == "message"
+			if isMessage && payload.Get("output_index").Int() == 1 {
+				msgAddedAt = at
+			}
+			continue
+		}
+
+		if name != "response.completed" {
+			continue
+		}
+
+		// The final snapshot must list the outputs in emission order.
+		completedAt = at
+		if got := payload.Get("response.output.0.type").String(); got != "function_call" {
+			t.Fatalf("expected response.output[0] to be function_call: %s", payload.Get("response.output.0").Raw)
+		}
+		if got := payload.Get("response.output.1.type").String(); got != "message" {
+			t.Fatalf("expected response.output[1] to be message: %s", payload.Get("response.output.1").Raw)
+		}
+		if got := payload.Get("response.output.1.content.0.text").String(); got != "hi" {
+			t.Fatalf("unexpected message text in response.output[1]: %s", payload.Get("response.output.1").Raw)
+		}
+	}
+
+	if funcDoneAt < 0 || msgAddedAt < 0 || completedAt < 0 {
+		t.Fatalf("missing required events: funcDone=%d msgAdded=%d completed=%d", funcDoneAt, msgAddedAt, completedAt)
+	}
+	if funcDoneAt >= msgAddedAt {
+		t.Fatalf("expected function_call to complete before message is added: funcDone=%d msgAdded=%d", funcDoneAt, msgAddedAt)
+	}
+	if msgAddedAt >= completedAt {
+		t.Fatalf("expected response.completed after message added: msgAdded=%d completed=%d", msgAddedAt, completedAt)
+	}
+}
--- a/Show More
+++ b/Show More