From a1da6ff5acc2421f6b2c507c30a181204b37a2e9 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Wed, 14 Jan 2026 04:29:15 +0800 Subject: [PATCH 01/65] Fixed: #499 #985 feat(oauth): add support for customizable OAuth callback ports - Introduced `oauth-callback-port` flag to override default callback ports. - Updated SDK and login flows for `iflow`, `gemini`, `antigravity`, `codex`, `claude`, and `openai` to respect configurable callback ports. - Refactored internal OAuth servers to dynamically assign ports based on the provided options. - Revised tests and documentation to reflect the new flag and behavior. --- cmd/server/main.go | 5 ++++- internal/auth/gemini/gemini_auth.go | 34 ++++++++++++++++++++--------- internal/cmd/anthropic_login.go | 7 +++--- internal/cmd/antigravity_login.go | 7 +++--- internal/cmd/iflow_login.go | 7 +++--- internal/cmd/login.go | 14 +++++++----- internal/cmd/openai_login.go | 10 ++++++--- internal/cmd/qwen_login.go | 7 +++--- sdk/auth/antigravity.go | 16 ++++++++++---- sdk/auth/claude.go | 13 +++++++---- sdk/auth/codex.go | 13 +++++++---- sdk/auth/gemini.go | 5 +++-- sdk/auth/iflow.go | 15 ++++++++----- sdk/auth/interfaces.go | 9 ++++---- 14 files changed, 107 insertions(+), 55 deletions(-) diff --git a/cmd/server/main.go b/cmd/server/main.go index f9bb2080..385d7cfa 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -61,6 +61,7 @@ func main() { var iflowLogin bool var iflowCookie bool var noBrowser bool + var oauthCallbackPort int var antigravityLogin bool var projectID string var vertexImport string @@ -75,6 +76,7 @@ func main() { flag.BoolVar(&iflowLogin, "iflow-login", false, "Login to iFlow using OAuth") flag.BoolVar(&iflowCookie, "iflow-cookie", false, "Login to iFlow using Cookie") flag.BoolVar(&noBrowser, "no-browser", false, "Don't open browser automatically for OAuth") + flag.IntVar(&oauthCallbackPort, "oauth-callback-port", 0, "Override OAuth callback port (defaults to provider-specific port)") flag.BoolVar(&antigravityLogin, "antigravity-login", false, "Login to Antigravity using OAuth") flag.StringVar(&projectID, "project_id", "", "Project ID (Gemini only, not required)") flag.StringVar(&configPath, "config", DefaultConfigPath, "Configure File Path") @@ -425,7 +427,8 @@ func main() { // Create login options to be used in authentication flows. options := &cmd.LoginOptions{ - NoBrowser: noBrowser, + NoBrowser: noBrowser, + CallbackPort: oauthCallbackPort, } // Register the shared token store once so all components use the same persistence backend. diff --git a/internal/auth/gemini/gemini_auth.go b/internal/auth/gemini/gemini_auth.go index 7b18e738..708ac809 100644 --- a/internal/auth/gemini/gemini_auth.go +++ b/internal/auth/gemini/gemini_auth.go @@ -29,8 +29,9 @@ import ( ) const ( - geminiOauthClientID = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com" - geminiOauthClientSecret = "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" + geminiOauthClientID = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com" + geminiOauthClientSecret = "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" + geminiDefaultCallbackPort = 8085 ) var ( @@ -49,8 +50,9 @@ type GeminiAuth struct { // WebLoginOptions customizes the interactive OAuth flow. type WebLoginOptions struct { - NoBrowser bool - Prompt func(string) (string, error) + NoBrowser bool + CallbackPort int + Prompt func(string) (string, error) } // NewGeminiAuth creates a new instance of GeminiAuth. 
@@ -72,6 +74,12 @@ func NewGeminiAuth() *GeminiAuth { // - *http.Client: An HTTP client configured with authentication // - error: An error if the client configuration fails, nil otherwise func (g *GeminiAuth) GetAuthenticatedClient(ctx context.Context, ts *GeminiTokenStorage, cfg *config.Config, opts *WebLoginOptions) (*http.Client, error) { + callbackPort := geminiDefaultCallbackPort + if opts != nil && opts.CallbackPort > 0 { + callbackPort = opts.CallbackPort + } + callbackURL := fmt.Sprintf("http://localhost:%d/oauth2callback", callbackPort) + // Configure proxy settings for the HTTP client if a proxy URL is provided. proxyURL, err := url.Parse(cfg.ProxyURL) if err == nil { @@ -106,7 +114,7 @@ func (g *GeminiAuth) GetAuthenticatedClient(ctx context.Context, ts *GeminiToken conf := &oauth2.Config{ ClientID: geminiOauthClientID, ClientSecret: geminiOauthClientSecret, - RedirectURL: "http://localhost:8085/oauth2callback", // This will be used by the local server. + RedirectURL: callbackURL, // This will be used by the local server. Scopes: geminiOauthScopes, Endpoint: google.Endpoint, } @@ -218,14 +226,20 @@ func (g *GeminiAuth) createTokenStorage(ctx context.Context, config *oauth2.Conf // - *oauth2.Token: The OAuth2 token obtained from the authorization flow // - error: An error if the token acquisition fails, nil otherwise func (g *GeminiAuth) getTokenFromWeb(ctx context.Context, config *oauth2.Config, opts *WebLoginOptions) (*oauth2.Token, error) { + callbackPort := geminiDefaultCallbackPort + if opts != nil && opts.CallbackPort > 0 { + callbackPort = opts.CallbackPort + } + callbackURL := fmt.Sprintf("http://localhost:%d/oauth2callback", callbackPort) + // Use a channel to pass the authorization code from the HTTP handler to the main function. codeChan := make(chan string, 1) errChan := make(chan error, 1) // Create a new HTTP server with its own multiplexer. 
mux := http.NewServeMux() - server := &http.Server{Addr: ":8085", Handler: mux} - config.RedirectURL = "http://localhost:8085/oauth2callback" + server := &http.Server{Addr: fmt.Sprintf(":%d", callbackPort), Handler: mux} + config.RedirectURL = callbackURL mux.HandleFunc("/oauth2callback", func(w http.ResponseWriter, r *http.Request) { if err := r.URL.Query().Get("error"); err != "" { @@ -277,13 +291,13 @@ func (g *GeminiAuth) getTokenFromWeb(ctx context.Context, config *oauth2.Config, // Check if browser is available if !browser.IsAvailable() { log.Warn("No browser available on this system") - util.PrintSSHTunnelInstructions(8085) + util.PrintSSHTunnelInstructions(callbackPort) fmt.Printf("Please manually open this URL in your browser:\n\n%s\n", authURL) } else { if err := browser.OpenURL(authURL); err != nil { authErr := codex.NewAuthenticationError(codex.ErrBrowserOpenFailed, err) log.Warn(codex.GetUserFriendlyMessage(authErr)) - util.PrintSSHTunnelInstructions(8085) + util.PrintSSHTunnelInstructions(callbackPort) fmt.Printf("Please manually open this URL in your browser:\n\n%s\n", authURL) // Log platform info for debugging @@ -294,7 +308,7 @@ func (g *GeminiAuth) getTokenFromWeb(ctx context.Context, config *oauth2.Config, } } } else { - util.PrintSSHTunnelInstructions(8085) + util.PrintSSHTunnelInstructions(callbackPort) fmt.Printf("Please open this URL in your browser:\n\n%s\n", authURL) } diff --git a/internal/cmd/anthropic_login.go b/internal/cmd/anthropic_login.go index 6efd87a8..dafdd02b 100644 --- a/internal/cmd/anthropic_login.go +++ b/internal/cmd/anthropic_login.go @@ -32,9 +32,10 @@ func DoClaudeLogin(cfg *config.Config, options *LoginOptions) { manager := newAuthManager() authOpts := &sdkAuth.LoginOptions{ - NoBrowser: options.NoBrowser, - Metadata: map[string]string{}, - Prompt: promptFn, + NoBrowser: options.NoBrowser, + CallbackPort: options.CallbackPort, + Metadata: map[string]string{}, + Prompt: promptFn, } _, savedPath, err := manager.Login(context.Background(), "claude", cfg, authOpts) diff --git a/internal/cmd/antigravity_login.go b/internal/cmd/antigravity_login.go index 1cd42899..2efbaeee 100644 --- a/internal/cmd/antigravity_login.go +++ b/internal/cmd/antigravity_login.go @@ -22,9 +22,10 @@ func DoAntigravityLogin(cfg *config.Config, options *LoginOptions) { manager := newAuthManager() authOpts := &sdkAuth.LoginOptions{ - NoBrowser: options.NoBrowser, - Metadata: map[string]string{}, - Prompt: promptFn, + NoBrowser: options.NoBrowser, + CallbackPort: options.CallbackPort, + Metadata: map[string]string{}, + Prompt: promptFn, } record, savedPath, err := manager.Login(context.Background(), "antigravity", cfg, authOpts) diff --git a/internal/cmd/iflow_login.go b/internal/cmd/iflow_login.go index cf00b63c..07360b8c 100644 --- a/internal/cmd/iflow_login.go +++ b/internal/cmd/iflow_login.go @@ -24,9 +24,10 @@ func DoIFlowLogin(cfg *config.Config, options *LoginOptions) { } authOpts := &sdkAuth.LoginOptions{ - NoBrowser: options.NoBrowser, - Metadata: map[string]string{}, - Prompt: promptFn, + NoBrowser: options.NoBrowser, + CallbackPort: options.CallbackPort, + Metadata: map[string]string{}, + Prompt: promptFn, } _, savedPath, err := manager.Login(context.Background(), "iflow", cfg, authOpts) diff --git a/internal/cmd/login.go b/internal/cmd/login.go index 3bb0b9a5..558dacf6 100644 --- a/internal/cmd/login.go +++ b/internal/cmd/login.go @@ -67,10 +67,11 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) { } loginOpts := &sdkAuth.LoginOptions{ - 
NoBrowser: options.NoBrowser, - ProjectID: trimmedProjectID, - Metadata: map[string]string{}, - Prompt: callbackPrompt, + NoBrowser: options.NoBrowser, + ProjectID: trimmedProjectID, + CallbackPort: options.CallbackPort, + Metadata: map[string]string{}, + Prompt: callbackPrompt, } authenticator := sdkAuth.NewGeminiAuthenticator() @@ -88,8 +89,9 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) { geminiAuth := gemini.NewGeminiAuth() httpClient, errClient := geminiAuth.GetAuthenticatedClient(ctx, storage, cfg, &gemini.WebLoginOptions{ - NoBrowser: options.NoBrowser, - Prompt: callbackPrompt, + NoBrowser: options.NoBrowser, + CallbackPort: options.CallbackPort, + Prompt: callbackPrompt, }) if errClient != nil { log.Errorf("Gemini authentication failed: %v", errClient) diff --git a/internal/cmd/openai_login.go b/internal/cmd/openai_login.go index d981f6ae..5f2fb162 100644 --- a/internal/cmd/openai_login.go +++ b/internal/cmd/openai_login.go @@ -19,6 +19,9 @@ type LoginOptions struct { // NoBrowser indicates whether to skip opening the browser automatically. NoBrowser bool + // CallbackPort overrides the local OAuth callback port when set (>0). + CallbackPort int + // Prompt allows the caller to provide interactive input when needed. Prompt func(prompt string) (string, error) } @@ -43,9 +46,10 @@ func DoCodexLogin(cfg *config.Config, options *LoginOptions) { manager := newAuthManager() authOpts := &sdkAuth.LoginOptions{ - NoBrowser: options.NoBrowser, - Metadata: map[string]string{}, - Prompt: promptFn, + NoBrowser: options.NoBrowser, + CallbackPort: options.CallbackPort, + Metadata: map[string]string{}, + Prompt: promptFn, } _, savedPath, err := manager.Login(context.Background(), "codex", cfg, authOpts) diff --git a/internal/cmd/qwen_login.go b/internal/cmd/qwen_login.go index 27edf408..92a57aa5 100644 --- a/internal/cmd/qwen_login.go +++ b/internal/cmd/qwen_login.go @@ -36,9 +36,10 @@ func DoQwenLogin(cfg *config.Config, options *LoginOptions) { } authOpts := &sdkAuth.LoginOptions{ - NoBrowser: options.NoBrowser, - Metadata: map[string]string{}, - Prompt: promptFn, + NoBrowser: options.NoBrowser, + CallbackPort: options.CallbackPort, + Metadata: map[string]string{}, + Prompt: promptFn, } _, savedPath, err := manager.Login(context.Background(), "qwen", cfg, authOpts) diff --git a/sdk/auth/antigravity.go b/sdk/auth/antigravity.go index ae22f772..b59acacf 100644 --- a/sdk/auth/antigravity.go +++ b/sdk/auth/antigravity.go @@ -60,6 +60,11 @@ func (AntigravityAuthenticator) Login(ctx context.Context, cfg *config.Config, o opts = &LoginOptions{} } + callbackPort := antigravityCallbackPort + if opts.CallbackPort > 0 { + callbackPort = opts.CallbackPort + } + httpClient := util.SetProxy(&cfg.SDKConfig, &http.Client{}) state, err := misc.GenerateRandomState() @@ -67,7 +72,7 @@ func (AntigravityAuthenticator) Login(ctx context.Context, cfg *config.Config, o return nil, fmt.Errorf("antigravity: failed to generate state: %w", err) } - srv, port, cbChan, errServer := startAntigravityCallbackServer() + srv, port, cbChan, errServer := startAntigravityCallbackServer(callbackPort) if errServer != nil { return nil, fmt.Errorf("antigravity: failed to start callback server: %w", errServer) } @@ -224,13 +229,16 @@ type callbackResult struct { State string } -func startAntigravityCallbackServer() (*http.Server, int, <-chan callbackResult, error) { - addr := fmt.Sprintf(":%d", antigravityCallbackPort) +func startAntigravityCallbackServer(port int) (*http.Server, int, <-chan 
callbackResult, error) { + if port <= 0 { + port = antigravityCallbackPort + } + addr := fmt.Sprintf(":%d", port) listener, err := net.Listen("tcp", addr) if err != nil { return nil, 0, nil, err } - port := listener.Addr().(*net.TCPAddr).Port + port = listener.Addr().(*net.TCPAddr).Port resultCh := make(chan callbackResult, 1) mux := http.NewServeMux() diff --git a/sdk/auth/claude.go b/sdk/auth/claude.go index c43b78cd..2c7a8988 100644 --- a/sdk/auth/claude.go +++ b/sdk/auth/claude.go @@ -47,6 +47,11 @@ func (a *ClaudeAuthenticator) Login(ctx context.Context, cfg *config.Config, opt opts = &LoginOptions{} } + callbackPort := a.CallbackPort + if opts.CallbackPort > 0 { + callbackPort = opts.CallbackPort + } + pkceCodes, err := claude.GeneratePKCECodes() if err != nil { return nil, fmt.Errorf("claude pkce generation failed: %w", err) @@ -57,7 +62,7 @@ func (a *ClaudeAuthenticator) Login(ctx context.Context, cfg *config.Config, opt return nil, fmt.Errorf("claude state generation failed: %w", err) } - oauthServer := claude.NewOAuthServer(a.CallbackPort) + oauthServer := claude.NewOAuthServer(callbackPort) if err = oauthServer.Start(); err != nil { if strings.Contains(err.Error(), "already in use") { return nil, claude.NewAuthenticationError(claude.ErrPortInUse, err) @@ -84,15 +89,15 @@ func (a *ClaudeAuthenticator) Login(ctx context.Context, cfg *config.Config, opt fmt.Println("Opening browser for Claude authentication") if !browser.IsAvailable() { log.Warn("No browser available; please open the URL manually") - util.PrintSSHTunnelInstructions(a.CallbackPort) + util.PrintSSHTunnelInstructions(callbackPort) fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL) } else if err = browser.OpenURL(authURL); err != nil { log.Warnf("Failed to open browser automatically: %v", err) - util.PrintSSHTunnelInstructions(a.CallbackPort) + util.PrintSSHTunnelInstructions(callbackPort) fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL) } } else { - util.PrintSSHTunnelInstructions(a.CallbackPort) + util.PrintSSHTunnelInstructions(callbackPort) fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL) } diff --git a/sdk/auth/codex.go b/sdk/auth/codex.go index 99992525..b3104b4e 100644 --- a/sdk/auth/codex.go +++ b/sdk/auth/codex.go @@ -47,6 +47,11 @@ func (a *CodexAuthenticator) Login(ctx context.Context, cfg *config.Config, opts opts = &LoginOptions{} } + callbackPort := a.CallbackPort + if opts.CallbackPort > 0 { + callbackPort = opts.CallbackPort + } + pkceCodes, err := codex.GeneratePKCECodes() if err != nil { return nil, fmt.Errorf("codex pkce generation failed: %w", err) @@ -57,7 +62,7 @@ func (a *CodexAuthenticator) Login(ctx context.Context, cfg *config.Config, opts return nil, fmt.Errorf("codex state generation failed: %w", err) } - oauthServer := codex.NewOAuthServer(a.CallbackPort) + oauthServer := codex.NewOAuthServer(callbackPort) if err = oauthServer.Start(); err != nil { if strings.Contains(err.Error(), "already in use") { return nil, codex.NewAuthenticationError(codex.ErrPortInUse, err) @@ -83,15 +88,15 @@ func (a *CodexAuthenticator) Login(ctx context.Context, cfg *config.Config, opts fmt.Println("Opening browser for Codex authentication") if !browser.IsAvailable() { log.Warn("No browser available; please open the URL manually") - util.PrintSSHTunnelInstructions(a.CallbackPort) + util.PrintSSHTunnelInstructions(callbackPort) fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL) } else if err 
= browser.OpenURL(authURL); err != nil { log.Warnf("Failed to open browser automatically: %v", err) - util.PrintSSHTunnelInstructions(a.CallbackPort) + util.PrintSSHTunnelInstructions(callbackPort) fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL) } } else { - util.PrintSSHTunnelInstructions(a.CallbackPort) + util.PrintSSHTunnelInstructions(callbackPort) fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL) } diff --git a/sdk/auth/gemini.go b/sdk/auth/gemini.go index 75ef4579..2b8f9c2b 100644 --- a/sdk/auth/gemini.go +++ b/sdk/auth/gemini.go @@ -45,8 +45,9 @@ func (a *GeminiAuthenticator) Login(ctx context.Context, cfg *config.Config, opt geminiAuth := gemini.NewGeminiAuth() _, err := geminiAuth.GetAuthenticatedClient(ctx, &ts, cfg, &gemini.WebLoginOptions{ - NoBrowser: opts.NoBrowser, - Prompt: opts.Prompt, + NoBrowser: opts.NoBrowser, + CallbackPort: opts.CallbackPort, + Prompt: opts.Prompt, }) if err != nil { return nil, fmt.Errorf("gemini authentication failed: %w", err) diff --git a/sdk/auth/iflow.go b/sdk/auth/iflow.go index 3fd82f1d..6d4ff946 100644 --- a/sdk/auth/iflow.go +++ b/sdk/auth/iflow.go @@ -42,9 +42,14 @@ func (a *IFlowAuthenticator) Login(ctx context.Context, cfg *config.Config, opts opts = &LoginOptions{} } + callbackPort := iflow.CallbackPort + if opts.CallbackPort > 0 { + callbackPort = opts.CallbackPort + } + authSvc := iflow.NewIFlowAuth(cfg) - oauthServer := iflow.NewOAuthServer(iflow.CallbackPort) + oauthServer := iflow.NewOAuthServer(callbackPort) if err := oauthServer.Start(); err != nil { if strings.Contains(err.Error(), "already in use") { return nil, fmt.Errorf("iflow authentication server port in use: %w", err) @@ -64,21 +69,21 @@ func (a *IFlowAuthenticator) Login(ctx context.Context, cfg *config.Config, opts return nil, fmt.Errorf("iflow auth: failed to generate state: %w", err) } - authURL, redirectURI := authSvc.AuthorizationURL(state, iflow.CallbackPort) + authURL, redirectURI := authSvc.AuthorizationURL(state, callbackPort) if !opts.NoBrowser { fmt.Println("Opening browser for iFlow authentication") if !browser.IsAvailable() { log.Warn("No browser available; please open the URL manually") - util.PrintSSHTunnelInstructions(iflow.CallbackPort) + util.PrintSSHTunnelInstructions(callbackPort) fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL) } else if err = browser.OpenURL(authURL); err != nil { log.Warnf("Failed to open browser automatically: %v", err) - util.PrintSSHTunnelInstructions(iflow.CallbackPort) + util.PrintSSHTunnelInstructions(callbackPort) fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL) } } else { - util.PrintSSHTunnelInstructions(iflow.CallbackPort) + util.PrintSSHTunnelInstructions(callbackPort) fmt.Printf("Visit the following URL to continue authentication:\n%s\n", authURL) } diff --git a/sdk/auth/interfaces.go b/sdk/auth/interfaces.go index 7a7868e1..64cf8ed0 100644 --- a/sdk/auth/interfaces.go +++ b/sdk/auth/interfaces.go @@ -14,10 +14,11 @@ var ErrRefreshNotSupported = errors.New("cliproxy auth: refresh not supported") // LoginOptions captures generic knobs shared across authenticators. // Provider-specific logic can inspect Metadata for extra parameters. 
 type LoginOptions struct {
-	NoBrowser bool
-	ProjectID string
-	Metadata  map[string]string
-	Prompt    func(prompt string) (string, error)
+	NoBrowser    bool
+	ProjectID    string
+	CallbackPort int
+	Metadata     map[string]string
+	Prompt       func(prompt string) (string, error)
 }
 
 // Authenticator manages login and optional refresh flows for a provider.

From b163f8ed9e75142e81cde1fce8102d53f7b61a44 Mon Sep 17 00:00:00 2001
From: Luis Pater
Date: Thu, 15 Jan 2026 03:27:00 +0800
Subject: [PATCH 02/65] Fixed: #1004 feat(translator): add function name to
 response output item serialization

- Included `item.name` in the serialized response output to enhance output
  item handling.
---
 .../claude/openai/responses/claude_openai-responses_response.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/internal/translator/claude/openai/responses/claude_openai-responses_response.go b/internal/translator/claude/openai/responses/claude_openai-responses_response.go
index 593ec287..e77b09e1 100644
--- a/internal/translator/claude/openai/responses/claude_openai-responses_response.go
+++ b/internal/translator/claude/openai/responses/claude_openai-responses_response.go
@@ -251,6 +251,7 @@ func ConvertClaudeResponseToOpenAIResponses(ctx context.Context, modelName strin
 			itemDone, _ = sjson.Set(itemDone, "item.id", fmt.Sprintf("fc_%s", st.CurrentFCID))
 			itemDone, _ = sjson.Set(itemDone, "item.arguments", args)
 			itemDone, _ = sjson.Set(itemDone, "item.call_id", st.CurrentFCID)
+			itemDone, _ = sjson.Set(itemDone, "item.name", st.FuncNames[idx])
 			out = append(out, emitEvent("response.output_item.done", itemDone))
 			st.InFuncBlock = false
 		} else if st.ReasoningActive {

From 5df195ea823efcdc1f0f523df80fde7395393827 Mon Sep 17 00:00:00 2001
From: pikeman20
Date: Thu, 15 Jan 2026 05:42:51 +0700
Subject: [PATCH 03/65] feat(docker): use environment variables for volume
 paths

This change introduces environment variable interpolation for volume paths,
allowing users to customize where configuration, authentication, and log
data are stored.

Why: Makes the project easier to deploy on various hosting environments that
require decoupled data management without needing to modify the core
docker-compose.yml.

Key points:
- Defaults to existing paths (./config.yaml, ./auths, ./logs) to ensure zero
  breaking changes for current users.
- Follows the existing naming convention used in the project.
- Enhances portability for CI/CD and cloud-native deployments.
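
Example (the host paths below are illustrative, not new defaults): placing a
`.env` file next to docker-compose.yml relocates all three mounts, since
Compose reads it automatically:

    CLI_PROXY_CONFIG_PATH=/srv/cliproxy/config.yaml
    CLI_PROXY_AUTH_PATH=/srv/cliproxy/auths
    CLI_PROXY_LOG_PATH=/srv/cliproxy/logs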
--- docker-compose.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 29712419..ad2190c2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -22,7 +22,7 @@ services: - "51121:51121" - "11451:11451" volumes: - - ./config.yaml:/CLIProxyAPI/config.yaml - - ./auths:/root/.cli-proxy-api - - ./logs:/CLIProxyAPI/logs + - ${CLI_PROXY_CONFIG_PATH:-./config.yaml}:/CLIProxyAPI/config.yaml + - ${CLI_PROXY_AUTH_PATH:-./auths}:/root/.cli-proxy-api + - ${CLI_PROXY_LOG_PATH:-./logs}:/CLIProxyAPI/logs restart: unless-stopped From 6f8a8f813640235d10ea673073619b3c6900d675 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Thu, 15 Jan 2026 07:08:24 +0800 Subject: [PATCH 04/65] feat(selector): add priority support for auth selection --- internal/config/config.go | 16 ++ internal/config/vertex_compat.go | 4 + internal/watcher/synthesizer/config.go | 19 ++ sdk/cliproxy/auth/conductor.go | 253 +++++++++++++++++++++++-- sdk/cliproxy/auth/selector.go | 49 ++++- sdk/cliproxy/auth/selector_test.go | 64 +++++++ 6 files changed, 381 insertions(+), 24 deletions(-) diff --git a/internal/config/config.go b/internal/config/config.go index 99beb481..6843d6b8 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -242,6 +242,10 @@ type ClaudeKey struct { // APIKey is the authentication key for accessing Claude API services. APIKey string `yaml:"api-key" json:"api-key"` + // Priority controls selection preference when multiple credentials match. + // Higher values are preferred; defaults to 0. + Priority int `yaml:"priority,omitempty" json:"priority,omitempty"` + // Prefix optionally namespaces models for this credential (e.g., "teamA/claude-sonnet-4"). Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"` @@ -280,6 +284,10 @@ type CodexKey struct { // APIKey is the authentication key for accessing Codex API services. APIKey string `yaml:"api-key" json:"api-key"` + // Priority controls selection preference when multiple credentials match. + // Higher values are preferred; defaults to 0. + Priority int `yaml:"priority,omitempty" json:"priority,omitempty"` + // Prefix optionally namespaces models for this credential (e.g., "teamA/gpt-5-codex"). Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"` @@ -318,6 +326,10 @@ type GeminiKey struct { // APIKey is the authentication key for accessing Gemini API services. APIKey string `yaml:"api-key" json:"api-key"` + // Priority controls selection preference when multiple credentials match. + // Higher values are preferred; defaults to 0. + Priority int `yaml:"priority,omitempty" json:"priority,omitempty"` + // Prefix optionally namespaces models for this credential (e.g., "teamA/gemini-3-pro-preview"). Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"` @@ -355,6 +367,10 @@ type OpenAICompatibility struct { // Name is the identifier for this OpenAI compatibility configuration. Name string `yaml:"name" json:"name"` + // Priority controls selection preference when multiple providers or credentials match. + // Higher values are preferred; defaults to 0. + Priority int `yaml:"priority,omitempty" json:"priority,omitempty"` + // Prefix optionally namespaces model aliases for this provider (e.g., "teamA/kimi-k2"). 
Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"` diff --git a/internal/config/vertex_compat.go b/internal/config/vertex_compat.go index 94e162b7..632bf7cc 100644 --- a/internal/config/vertex_compat.go +++ b/internal/config/vertex_compat.go @@ -13,6 +13,10 @@ type VertexCompatKey struct { // Maps to the x-goog-api-key header. APIKey string `yaml:"api-key" json:"api-key"` + // Priority controls selection preference when multiple credentials match. + // Higher values are preferred; defaults to 0. + Priority int `yaml:"priority,omitempty" json:"priority,omitempty"` + // Prefix optionally namespaces model aliases for this credential (e.g., "teamA/vertex-pro"). Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"` diff --git a/internal/watcher/synthesizer/config.go b/internal/watcher/synthesizer/config.go index 2f2b2690..b1ae5885 100644 --- a/internal/watcher/synthesizer/config.go +++ b/internal/watcher/synthesizer/config.go @@ -2,6 +2,7 @@ package synthesizer import ( "fmt" + "strconv" "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/watcher/diff" @@ -59,6 +60,9 @@ func (s *ConfigSynthesizer) synthesizeGeminiKeys(ctx *SynthesisContext) []*corea "source": fmt.Sprintf("config:gemini[%s]", token), "api_key": key, } + if entry.Priority != 0 { + attrs["priority"] = strconv.Itoa(entry.Priority) + } if base != "" { attrs["base_url"] = base } @@ -103,6 +107,9 @@ func (s *ConfigSynthesizer) synthesizeClaudeKeys(ctx *SynthesisContext) []*corea "source": fmt.Sprintf("config:claude[%s]", token), "api_key": key, } + if ck.Priority != 0 { + attrs["priority"] = strconv.Itoa(ck.Priority) + } if base != "" { attrs["base_url"] = base } @@ -147,6 +154,9 @@ func (s *ConfigSynthesizer) synthesizeCodexKeys(ctx *SynthesisContext) []*coreau "source": fmt.Sprintf("config:codex[%s]", token), "api_key": key, } + if ck.Priority != 0 { + attrs["priority"] = strconv.Itoa(ck.Priority) + } if ck.BaseURL != "" { attrs["base_url"] = ck.BaseURL } @@ -202,6 +212,9 @@ func (s *ConfigSynthesizer) synthesizeOpenAICompat(ctx *SynthesisContext) []*cor "compat_name": compat.Name, "provider_key": providerName, } + if compat.Priority != 0 { + attrs["priority"] = strconv.Itoa(compat.Priority) + } if key != "" { attrs["api_key"] = key } @@ -233,6 +246,9 @@ func (s *ConfigSynthesizer) synthesizeOpenAICompat(ctx *SynthesisContext) []*cor "compat_name": compat.Name, "provider_key": providerName, } + if compat.Priority != 0 { + attrs["priority"] = strconv.Itoa(compat.Priority) + } if hash := diff.ComputeOpenAICompatModelsHash(compat.Models); hash != "" { attrs["models_hash"] = hash } @@ -275,6 +291,9 @@ func (s *ConfigSynthesizer) synthesizeVertexCompat(ctx *SynthesisContext) []*cor "base_url": base, "provider_key": providerName, } + if compat.Priority != 0 { + attrs["priority"] = strconv.Itoa(compat.Priority) + } if key != "" { attrs["api_key"] = key } diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index 431e2259..dc7bc10b 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -271,7 +271,6 @@ func (m *Manager) Execute(ctx context.Context, providers []string, req cliproxye if len(normalized) == 0 { return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"} } - rotated := m.rotateProviders(req.Model, normalized) retryTimes, maxWait := m.retrySettings() attempts := retryTimes + 1 @@ -281,14 +280,12 @@ func (m *Manager) Execute(ctx context.Context, providers []string, req cliproxye var lastErr error for 
attempt := 0; attempt < attempts; attempt++ { - resp, errExec := m.executeProvidersOnce(ctx, rotated, func(execCtx context.Context, provider string) (cliproxyexecutor.Response, error) { - return m.executeWithProvider(execCtx, provider, req, opts) - }) + resp, errExec := m.executeMixedOnce(ctx, normalized, req, opts) if errExec == nil { return resp, nil } lastErr = errExec - wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, attempts, rotated, req.Model, maxWait) + wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, attempts, normalized, req.Model, maxWait) if !shouldRetry { break } @@ -309,7 +306,6 @@ func (m *Manager) ExecuteCount(ctx context.Context, providers []string, req clip if len(normalized) == 0 { return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"} } - rotated := m.rotateProviders(req.Model, normalized) retryTimes, maxWait := m.retrySettings() attempts := retryTimes + 1 @@ -319,14 +315,12 @@ func (m *Manager) ExecuteCount(ctx context.Context, providers []string, req clip var lastErr error for attempt := 0; attempt < attempts; attempt++ { - resp, errExec := m.executeProvidersOnce(ctx, rotated, func(execCtx context.Context, provider string) (cliproxyexecutor.Response, error) { - return m.executeCountWithProvider(execCtx, provider, req, opts) - }) + resp, errExec := m.executeCountMixedOnce(ctx, normalized, req, opts) if errExec == nil { return resp, nil } lastErr = errExec - wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, attempts, rotated, req.Model, maxWait) + wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, attempts, normalized, req.Model, maxWait) if !shouldRetry { break } @@ -347,7 +341,6 @@ func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cli if len(normalized) == 0 { return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"} } - rotated := m.rotateProviders(req.Model, normalized) retryTimes, maxWait := m.retrySettings() attempts := retryTimes + 1 @@ -357,14 +350,12 @@ func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cli var lastErr error for attempt := 0; attempt < attempts; attempt++ { - chunks, errStream := m.executeStreamProvidersOnce(ctx, rotated, func(execCtx context.Context, provider string) (<-chan cliproxyexecutor.StreamChunk, error) { - return m.executeStreamWithProvider(execCtx, provider, req, opts) - }) + chunks, errStream := m.executeStreamMixedOnce(ctx, normalized, req, opts) if errStream == nil { return chunks, nil } lastErr = errStream - wait, shouldRetry := m.shouldRetryAfterError(errStream, attempt, attempts, rotated, req.Model, maxWait) + wait, shouldRetry := m.shouldRetryAfterError(errStream, attempt, attempts, normalized, req.Model, maxWait) if !shouldRetry { break } @@ -378,6 +369,167 @@ func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cli return nil, &Error{Code: "auth_not_found", Message: "no auth available"} } +func (m *Manager) executeMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + if len(providers) == 0 { + return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"} + } + routeModel := req.Model + tried := make(map[string]struct{}) + var lastErr error + for { + auth, executor, provider, errPick := m.pickNextMixed(ctx, providers, routeModel, opts, tried) + if errPick != nil { + if lastErr != nil { + 
return cliproxyexecutor.Response{}, lastErr + } + return cliproxyexecutor.Response{}, errPick + } + + entry := logEntryWithRequestID(ctx) + debugLogAuthSelection(entry, auth, provider, req.Model) + + tried[auth.ID] = struct{}{} + execCtx := ctx + if rt := m.roundTripperFor(auth); rt != nil { + execCtx = context.WithValue(execCtx, roundTripperContextKey{}, rt) + execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt) + } + execReq := req + execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth) + execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata) + resp, errExec := executor.Execute(execCtx, auth, execReq, opts) + result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil} + if errExec != nil { + result.Error = &Error{Message: errExec.Error()} + var se cliproxyexecutor.StatusError + if errors.As(errExec, &se) && se != nil { + result.Error.HTTPStatus = se.StatusCode() + } + if ra := retryAfterFromError(errExec); ra != nil { + result.RetryAfter = ra + } + m.MarkResult(execCtx, result) + lastErr = errExec + continue + } + m.MarkResult(execCtx, result) + return resp, nil + } +} + +func (m *Manager) executeCountMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + if len(providers) == 0 { + return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"} + } + routeModel := req.Model + tried := make(map[string]struct{}) + var lastErr error + for { + auth, executor, provider, errPick := m.pickNextMixed(ctx, providers, routeModel, opts, tried) + if errPick != nil { + if lastErr != nil { + return cliproxyexecutor.Response{}, lastErr + } + return cliproxyexecutor.Response{}, errPick + } + + entry := logEntryWithRequestID(ctx) + debugLogAuthSelection(entry, auth, provider, req.Model) + + tried[auth.ID] = struct{}{} + execCtx := ctx + if rt := m.roundTripperFor(auth); rt != nil { + execCtx = context.WithValue(execCtx, roundTripperContextKey{}, rt) + execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt) + } + execReq := req + execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth) + execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata) + resp, errExec := executor.CountTokens(execCtx, auth, execReq, opts) + result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil} + if errExec != nil { + result.Error = &Error{Message: errExec.Error()} + var se cliproxyexecutor.StatusError + if errors.As(errExec, &se) && se != nil { + result.Error.HTTPStatus = se.StatusCode() + } + if ra := retryAfterFromError(errExec); ra != nil { + result.RetryAfter = ra + } + m.MarkResult(execCtx, result) + lastErr = errExec + continue + } + m.MarkResult(execCtx, result) + return resp, nil + } +} + +func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (<-chan cliproxyexecutor.StreamChunk, error) { + if len(providers) == 0 { + return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"} + } + routeModel := req.Model + tried := make(map[string]struct{}) + var lastErr error + for { + auth, executor, provider, errPick := m.pickNextMixed(ctx, providers, routeModel, opts, tried) + if errPick != nil { + if lastErr != nil { + return nil, lastErr + } + 
return nil, errPick + } + + entry := logEntryWithRequestID(ctx) + debugLogAuthSelection(entry, auth, provider, req.Model) + + tried[auth.ID] = struct{}{} + execCtx := ctx + if rt := m.roundTripperFor(auth); rt != nil { + execCtx = context.WithValue(execCtx, roundTripperContextKey{}, rt) + execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt) + } + execReq := req + execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth) + execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata) + chunks, errStream := executor.ExecuteStream(execCtx, auth, execReq, opts) + if errStream != nil { + rerr := &Error{Message: errStream.Error()} + var se cliproxyexecutor.StatusError + if errors.As(errStream, &se) && se != nil { + rerr.HTTPStatus = se.StatusCode() + } + result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: false, Error: rerr} + result.RetryAfter = retryAfterFromError(errStream) + m.MarkResult(execCtx, result) + lastErr = errStream + continue + } + out := make(chan cliproxyexecutor.StreamChunk) + go func(streamCtx context.Context, streamAuth *Auth, streamProvider string, streamChunks <-chan cliproxyexecutor.StreamChunk) { + defer close(out) + var failed bool + for chunk := range streamChunks { + if chunk.Err != nil && !failed { + failed = true + rerr := &Error{Message: chunk.Err.Error()} + var se cliproxyexecutor.StatusError + if errors.As(chunk.Err, &se) && se != nil { + rerr.HTTPStatus = se.StatusCode() + } + m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: false, Error: rerr}) + } + out <- chunk + } + if !failed { + m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: true}) + } + }(execCtx, auth.Clone(), provider, chunks) + return out, nil + } +} + func (m *Manager) executeWithProvider(ctx context.Context, provider string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { if provider == "" { return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "provider identifier is empty"} @@ -1191,6 +1343,77 @@ func (m *Manager) pickNext(ctx context.Context, provider, model string, opts cli return authCopy, executor, nil } +func (m *Manager) pickNextMixed(ctx context.Context, providers []string, model string, opts cliproxyexecutor.Options, tried map[string]struct{}) (*Auth, ProviderExecutor, string, error) { + providerSet := make(map[string]struct{}, len(providers)) + for _, provider := range providers { + p := strings.TrimSpace(strings.ToLower(provider)) + if p == "" { + continue + } + providerSet[p] = struct{}{} + } + if len(providerSet) == 0 { + return nil, nil, "", &Error{Code: "provider_not_found", Message: "no provider supplied"} + } + + m.mu.RLock() + candidates := make([]*Auth, 0, len(m.auths)) + modelKey := strings.TrimSpace(model) + registryRef := registry.GetGlobalRegistry() + for _, candidate := range m.auths { + if candidate == nil || candidate.Disabled { + continue + } + providerKey := strings.TrimSpace(strings.ToLower(candidate.Provider)) + if providerKey == "" { + continue + } + if _, ok := providerSet[providerKey]; !ok { + continue + } + if _, used := tried[candidate.ID]; used { + continue + } + if _, ok := m.executors[providerKey]; !ok { + continue + } + if modelKey != "" && registryRef != nil && !registryRef.ClientSupportsModel(candidate.ID, modelKey) { + continue + } + candidates = 
append(candidates, candidate) + } + if len(candidates) == 0 { + m.mu.RUnlock() + return nil, nil, "", &Error{Code: "auth_not_found", Message: "no auth available"} + } + selected, errPick := m.selector.Pick(ctx, "mixed", model, opts, candidates) + if errPick != nil { + m.mu.RUnlock() + return nil, nil, "", errPick + } + if selected == nil { + m.mu.RUnlock() + return nil, nil, "", &Error{Code: "auth_not_found", Message: "selector returned no auth"} + } + providerKey := strings.TrimSpace(strings.ToLower(selected.Provider)) + executor, okExecutor := m.executors[providerKey] + if !okExecutor { + m.mu.RUnlock() + return nil, nil, "", &Error{Code: "executor_not_found", Message: "executor not registered"} + } + authCopy := selected.Clone() + m.mu.RUnlock() + if !selected.indexAssigned { + m.mu.Lock() + if current := m.auths[authCopy.ID]; current != nil && !current.indexAssigned { + current.EnsureIndex() + authCopy = current.Clone() + } + m.mu.Unlock() + } + return authCopy, executor, providerKey, nil +} + func (m *Manager) persist(ctx context.Context, auth *Auth) error { if m.store == nil || auth == nil { return nil diff --git a/sdk/cliproxy/auth/selector.go b/sdk/cliproxy/auth/selector.go index d7e120c5..7febf219 100644 --- a/sdk/cliproxy/auth/selector.go +++ b/sdk/cliproxy/auth/selector.go @@ -8,6 +8,7 @@ import ( "net/http" "sort" "strconv" + "strings" "sync" "time" @@ -103,13 +104,29 @@ func (e *modelCooldownError) Headers() http.Header { return headers } -func collectAvailable(auths []*Auth, model string, now time.Time) (available []*Auth, cooldownCount int, earliest time.Time) { - available = make([]*Auth, 0, len(auths)) +func authPriority(auth *Auth) int { + if auth == nil || auth.Attributes == nil { + return 0 + } + raw := strings.TrimSpace(auth.Attributes["priority"]) + if raw == "" { + return 0 + } + parsed, err := strconv.Atoi(raw) + if err != nil { + return 0 + } + return parsed +} + +func collectAvailableByPriority(auths []*Auth, model string, now time.Time) (available map[int][]*Auth, cooldownCount int, earliest time.Time) { + available = make(map[int][]*Auth) for i := 0; i < len(auths); i++ { candidate := auths[i] blocked, reason, next := isAuthBlockedForModel(candidate, model, now) if !blocked { - available = append(available, candidate) + priority := authPriority(candidate) + available[priority] = append(available[priority], candidate) continue } if reason == blockReasonCooldown { @@ -119,9 +136,6 @@ func collectAvailable(auths []*Auth, model string, now time.Time) (available []* } } } - if len(available) > 1 { - sort.Slice(available, func(i, j int) bool { return available[i].ID < available[j].ID }) - } return available, cooldownCount, earliest } @@ -130,18 +144,35 @@ func getAvailableAuths(auths []*Auth, provider, model string, now time.Time) ([] return nil, &Error{Code: "auth_not_found", Message: "no auth candidates"} } - available, cooldownCount, earliest := collectAvailable(auths, model, now) - if len(available) == 0 { + availableByPriority, cooldownCount, earliest := collectAvailableByPriority(auths, model, now) + if len(availableByPriority) == 0 { if cooldownCount == len(auths) && !earliest.IsZero() { + providerForError := provider + if providerForError == "mixed" { + providerForError = "" + } resetIn := earliest.Sub(now) if resetIn < 0 { resetIn = 0 } - return nil, newModelCooldownError(model, provider, resetIn) + return nil, newModelCooldownError(model, providerForError, resetIn) } return nil, &Error{Code: "auth_unavailable", Message: "no auth available"} } + bestPriority 
:= 0 + found := false + for priority := range availableByPriority { + if !found || priority > bestPriority { + bestPriority = priority + found = true + } + } + + available := availableByPriority[bestPriority] + if len(available) > 1 { + sort.Slice(available, func(i, j int) bool { return available[i].ID < available[j].ID }) + } return available, nil } diff --git a/sdk/cliproxy/auth/selector_test.go b/sdk/cliproxy/auth/selector_test.go index f4beed03..91a7ed14 100644 --- a/sdk/cliproxy/auth/selector_test.go +++ b/sdk/cliproxy/auth/selector_test.go @@ -5,6 +5,7 @@ import ( "errors" "sync" "testing" + "time" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" ) @@ -56,6 +57,69 @@ func TestRoundRobinSelectorPick_CyclesDeterministic(t *testing.T) { } } +func TestRoundRobinSelectorPick_PriorityBuckets(t *testing.T) { + t.Parallel() + + selector := &RoundRobinSelector{} + auths := []*Auth{ + {ID: "c", Attributes: map[string]string{"priority": "0"}}, + {ID: "a", Attributes: map[string]string{"priority": "10"}}, + {ID: "b", Attributes: map[string]string{"priority": "10"}}, + } + + want := []string{"a", "b", "a", "b"} + for i, id := range want { + got, err := selector.Pick(context.Background(), "mixed", "", cliproxyexecutor.Options{}, auths) + if err != nil { + t.Fatalf("Pick() #%d error = %v", i, err) + } + if got == nil { + t.Fatalf("Pick() #%d auth = nil", i) + } + if got.ID != id { + t.Fatalf("Pick() #%d auth.ID = %q, want %q", i, got.ID, id) + } + if got.ID == "c" { + t.Fatalf("Pick() #%d unexpectedly selected lower priority auth", i) + } + } +} + +func TestFillFirstSelectorPick_PriorityFallbackCooldown(t *testing.T) { + t.Parallel() + + selector := &FillFirstSelector{} + now := time.Now() + model := "test-model" + + high := &Auth{ + ID: "high", + Attributes: map[string]string{"priority": "10"}, + ModelStates: map[string]*ModelState{ + model: { + Status: StatusActive, + Unavailable: true, + NextRetryAfter: now.Add(30 * time.Minute), + Quota: QuotaState{ + Exceeded: true, + }, + }, + }, + } + low := &Auth{ID: "low", Attributes: map[string]string{"priority": "0"}} + + got, err := selector.Pick(context.Background(), "mixed", model, cliproxyexecutor.Options{}, []*Auth{high, low}) + if err != nil { + t.Fatalf("Pick() error = %v", err) + } + if got == nil { + t.Fatalf("Pick() auth = nil") + } + if got.ID != "low" { + t.Fatalf("Pick() auth.ID = %q, want %q", got.ID, "low") + } +} + func TestRoundRobinSelectorPick_Concurrent(t *testing.T) { selector := &RoundRobinSelector{} auths := []*Auth{ From 5a7e5bd87084d151aa54e4d19823c769c536e466 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Thu, 15 Jan 2026 11:40:22 +0800 Subject: [PATCH 05/65] feat(auth): add Antigravity onboarding with tier selection - Updated `ideType` to `ANTIGRAVITY` in request payload. - Introduced tier-selection logic to determine default tier for onboarding. - Added `antigravityOnboardUser` function for project ID retrieval via polling. - Enhanced error handling and response decoding for onboarding flow. 
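
For reference, the onboardUser request body sent on each polling attempt has
this shape ("legacy-tier" is the fallback tier ID used when loadCodeAssist
advertises no default tier):

    {
      "tierId": "legacy-tier",
      "metadata": {
        "ideType": "ANTIGRAVITY",
        "platform": "PLATFORM_UNSPECIFIED",
        "pluginType": "GEMINI"
      }
    }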
--- sdk/auth/antigravity.go | 130 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 128 insertions(+), 2 deletions(-) diff --git a/sdk/auth/antigravity.go b/sdk/auth/antigravity.go index b59acacf..210da57f 100644 --- a/sdk/auth/antigravity.go +++ b/sdk/auth/antigravity.go @@ -382,7 +382,7 @@ func fetchAntigravityProjectID(ctx context.Context, accessToken string, httpClie // Call loadCodeAssist to get the project loadReqBody := map[string]any{ "metadata": map[string]string{ - "ideType": "IDE_UNSPECIFIED", + "ideType": "ANTIGRAVITY", "platform": "PLATFORM_UNSPECIFIED", "pluginType": "GEMINI", }, @@ -442,8 +442,134 @@ func fetchAntigravityProjectID(ctx context.Context, accessToken string, httpClie } if projectID == "" { - return "", fmt.Errorf("no cloudaicompanionProject in response") + tierID := "legacy-tier" + if tiers, okTiers := loadResp["allowedTiers"].([]any); okTiers { + for _, rawTier := range tiers { + tier, okTier := rawTier.(map[string]any) + if !okTier { + continue + } + if isDefault, okDefault := tier["isDefault"].(bool); okDefault && isDefault { + if id, okID := tier["id"].(string); okID && strings.TrimSpace(id) != "" { + tierID = strings.TrimSpace(id) + break + } + } + } + } + + projectID, err = antigravityOnboardUser(ctx, accessToken, tierID, httpClient) + if err != nil { + return "", err + } + return projectID, nil } return projectID, nil } + +// antigravityOnboardUser attempts to fetch the project ID via onboardUser by polling for completion. +// It returns an empty string when the operation times out or completes without a project ID. +func antigravityOnboardUser(ctx context.Context, accessToken, tierID string, httpClient *http.Client) (string, error) { + if httpClient == nil { + httpClient = http.DefaultClient + } + fmt.Println("Antigravity: onboarding user...", tierID) + requestBody := map[string]any{ + "tierId": tierID, + "metadata": map[string]string{ + "ideType": "ANTIGRAVITY", + "platform": "PLATFORM_UNSPECIFIED", + "pluginType": "GEMINI", + }, + } + + rawBody, errMarshal := json.Marshal(requestBody) + if errMarshal != nil { + return "", fmt.Errorf("marshal request body: %w", errMarshal) + } + + maxAttempts := 5 + for attempt := 1; attempt <= maxAttempts; attempt++ { + log.Debugf("Polling attempt %d/%d", attempt, maxAttempts) + + reqCtx := ctx + var cancel context.CancelFunc + if reqCtx == nil { + reqCtx = context.Background() + } + reqCtx, cancel = context.WithTimeout(reqCtx, 30*time.Second) + + endpointURL := fmt.Sprintf("%s/%s:onboardUser", antigravityAPIEndpoint, antigravityAPIVersion) + req, errRequest := http.NewRequestWithContext(reqCtx, http.MethodPost, endpointURL, strings.NewReader(string(rawBody))) + if errRequest != nil { + cancel() + return "", fmt.Errorf("create request: %w", errRequest) + } + req.Header.Set("Authorization", "Bearer "+accessToken) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("User-Agent", antigravityAPIUserAgent) + req.Header.Set("X-Goog-Api-Client", antigravityAPIClient) + req.Header.Set("Client-Metadata", antigravityClientMetadata) + + resp, errDo := httpClient.Do(req) + if errDo != nil { + cancel() + return "", fmt.Errorf("execute request: %w", errDo) + } + + bodyBytes, errRead := io.ReadAll(resp.Body) + if errClose := resp.Body.Close(); errClose != nil { + log.Errorf("close body error: %v", errClose) + } + cancel() + + if errRead != nil { + return "", fmt.Errorf("read response: %w", errRead) + } + + if resp.StatusCode == http.StatusOK { + var data map[string]any + if errDecode := 
json.Unmarshal(bodyBytes, &data); errDecode != nil { + return "", fmt.Errorf("decode response: %w", errDecode) + } + + if done, okDone := data["done"].(bool); okDone && done { + projectID := "" + if responseData, okResp := data["response"].(map[string]any); okResp { + switch projectValue := responseData["cloudaicompanionProject"].(type) { + case map[string]any: + if id, okID := projectValue["id"].(string); okID { + projectID = strings.TrimSpace(id) + } + case string: + projectID = strings.TrimSpace(projectValue) + } + } + + if projectID != "" { + log.Infof("Successfully fetched project_id: %s", projectID) + return projectID, nil + } + + return "", fmt.Errorf("no project_id in response") + } + + time.Sleep(2 * time.Second) + continue + } + + responsePreview := strings.TrimSpace(string(bodyBytes)) + if len(responsePreview) > 500 { + responsePreview = responsePreview[:500] + } + + responseErr := responsePreview + if len(responseErr) > 200 { + responseErr = responseErr[:200] + } + return "", fmt.Errorf("http %d: %s", resp.StatusCode, responseErr) + } + + return "", nil +} From 0b06d637e701574b5af34ffc9a54ecd53a40182c Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 14 Jan 2026 08:32:02 +0800 Subject: [PATCH 06/65] refactor: improve thinking logic --- internal/api/modules/amp/fallback_handlers.go | 14 +- internal/api/modules/amp/model_mapping.go | 44 +- .../api/modules/amp/model_mapping_test.go | 98 +- internal/config/config.go | 12 + internal/config/vertex_compat.go | 3 + internal/registry/model_definitions.go | 16 +- internal/registry/model_registry.go | 5 + .../runtime/executor/aistudio_executor.go | 32 +- .../runtime/executor/antigravity_executor.go | 108 +- internal/runtime/executor/claude_executor.go | 121 +- internal/runtime/executor/codex_executor.go | 113 +- .../runtime/executor/gemini_cli_executor.go | 77 +- internal/runtime/executor/gemini_executor.go | 122 +- .../executor/gemini_vertex_executor.go | 234 +-- internal/runtime/executor/iflow_executor.go | 88 +- .../runtime/executor/iflow_executor_test.go | 67 + .../executor/openai_compat_executor.go | 105 +- internal/runtime/executor/payload_helpers.go | 195 -- internal/runtime/executor/qwen_executor.go | 52 +- .../runtime/executor/qwen_executor_test.go | 30 + .../runtime/executor/thinking_providers.go | 10 + internal/thinking/apply.go | 430 +++++ internal/thinking/apply_main_test.go | 144 ++ internal/thinking/apply_test.go | 501 ++++++ internal/thinking/convert.go | 233 +++ internal/thinking/convert_test.go | 277 +++ internal/thinking/errors.go | 71 + internal/thinking/errors_test.go | 34 + internal/thinking/extract_test.go | 42 + internal/thinking/provider/claude/apply.go | 116 ++ .../thinking/provider/claude/apply_test.go | 288 +++ internal/thinking/provider/codex/apply.go | 138 ++ internal/thinking/provider/gemini/apply.go | 172 ++ .../thinking/provider/gemini/apply_test.go | 526 ++++++ internal/thinking/provider/geminicli/apply.go | 128 ++ .../thinking/provider/geminicli/apply_test.go | 382 ++++ internal/thinking/provider/iflow/apply.go | 160 ++ .../thinking/provider/iflow/apply_test.go | 328 ++++ internal/thinking/provider/openai/apply.go | 135 ++ .../thinking/provider/openai/apply_test.go | 343 ++++ internal/thinking/provider_map_test.go | 51 + internal/thinking/strip.go | 54 + internal/thinking/strip_test.go | 66 + internal/thinking/suffix.go | 170 ++ internal/thinking/suffix_test.go | 313 ++++ internal/thinking/types.go | 100 ++ internal/thinking/validate.go | 260 +++ 
internal/thinking/validate_test.go | 349 ++++ .../claude/antigravity_claude_request.go | 16 +- .../antigravity_openai_request.go | 12 +- .../claude/gemini/claude_gemini_request.go | 23 +- .../chat-completions/claude_openai_request.go | 32 +- .../claude_openai-responses_request.go | 32 +- .../codex/claude/codex_claude_request.go | 16 +- .../codex/gemini/codex_gemini_request.go | 7 +- .../claude/gemini-cli_claude_request.go | 17 +- .../gemini-cli_openai_request.go | 6 +- .../gemini/claude/gemini_claude_request.go | 17 +- .../chat-completions/gemini_openai_request.go | 8 +- .../gemini_openai-responses_request.go | 6 +- .../openai/claude/openai_claude_request.go | 13 +- .../openai/gemini/openai_gemini_request.go | 4 +- internal/util/thinking.go | 18 + internal/util/thinking_deprecation_test.go | 130 ++ internal/util/thinking_suffix.go | 31 +- internal/util/thinking_text.go | 8 + sdk/api/handlers/handlers.go | 80 +- .../handlers/handlers_request_details_test.go | 118 ++ .../auth/api_key_model_mappings_test.go | 201 +++ sdk/cliproxy/auth/conductor.go | 411 ++++- sdk/cliproxy/auth/model_name_mappings.go | 120 +- sdk/cliproxy/auth/model_name_mappings_test.go | 187 ++ sdk/cliproxy/builder.go | 1 + sdk/cliproxy/service.go | 3 + test/model_alias_thinking_suffix_test.go | 59 +- test/thinking_conversion_test.go | 1594 +++++++++-------- 76 files changed, 8712 insertions(+), 1815 deletions(-) create mode 100644 internal/runtime/executor/iflow_executor_test.go create mode 100644 internal/runtime/executor/qwen_executor_test.go create mode 100644 internal/runtime/executor/thinking_providers.go create mode 100644 internal/thinking/apply.go create mode 100644 internal/thinking/apply_main_test.go create mode 100644 internal/thinking/apply_test.go create mode 100644 internal/thinking/convert.go create mode 100644 internal/thinking/convert_test.go create mode 100644 internal/thinking/errors.go create mode 100644 internal/thinking/errors_test.go create mode 100644 internal/thinking/extract_test.go create mode 100644 internal/thinking/provider/claude/apply.go create mode 100644 internal/thinking/provider/claude/apply_test.go create mode 100644 internal/thinking/provider/codex/apply.go create mode 100644 internal/thinking/provider/gemini/apply.go create mode 100644 internal/thinking/provider/gemini/apply_test.go create mode 100644 internal/thinking/provider/geminicli/apply.go create mode 100644 internal/thinking/provider/geminicli/apply_test.go create mode 100644 internal/thinking/provider/iflow/apply.go create mode 100644 internal/thinking/provider/iflow/apply_test.go create mode 100644 internal/thinking/provider/openai/apply.go create mode 100644 internal/thinking/provider/openai/apply_test.go create mode 100644 internal/thinking/provider_map_test.go create mode 100644 internal/thinking/strip.go create mode 100644 internal/thinking/strip_test.go create mode 100644 internal/thinking/suffix.go create mode 100644 internal/thinking/suffix_test.go create mode 100644 internal/thinking/types.go create mode 100644 internal/thinking/validate.go create mode 100644 internal/thinking/validate_test.go create mode 100644 internal/util/thinking_deprecation_test.go create mode 100644 sdk/api/handlers/handlers_request_details_test.go create mode 100644 sdk/cliproxy/auth/api_key_model_mappings_test.go create mode 100644 sdk/cliproxy/auth/model_name_mappings_test.go diff --git a/internal/api/modules/amp/fallback_handlers.go b/internal/api/modules/amp/fallback_handlers.go index 940bd5e8..7d7f7f5f 100644 --- 
a/internal/api/modules/amp/fallback_handlers.go +++ b/internal/api/modules/amp/fallback_handlers.go @@ -8,6 +8,7 @@ import ( "time" "github.com/gin-gonic/gin" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" log "github.com/sirupsen/logrus" "github.com/tidwall/gjson" @@ -134,10 +135,11 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc } // Normalize model (handles dynamic thinking suffixes) - normalizedModel, thinkingMetadata := util.NormalizeThinkingModel(modelName) + suffixResult := thinking.ParseSuffix(modelName) + normalizedModel := suffixResult.ModelName thinkingSuffix := "" - if thinkingMetadata != nil && strings.HasPrefix(modelName, normalizedModel) { - thinkingSuffix = modelName[len(normalizedModel):] + if suffixResult.HasSuffix { + thinkingSuffix = "(" + suffixResult.RawSuffix + ")" } resolveMappedModel := func() (string, []string) { @@ -157,13 +159,13 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc // Preserve dynamic thinking suffix (e.g. "(xhigh)") when mapping applies, unless the target // already specifies its own thinking suffix. if thinkingSuffix != "" { - _, mappedThinkingMetadata := util.NormalizeThinkingModel(mappedModel) - if mappedThinkingMetadata == nil { + mappedSuffixResult := thinking.ParseSuffix(mappedModel) + if !mappedSuffixResult.HasSuffix { mappedModel += thinkingSuffix } } - mappedBaseModel, _ := util.NormalizeThinkingModel(mappedModel) + mappedBaseModel := thinking.ParseSuffix(mappedModel).ModelName mappedProviders := util.GetProviderName(mappedBaseModel) if len(mappedProviders) == 0 { return "", nil diff --git a/internal/api/modules/amp/model_mapping.go b/internal/api/modules/amp/model_mapping.go index 4b629b62..4159a2b5 100644 --- a/internal/api/modules/amp/model_mapping.go +++ b/internal/api/modules/amp/model_mapping.go @@ -8,6 +8,7 @@ import ( "sync" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" log "github.com/sirupsen/logrus" ) @@ -44,6 +45,11 @@ func NewModelMapper(mappings []config.AmpModelMapping) *DefaultModelMapper { // MapModel checks if a mapping exists for the requested model and if the // target model has available local providers. Returns the mapped model name // or empty string if no valid mapping exists. +// +// If the requested model contains a thinking suffix (e.g., "g25p(8192)"), +// the suffix is preserved in the returned model name (e.g., "gemini-2.5-pro(8192)"). +// However, if the mapping target already contains a suffix, the config suffix +// takes priority over the user's suffix. 
func (m *DefaultModelMapper) MapModel(requestedModel string) string { if requestedModel == "" { return "" @@ -52,16 +58,20 @@ func (m *DefaultModelMapper) MapModel(requestedModel string) string { m.mu.RLock() defer m.mu.RUnlock() - // Normalize the requested model for lookup - normalizedRequest := strings.ToLower(strings.TrimSpace(requestedModel)) + // Extract thinking suffix from requested model using ParseSuffix + requestResult := thinking.ParseSuffix(requestedModel) + baseModel := requestResult.ModelName - // Check for direct mapping - targetModel, exists := m.mappings[normalizedRequest] + // Normalize the base model for lookup (case-insensitive) + normalizedBase := strings.ToLower(strings.TrimSpace(baseModel)) + + // Check for direct mapping using base model name + targetModel, exists := m.mappings[normalizedBase] if !exists { - // Try regex mappings in order - base, _ := util.NormalizeThinkingModel(requestedModel) + // Try regex mappings in order using base model only + // (suffix is handled separately via ParseSuffix) for _, rm := range m.regexps { - if rm.re.MatchString(requestedModel) || (base != "" && rm.re.MatchString(base)) { + if rm.re.MatchString(baseModel) { targetModel = rm.to exists = true break @@ -72,14 +82,28 @@ func (m *DefaultModelMapper) MapModel(requestedModel string) string { } } - // Verify target model has available providers - normalizedTarget, _ := util.NormalizeThinkingModel(targetModel) - providers := util.GetProviderName(normalizedTarget) + // Check if target model already has a thinking suffix (config priority) + targetResult := thinking.ParseSuffix(targetModel) + + // Verify target model has available providers (use base model for lookup) + providers := util.GetProviderName(targetResult.ModelName) if len(providers) == 0 { log.Debugf("amp model mapping: target model %s has no available providers, skipping mapping", targetModel) return "" } + // Suffix handling: config suffix takes priority, otherwise preserve user suffix + if targetResult.HasSuffix { + // Config's "to" already contains a suffix - use it as-is (config priority) + return targetModel + } + + // Preserve user's thinking suffix on the mapped model + // (skip empty suffixes to avoid returning "model()") + if requestResult.HasSuffix && requestResult.RawSuffix != "" { + return targetModel + "(" + requestResult.RawSuffix + ")" + } + // Note: Detailed routing log is handled by logAmpRouting in fallback_handlers.go return targetModel } diff --git a/internal/api/modules/amp/model_mapping_test.go b/internal/api/modules/amp/model_mapping_test.go index 1b36f212..53165d22 100644 --- a/internal/api/modules/amp/model_mapping_test.go +++ b/internal/api/modules/amp/model_mapping_test.go @@ -217,10 +217,10 @@ func TestModelMapper_Regex_MatchBaseWithoutParens(t *testing.T) { mapper := NewModelMapper(mappings) - // Incoming model has reasoning suffix but should match base via regex + // Incoming model has reasoning suffix, regex matches base, suffix is preserved result := mapper.MapModel("gpt-5(high)") - if result != "gemini-2.5-pro" { - t.Errorf("Expected gemini-2.5-pro, got %s", result) + if result != "gemini-2.5-pro(high)" { + t.Errorf("Expected gemini-2.5-pro(high), got %s", result) } } @@ -281,3 +281,95 @@ func TestModelMapper_Regex_CaseInsensitive(t *testing.T) { t.Errorf("Expected claude-sonnet-4, got %s", result) } } + +func TestModelMapper_SuffixPreservation(t *testing.T) { + reg := registry.GetGlobalRegistry() + + // Register test models + reg.RegisterClient("test-client-suffix", "gemini", 
[]*registry.ModelInfo{ + {ID: "gemini-2.5-pro", OwnedBy: "google", Type: "gemini"}, + }) + reg.RegisterClient("test-client-suffix-2", "claude", []*registry.ModelInfo{ + {ID: "claude-sonnet-4", OwnedBy: "anthropic", Type: "claude"}, + }) + defer reg.UnregisterClient("test-client-suffix") + defer reg.UnregisterClient("test-client-suffix-2") + + tests := []struct { + name string + mappings []config.AmpModelMapping + input string + want string + }{ + { + name: "numeric suffix preserved", + mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}}, + input: "g25p(8192)", + want: "gemini-2.5-pro(8192)", + }, + { + name: "level suffix preserved", + mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}}, + input: "g25p(high)", + want: "gemini-2.5-pro(high)", + }, + { + name: "no suffix unchanged", + mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}}, + input: "g25p", + want: "gemini-2.5-pro", + }, + { + name: "config suffix takes priority", + mappings: []config.AmpModelMapping{{From: "alias", To: "gemini-2.5-pro(medium)"}}, + input: "alias(high)", + want: "gemini-2.5-pro(medium)", + }, + { + name: "regex with suffix preserved", + mappings: []config.AmpModelMapping{{From: "^g25.*", To: "gemini-2.5-pro", Regex: true}}, + input: "g25p(8192)", + want: "gemini-2.5-pro(8192)", + }, + { + name: "auto suffix preserved", + mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}}, + input: "g25p(auto)", + want: "gemini-2.5-pro(auto)", + }, + { + name: "none suffix preserved", + mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}}, + input: "g25p(none)", + want: "gemini-2.5-pro(none)", + }, + { + name: "case insensitive base lookup with suffix", + mappings: []config.AmpModelMapping{{From: "G25P", To: "gemini-2.5-pro"}}, + input: "g25p(high)", + want: "gemini-2.5-pro(high)", + }, + { + name: "empty suffix filtered out", + mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}}, + input: "g25p()", + want: "gemini-2.5-pro", + }, + { + name: "incomplete suffix treated as no suffix", + mappings: []config.AmpModelMapping{{From: "g25p(high", To: "gemini-2.5-pro"}}, + input: "g25p(high", + want: "gemini-2.5-pro", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mapper := NewModelMapper(tt.mappings) + got := mapper.MapModel(tt.input) + if got != tt.want { + t.Errorf("MapModel(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} diff --git a/internal/config/config.go b/internal/config/config.go index 6843d6b8..effb44f5 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -266,6 +266,9 @@ type ClaudeKey struct { ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"` } +func (k ClaudeKey) GetAPIKey() string { return k.APIKey } +func (k ClaudeKey) GetBaseURL() string { return k.BaseURL } + // ClaudeModel describes a mapping between an alias and the actual upstream model name. type ClaudeModel struct { // Name is the upstream model identifier used when issuing requests. @@ -308,6 +311,9 @@ type CodexKey struct { ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"` } +func (k CodexKey) GetAPIKey() string { return k.APIKey } +func (k CodexKey) GetBaseURL() string { return k.BaseURL } + // CodexModel describes a mapping between an alias and the actual upstream model name. type CodexModel struct { // Name is the upstream model identifier used when issuing requests. 
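These one-line accessors strongly suggest the key types now satisfy a shared credential interface consumed elsewhere (likely sdk/cliproxy/auth), though the interface itself is not visible in this hunk. A hypothetical sketch of that shape, with assumed names:

```go
package main

import "fmt"

// providerKey is a hypothetical interface; the patch shows only the accessor
// methods, so this name and its home package are assumptions.
type providerKey interface {
	GetAPIKey() string
	GetBaseURL() string
}

// claudeKey mirrors config.ClaudeKey just enough to demonstrate the pattern.
type claudeKey struct {
	APIKey  string
	BaseURL string
}

func (k claudeKey) GetAPIKey() string  { return k.APIKey }
func (k claudeKey) GetBaseURL() string { return k.BaseURL }

// describe works uniformly over any key type that exposes the two accessors.
func describe(k providerKey) string {
	base := k.GetBaseURL()
	if base == "" {
		base = "<provider default>"
	}
	return fmt.Sprintf("key=%q base=%s", k.GetAPIKey(), base)
}

func main() {
	fmt.Println(describe(claudeKey{APIKey: "sk-example"}))
}
```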
@@ -349,6 +355,9 @@ type GeminiKey struct { ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"` } +func (k GeminiKey) GetAPIKey() string { return k.APIKey } +func (k GeminiKey) GetBaseURL() string { return k.BaseURL } + // GeminiModel describes a mapping between an alias and the actual upstream model name. type GeminiModel struct { // Name is the upstream model identifier used when issuing requests. @@ -406,6 +415,9 @@ type OpenAICompatibilityModel struct { Alias string `yaml:"alias" json:"alias"` } +func (m OpenAICompatibilityModel) GetName() string { return m.Name } +func (m OpenAICompatibilityModel) GetAlias() string { return m.Alias } + // LoadConfig reads a YAML configuration file from the given path, // unmarshals it into a Config struct, applies environment variable overrides, // and returns it. diff --git a/internal/config/vertex_compat.go b/internal/config/vertex_compat.go index 632bf7cc..786c5318 100644 --- a/internal/config/vertex_compat.go +++ b/internal/config/vertex_compat.go @@ -36,6 +36,9 @@ type VertexCompatKey struct { Models []VertexCompatModel `yaml:"models,omitempty" json:"models,omitempty"` } +func (k VertexCompatKey) GetAPIKey() string { return k.APIKey } +func (k VertexCompatKey) GetBaseURL() string { return k.BaseURL } + // VertexCompatModel represents a model configuration for Vertex compatibility, // including the actual model name and its alias for API routing. type VertexCompatModel struct { diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index bea2ecc3..268caeb4 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -27,7 +27,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4.5 Sonnet", ContextLength: 200000, MaxCompletionTokens: 64000, - Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, }, { ID: "claude-opus-4-5-20251101", @@ -39,7 +39,7 @@ func GetClaudeModels() []*ModelInfo { Description: "Premium model combining maximum intelligence with practical performance", ContextLength: 200000, MaxCompletionTokens: 64000, - Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, }, { ID: "claude-opus-4-1-20250805", @@ -50,7 +50,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4.1 Opus", ContextLength: 200000, MaxCompletionTokens: 32000, - Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false}, }, { ID: "claude-opus-4-20250514", @@ -61,7 +61,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4 Opus", ContextLength: 200000, MaxCompletionTokens: 32000, - Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false}, }, { ID: "claude-sonnet-4-20250514", @@ -72,7 +72,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4 Sonnet", ContextLength: 200000, MaxCompletionTokens: 64000, - Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: 
false}, }, { ID: "claude-3-7-sonnet-20250219", @@ -83,7 +83,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 3.7 Sonnet", ContextLength: 128000, MaxCompletionTokens: 8192, - Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false}, }, { ID: "claude-3-5-haiku-20241022", @@ -777,8 +777,8 @@ func GetAntigravityModelConfig() map[string]*AntigravityModelConfig { "gemini-3-pro-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-preview"}, "gemini-3-pro-image-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image-preview"}, "gemini-3-flash-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash-preview"}, - "gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000}, - "gemini-claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000}, + "gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, MaxCompletionTokens: 64000}, + "gemini-claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, MaxCompletionTokens: 64000}, } } diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go index a4e9acdf..c90f6f61 100644 --- a/internal/registry/model_registry.go +++ b/internal/registry/model_registry.go @@ -51,6 +51,11 @@ type ModelInfo struct { // Thinking holds provider-specific reasoning/thinking budget capabilities. // This is optional and currently used for Gemini thinking budget normalization. Thinking *ThinkingSupport `json:"thinking,omitempty"` + + // UserDefined indicates this model was defined through config file's models[] + // array (e.g., openai-compatibility.*.models[], *-api-key.models[]). + // UserDefined models have thinking configuration passed through without validation. + UserDefined bool `json:"-"` } // ThinkingSupport describes a model family's supported internal reasoning budget range. diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go index c3e3edb0..cf8e216e 100644 --- a/internal/runtime/executor/aistudio_executor.go +++ b/internal/runtime/executor/aistudio_executor.go @@ -14,7 +14,7 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/wsrelay" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -111,7 +111,8 @@ func (e *AIStudioExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.A // Execute performs a non-streaming request to the AI Studio API. 
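The ThinkingSupport edits above flip ZeroAllowed and DynamicAllowed for several Claude models, so it is worth making the field semantics concrete. The validator below is a hypothetical illustration only; the real checks live in the new internal/thinking package, and the meaning of the sentinel budgets (0 disables thinking, -1 requests a model-chosen budget) is an assumption carried over from Gemini conventions.

```go
package main

import "fmt"

// thinkingSupport mirrors the budget-related fields of registry.ThinkingSupport.
type thinkingSupport struct {
	Min, Max       int
	ZeroAllowed    bool // budget 0 (thinking disabled) is accepted
	DynamicAllowed bool // budget -1 (model picks the budget) is accepted
}

// checkBudget shows how the four fields interact for a requested budget.
func checkBudget(s thinkingSupport, budget int) error {
	switch {
	case budget == 0 && s.ZeroAllowed:
		return nil
	case budget == -1 && s.DynamicAllowed:
		return nil
	case budget >= s.Min && budget <= s.Max:
		return nil
	default:
		return fmt.Errorf("budget %d not allowed (range [%d, %d])", budget, s.Min, s.Max)
	}
}

func main() {
	// Claude 4.5 Sonnet after this patch: zero allowed, dynamic not.
	s := thinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}
	fmt.Println(checkBudget(s, 0))    // <nil>
	fmt.Println(checkBudget(s, -1))   // error
	fmt.Println(checkBudget(s, 4096)) // <nil>
}
```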
func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) translatedReq, body, err := e.translateRequest(req, opts, false) @@ -119,7 +120,7 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, return resp, err } - endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt) + endpoint := e.buildEndpoint(baseModel, body.action, opts.Alt) wsReq := &wsrelay.HTTPRequest{ Method: http.MethodPost, URL: endpoint, @@ -166,7 +167,8 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, // ExecuteStream performs a streaming request to the AI Studio API. func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) translatedReq, body, err := e.translateRequest(req, opts, true) @@ -174,7 +176,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth return nil, err } - endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt) + endpoint := e.buildEndpoint(baseModel, body.action, opts.Alt) wsReq := &wsrelay.HTTPRequest{ Method: http.MethodPost, URL: endpoint, @@ -315,6 +317,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth // CountTokens counts tokens for the given request using the AI Studio API. 
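The recurring `baseModel := thinking.ParseSuffix(req.Model).ModelName` line in Execute and ExecuteStream exists because the upstream endpoint embeds the model id in the URL, and a suffixed alias such as `gemini-2.5-pro(8192)` would produce an invalid path. A tiny illustration (`parseBase` is a stand-in for `ParseSuffix(...).ModelName`):

```go
package main

import (
	"fmt"
	"strings"
)

// parseBase drops a complete trailing "(...)" suffix, if present.
func parseBase(model string) string {
	if i := strings.LastIndexByte(model, '('); i > 0 && strings.HasSuffix(model, ")") {
		return model[:i]
	}
	return model
}

func main() {
	for _, m := range []string{"gemini-2.5-pro", "gemini-2.5-pro(8192)"} {
		// Only the base id may appear in the upstream URL.
		fmt.Printf("%-22s -> models/%s:generateContent\n", m, parseBase(m))
	}
}
```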
func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName _, body, err := e.translateRequest(req, opts, false) if err != nil { return cliproxyexecutor.Response{}, err @@ -324,7 +327,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A body.payload, _ = sjson.DeleteBytes(body.payload, "tools") body.payload, _ = sjson.DeleteBytes(body.payload, "safetySettings") - endpoint := e.buildEndpoint(req.Model, "countTokens", "") + endpoint := e.buildEndpoint(baseModel, "countTokens", "") wsReq := &wsrelay.HTTPRequest{ Method: http.MethodPost, URL: endpoint, @@ -380,22 +383,19 @@ type translatedPayload struct { } func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool) ([]byte, translatedPayload, error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + from := opts.SourceFormat to := sdktranslator.FromString("gemini") originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, stream) - payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream) - payload = ApplyThinkingMetadata(payload, req.Metadata, req.Model) - payload = util.ApplyGemini3ThinkingLevelFromMetadata(req.Model, req.Metadata, payload) - payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload) - payload = util.ConvertThinkingLevelToBudget(payload, req.Model, true) - payload = util.NormalizeGeminiThinkingBudget(req.Model, payload, true) - payload = util.StripThinkingConfigIfUnsupported(req.Model, payload) - payload = fixGeminiImageAspectRatio(req.Model, payload) - payload = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", payload, originalTranslated) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream) + payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream) + payload, _ = thinking.ApplyThinking(payload, req.Model, "gemini") + payload = fixGeminiImageAspectRatio(baseModel, payload) + payload = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", payload, originalTranslated) payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens") payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseMimeType") payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseJsonSchema") diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 8d1ef23d..4f704c05 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -24,6 +24,7 @@ import ( "github.com/google/uuid" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -107,8 +108,10 @@ func (e *AntigravityExecutor) HttpRequest(ctx context.Context, auth *cliproxyaut // Execute performs a non-streaming request to the 
Antigravity API. func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { - isClaude := strings.Contains(strings.ToLower(req.Model), "claude") - if isClaude || strings.Contains(req.Model, "gemini-3-pro") { + baseModel := thinking.ParseSuffix(req.Model).ModelName + isClaude := strings.Contains(strings.ToLower(baseModel), "claude") + + if isClaude || strings.Contains(baseModel, "gemini-3-pro") { return e.executeClaudeNonStream(ctx, auth, req, opts) } @@ -120,23 +123,24 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au auth = updatedAuth } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("antigravity") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false) - translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) + translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model) - translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated) - translated = normalizeAntigravityThinking(req.Model, translated, isClaude) - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated) + translated, _ = thinking.ApplyThinking(translated, req.Model, "antigravity") + + // Preserve Claude special handling (use baseModel for registry lookups) + translated = normalizeAntigravityThinking(baseModel, translated, isClaude) + translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -146,7 +150,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au var lastErr error for idx, baseURL := range baseURLs { - httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, false, opts.Alt, baseURL) + httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, false, opts.Alt, baseURL) if errReq != nil { err = errReq return resp, err @@ -227,6 +231,8 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au // executeClaudeNonStream performs a claude non-streaming request to the Antigravity API. 
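Execute walks `antigravityBaseURLFallbackOrder(auth)` and keeps the most recent failure. Stripped of request building and auth, the retry shape looks like this (names simplified):

```go
package main

import (
	"errors"
	"fmt"
)

// tryBase stands in for building and sending one request against a base URL.
func tryBase(base string) (string, error) {
	if base == "https://primary.example" {
		return "", errors.New("503 from primary") // simulated outage
	}
	return "ok via " + base, nil
}

func main() {
	bases := []string{"https://primary.example", "https://fallback.example"}
	var lastErr error
	for _, base := range bases {
		resp, err := tryBase(base)
		if err != nil {
			lastErr = err // remember the failure, try the next base URL
			continue
		}
		fmt.Println(resp)
		return
	}
	fmt.Println("all bases failed:", lastErr)
}
```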
func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth) if errToken != nil { return resp, errToken @@ -235,23 +241,24 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth * auth = updatedAuth } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("antigravity") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) - translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) - translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model) - translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated) - translated = normalizeAntigravityThinking(req.Model, translated, true) - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated) + translated, _ = thinking.ApplyThinking(translated, req.Model, "antigravity") + + // Preserve Claude special handling (use baseModel for registry lookups) + translated = normalizeAntigravityThinking(baseModel, translated, true) + translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -261,7 +268,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth * var lastErr error for idx, baseURL := range baseURLs { - httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, true, opts.Alt, baseURL) + httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL) if errReq != nil { err = errReq return resp, err @@ -587,7 +594,10 @@ func (e *AntigravityExecutor) convertStreamToNonStream(stream []byte) []byte { // ExecuteStream performs a streaming request to the Antigravity API. 
func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + ctx = context.WithValue(ctx, "alt", "") + isClaude := strings.Contains(strings.ToLower(baseModel), "claude") token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth) if errToken != nil { @@ -597,25 +607,24 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya auth = updatedAuth } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) - isClaude := strings.Contains(strings.ToLower(req.Model), "claude") - from := opts.SourceFormat to := sdktranslator.FromString("antigravity") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) - translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) - translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model) - translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated) - translated = normalizeAntigravityThinking(req.Model, translated, isClaude) - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated) + translated, _ = thinking.ApplyThinking(translated, req.Model, "antigravity") + + // Preserve Claude special handling (use baseModel for registry lookups) + translated = normalizeAntigravityThinking(baseModel, translated, isClaude) + translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -625,7 +634,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya var lastErr error for idx, baseURL := range baseURLs { - httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, true, opts.Alt, baseURL) + httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL) if errReq != nil { err = errReq return nil, err @@ -771,6 +780,9 @@ func (e *AntigravityExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Au // CountTokens counts tokens for the given request using the Antigravity API. 
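Every executor in this patch converges on the same four-step payload pipeline, and the order matters because the provider-specific normalizer expects the generic thinking config to already be present. A schematic with placeholder stages (the stage names follow the diff; the bodies are stubs, not the real functions):

```go
package main

import "fmt"

type stage func(payload []byte) []byte

// pipeline threads a payload through the stages in order.
func pipeline(payload []byte, stages ...stage) []byte {
	for _, s := range stages {
		payload = s(payload)
	}
	return payload
}

func main() {
	// Order used by the Antigravity executor after this patch:
	//   1. sdktranslator.TranslateRequest  (format conversion, baseModel)
	//   2. thinking.ApplyThinking          (suffix -> thinking config)
	//   3. normalizeAntigravityThinking    (Claude-specific clamping)
	//   4. applyPayloadConfigWithRoot      (user payload overrides)
	tag := func(name string) stage {
		return func(p []byte) []byte { return append(p, []byte(" -> "+name)...) }
	}
	out := pipeline([]byte("request"),
		tag("translate"), tag("applyThinking"), tag("normalize"), tag("payloadConfig"))
	fmt.Println(string(out))
}
```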
func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + isClaude := strings.Contains(strings.ToLower(baseModel), "claude") + token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth) if errToken != nil { return cliproxyexecutor.Response{}, errToken @@ -786,7 +798,16 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut to := sdktranslator.FromString("antigravity") respCtx := context.WithValue(ctx, "alt", opts.Alt) - isClaude := strings.Contains(strings.ToLower(req.Model), "claude") + // Prepare payload once (doesn't depend on baseURL) + payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + payload, _ = thinking.ApplyThinking(payload, req.Model, "antigravity") + + // Preserve Claude special handling (use baseModel for registry lookups) + payload = normalizeAntigravityThinking(baseModel, payload, isClaude) + payload = deleteJSONField(payload, "project") + payload = deleteJSONField(payload, "model") + payload = deleteJSONField(payload, "request.safetySettings") baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -803,14 +824,6 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut var lastErr error for idx, baseURL := range baseURLs { - payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model) - payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, payload) - payload = normalizeAntigravityThinking(req.Model, payload, isClaude) - payload = deleteJSONField(payload, "project") - payload = deleteJSONField(payload, "model") - payload = deleteJSONField(payload, "request.safetySettings") - base := strings.TrimSuffix(baseURL, "/") if base == "" { base = buildBaseURL(auth) @@ -1462,11 +1475,18 @@ func alias2ModelName(modelName string) string { } } -// normalizeAntigravityThinking clamps or removes thinking config based on model support. -// For Claude models, it additionally ensures thinking budget < max_tokens. +// normalizeAntigravityThinking performs Antigravity-specific thinking config normalization. +// This function is called AFTER thinking.ApplyThinking() to apply Claude-specific constraints. 
+// +// It handles: +// - Stripping thinking config for unsupported models (via util.StripThinkingConfigIfUnsupported) +// - Normalizing budget to model range (via thinking.ClampBudget) +// - For Claude models: ensuring thinking budget < max_tokens +// - For Claude models: removing thinkingConfig if budget < minimum allowed func normalizeAntigravityThinking(model string, payload []byte, isClaude bool) []byte { payload = util.StripThinkingConfigIfUnsupported(model, payload) - if !util.ModelSupportsThinking(model) { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(model) + if modelInfo == nil || modelInfo.Thinking == nil { return payload } budget := gjson.GetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget") @@ -1474,7 +1494,7 @@ func normalizeAntigravityThinking(model string, payload []byte, isClaude bool) [ return payload } raw := int(budget.Int()) - normalized := util.NormalizeThinkingBudget(model, raw) + normalized := thinking.ClampBudget(raw, modelInfo.Thinking.Min, modelInfo.Thinking.Max) if isClaude { effectiveMax, setDefaultMax := antigravityEffectiveMaxTokens(model, payload) diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 4242a244..9f2a5b22 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -18,6 +18,7 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -84,17 +85,15 @@ func (e *ClaudeExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut } func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { - apiKey, baseURL := claudeCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, baseURL := claudeCreds(auth) if baseURL == "" { baseURL = "https://api.anthropic.com" } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } from := opts.SourceFormat to := sdktranslator.FromString("claude") // Use streaming translation to preserve function calling, except for claude. 
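A note on `thinking.ClampBudget`, introduced in normalizeAntigravityThinking above: judging from its call site it is a plain range clamp over the model's Min/Max. A sketch of the presumed contract (the real function may additionally special-case the 0 and -1 sentinels):

```go
package main

import "fmt"

// clampBudget mirrors the presumed behavior of thinking.ClampBudget.
func clampBudget(v, min, max int) int {
	if v < min {
		return min
	}
	if v > max {
		return max
	}
	return v
}

func main() {
	fmt.Println(clampBudget(500, 1024, 128000))    // 1024: raised to the floor
	fmt.Println(clampBudget(200000, 1024, 128000)) // 128000: capped at the ceiling
	fmt.Println(clampBudget(8192, 1024, 128000))   // 8192: already in range
}
```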
@@ -103,22 +102,22 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, stream) - body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream) - body, _ = sjson.SetBytes(body, "model", model) - // Inject thinking config based on model metadata for thinking variants - body = e.injectThinkingConfig(model, req.Metadata, body) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream) + body, _ = sjson.SetBytes(body, "model", baseModel) - if !strings.HasPrefix(model, "claude-3-5-haiku") { + body, _ = thinking.ApplyThinking(body, req.Model, "claude") + + if !strings.HasPrefix(baseModel, "claude-3-5-haiku") { body = checkSystemInstructions(body) } - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) // Ensure max_tokens > thinking.budget_tokens when thinking is enabled - body = ensureMaxTokensForThinking(model, body) + body = ensureMaxTokensForThinking(baseModel, body) // Extract betas from body and convert to header var extraBetas []string @@ -218,36 +217,35 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r } func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - apiKey, baseURL := claudeCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, baseURL := claudeCreds(auth) if baseURL == "" { baseURL = "https://api.anthropic.com" } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("claude") - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true) - body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true) - body, _ = sjson.SetBytes(body, "model", model) - // Inject thinking config based on model metadata for thinking variants - body = e.injectThinkingConfig(model, req.Metadata, body) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + body, _ = sjson.SetBytes(body, "model", baseModel) + + body, _ = thinking.ApplyThinking(body, req.Model, "claude") + body = checkSystemInstructions(body) - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) // Disable thinking if tool_choice forces tool use 
(Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) // Ensure max_tokens > thinking.budget_tokens when thinking is enabled - body = ensureMaxTokensForThinking(model, body) + body = ensureMaxTokensForThinking(baseModel, body) // Extract betas from body and convert to header var extraBetas []string @@ -381,8 +379,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A } func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { - apiKey, baseURL := claudeCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, baseURL := claudeCreds(auth) if baseURL == "" { baseURL = "https://api.anthropic.com" } @@ -391,14 +390,10 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut to := sdktranslator.FromString("claude") // Use streaming translation to preserve function calling, except for claude. stream := from != to - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } - body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream) - body, _ = sjson.SetBytes(body, "model", model) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream) + body, _ = sjson.SetBytes(body, "model", baseModel) - if !strings.HasPrefix(model, "claude-3-5-haiku") { + if !strings.HasPrefix(baseModel, "claude-3-5-haiku") { body = checkSystemInstructions(body) } @@ -527,17 +522,6 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) { return betas, body } -// injectThinkingConfig adds thinking configuration based on metadata using the unified flow. -// It uses util.ResolveClaudeThinkingConfig which internally calls ResolveThinkingConfigFromMetadata -// and NormalizeThinkingBudget, ensuring consistency with other executors like Gemini. -func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte { - budget, ok := util.ResolveClaudeThinkingConfig(modelName, metadata) - if !ok { - return body - } - return util.ApplyClaudeThinkingConfig(body, budget) -} - // disableThinkingIfToolChoiceForced checks if tool_choice forces tool use and disables thinking. // Anthropic API does not allow thinking when tool_choice is set to "any" or a specific tool. // See: https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations @@ -587,51 +571,6 @@ func ensureMaxTokensForThinking(modelName string, body []byte) []byte { return body } -func (e *ClaudeExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string { - trimmed := strings.TrimSpace(alias) - if trimmed == "" { - return "" - } - - entry := e.resolveClaudeConfig(auth) - if entry == nil { - return "" - } - - normalizedModel, metadata := util.NormalizeThinkingModel(trimmed) - - // Candidate names to match against configured aliases/names. 
- candidates := []string{strings.TrimSpace(normalizedModel)} - if !strings.EqualFold(normalizedModel, trimmed) { - candidates = append(candidates, trimmed) - } - if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) { - candidates = append(candidates, original) - } - - for i := range entry.Models { - model := entry.Models[i] - name := strings.TrimSpace(model.Name) - modelAlias := strings.TrimSpace(model.Alias) - - for _, candidate := range candidates { - if candidate == "" { - continue - } - if modelAlias != "" && strings.EqualFold(modelAlias, candidate) { - if name != "" { - return name - } - return candidate - } - if name != "" && strings.EqualFold(name, candidate) { - return name - } - } - } - return "" -} - func (e *ClaudeExecutor) resolveClaudeConfig(auth *cliproxyauth.Auth) *config.ClaudeKey { if auth == nil || e.cfg == nil { return nil diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 2f4c6295..9e553e3c 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -13,6 +13,7 @@ import ( codexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/codex" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -72,18 +73,15 @@ func (e *CodexExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth } func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { - apiKey, baseURL := codexCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, baseURL := codexCreds(auth) if baseURL == "" { baseURL = "https://chatgpt.com/backend-api/codex" } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) - defer reporter.trackFailure(ctx, &err) - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("codex") @@ -93,17 +91,15 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re originalPayload = bytes.Clone(opts.OriginalRequest) } originalPayload = misc.InjectCodexUserAgent(originalPayload, userAgent) - originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent) - body = sdktranslator.TranslateRequest(from, to, model, body, false) + body = sdktranslator.TranslateRequest(from, to, baseModel, body, false) body = misc.StripCodexUserAgent(body) - body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false) - body = NormalizeThinkingConfig(body, model, false) - if errValidate := ValidateThinkingConfig(body, model); errValidate != nil { - return resp, errValidate - } - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, 
originalTranslated) - body, _ = sjson.SetBytes(body, "model", model) + + body, _ = thinking.ApplyThinking(body, req.Model, "codex") + + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) + body, _ = sjson.SetBytes(body, "model", baseModel) body, _ = sjson.SetBytes(body, "stream", true) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.DeleteBytes(body, "prompt_cache_retention") @@ -182,18 +178,15 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re } func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - apiKey, baseURL := codexCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, baseURL := codexCreds(auth) if baseURL == "" { baseURL = "https://chatgpt.com/backend-api/codex" } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) - defer reporter.trackFailure(ctx, &err) - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("codex") @@ -203,20 +196,17 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au originalPayload = bytes.Clone(opts.OriginalRequest) } originalPayload = misc.InjectCodexUserAgent(originalPayload, userAgent) - originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent) - body = sdktranslator.TranslateRequest(from, to, model, body, true) + body = sdktranslator.TranslateRequest(from, to, baseModel, body, true) body = misc.StripCodexUserAgent(body) - body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false) - body = NormalizeThinkingConfig(body, model, false) - if errValidate := ValidateThinkingConfig(body, model); errValidate != nil { - return nil, errValidate - } - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) + body, _ = thinking.ApplyThinking(body, req.Model, "codex") + + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.DeleteBytes(body, "prompt_cache_retention") - body, _ = sjson.SetBytes(body, "model", model) + body, _ = sjson.SetBytes(body, "model", baseModel) url := strings.TrimSuffix(baseURL, "/") + "/responses" httpReq, err := e.cacheHelper(ctx, from, url, req, body) @@ -303,25 +293,23 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au } func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } + baseModel := thinking.ParseSuffix(req.Model).ModelName from := opts.SourceFormat to := sdktranslator.FromString("codex") userAgent := codexUserAgent(ctx) body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent) - body = sdktranslator.TranslateRequest(from, 
to, model, body, false) + body = sdktranslator.TranslateRequest(from, to, baseModel, body, false) body = misc.StripCodexUserAgent(body) - body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false) - body, _ = sjson.SetBytes(body, "model", model) + body, _ = thinking.ApplyThinking(body, req.Model, "codex") + + body, _ = sjson.SetBytes(body, "model", baseModel) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.DeleteBytes(body, "prompt_cache_retention") body, _ = sjson.SetBytes(body, "stream", false) - enc, err := tokenizerForCodexModel(model) + enc, err := tokenizerForCodexModel(baseModel) if err != nil { return cliproxyexecutor.Response{}, fmt.Errorf("codex executor: tokenizer init failed: %w", err) } @@ -593,51 +581,6 @@ func codexCreds(a *cliproxyauth.Auth) (apiKey, baseURL string) { return } -func (e *CodexExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string { - trimmed := strings.TrimSpace(alias) - if trimmed == "" { - return "" - } - - entry := e.resolveCodexConfig(auth) - if entry == nil { - return "" - } - - normalizedModel, metadata := util.NormalizeThinkingModel(trimmed) - - // Candidate names to match against configured aliases/names. - candidates := []string{strings.TrimSpace(normalizedModel)} - if !strings.EqualFold(normalizedModel, trimmed) { - candidates = append(candidates, trimmed) - } - if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) { - candidates = append(candidates, original) - } - - for i := range entry.Models { - model := entry.Models[i] - name := strings.TrimSpace(model.Name) - modelAlias := strings.TrimSpace(model.Alias) - - for _, candidate := range candidates { - if candidate == "" { - continue - } - if modelAlias != "" && strings.EqualFold(modelAlias, candidate) { - if name != "" { - return name - } - return candidate - } - if name != "" && strings.EqualFold(name, candidate) { - return name - } - } - } - return "" -} - func (e *CodexExecutor) resolveCodexConfig(auth *cliproxyauth.Auth) *config.CodexKey { if auth == nil || e.cfg == nil { return nil diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index 20b93a92..3d08b830 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -20,6 +20,7 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -102,28 +103,30 @@ func (e *GeminiCLIExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth. // Execute performs a non-streaming request to the Gemini CLI API. 
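Before the Gemini CLI executor below, one remark on the Codex CountTokens path above: it counts tokens locally via `tokenizerForCodexModel(baseModel)` instead of calling upstream. A schematic of that approach; the encoder interface and the whitespace tokenizer are invented for illustration, and the real tokenizer comes from a library not shown in this hunk:

```go
package main

import (
	"fmt"
	"strings"
)

// encoder abstracts whatever tokenizerForCodexModel returns.
type encoder interface {
	Encode(text string) []int
}

// whitespaceEncoder is a deliberately crude stand-in: one token per word.
type whitespaceEncoder struct{}

func (whitespaceEncoder) Encode(text string) []int {
	return make([]int, len(strings.Fields(text)))
}

// countTokens sums token counts across message texts.
func countTokens(enc encoder, texts ...string) int {
	total := 0
	for _, t := range texts {
		total += len(enc.Encode(t))
	}
	return total
}

func main() {
	fmt.Println(countTokens(whitespaceEncoder{}, "hello world", "count me locally"))
}
```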
func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth) if err != nil { return resp, err } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false) - basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) - basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload) - basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload) - basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) - basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) - basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) - basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) + basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + basePayload, _ = thinking.ApplyThinking(basePayload, req.Model, "gemini-cli") + + basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload) + basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated) action := "generateContent" if req.Metadata != nil { @@ -133,9 +136,9 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth } projectID := resolveGeminiProjectID(auth) - models := cliPreviewFallbackOrder(req.Model) - if len(models) == 0 || models[0] != req.Model { - models = append([]string{req.Model}, models...) + models := cliPreviewFallbackOrder(baseModel) + if len(models) == 0 || models[0] != baseModel { + models = append([]string{baseModel}, models...) } httpClient := newHTTPClient(ctx, e.cfg, auth, 0) @@ -246,34 +249,36 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth // ExecuteStream performs a streaming request to the Gemini CLI API. 
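`thinking.ApplyThinking(payload, model, provider)` takes the provider as a string, and the new internal/thinking/provider/* packages plus provider_map_test.go in the diffstat suggest a dispatch table keyed by that string. The sketch below is a guess at that wiring, not the real registry; every name in it is an assumption:

```go
package main

import (
	"fmt"
	"strings"
)

// applyFunc rewrites a payload according to a parsed thinking suffix.
type applyFunc func(payload []byte, rawSuffix string) ([]byte, error)

// providers is a hypothetical dispatch table; the real one lives in
// internal/thinking and is not shown in this patch.
var providers = map[string]applyFunc{
	"gemini-cli": func(p []byte, s string) ([]byte, error) {
		return append(p, []byte(" thinkingConfig{budget:"+s+"}")...), nil
	},
}

func applyThinking(payload []byte, model, provider string) ([]byte, error) {
	f, ok := providers[provider]
	if !ok {
		return payload, fmt.Errorf("no thinking provider registered for %q", provider)
	}
	// Extract the "(...)" suffix, if any, before dispatching.
	raw := ""
	if i := strings.LastIndexByte(model, '('); i > 0 && strings.HasSuffix(model, ")") {
		raw = model[i+1 : len(model)-1]
	}
	if raw == "" {
		return payload, nil // nothing to apply
	}
	return f(payload, raw)
}

func main() {
	out, err := applyThinking([]byte("payload"), "gemini-2.5-pro(8192)", "gemini-cli")
	fmt.Println(string(out), err)
}
```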
func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth) if err != nil { return nil, err } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) - basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) - basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload) - basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload) - basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) - basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) - basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) - basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + + basePayload, _ = thinking.ApplyThinking(basePayload, req.Model, "gemini-cli") + + basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload) + basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated) projectID := resolveGeminiProjectID(auth) - models := cliPreviewFallbackOrder(req.Model) - if len(models) == 0 || models[0] != req.Model { - models = append([]string{req.Model}, models...) + models := cliPreviewFallbackOrder(baseModel) + if len(models) == 0 || models[0] != baseModel { + models = append([]string{baseModel}, models...) } httpClient := newHTTPClient(ctx, e.cfg, auth, 0) @@ -435,6 +440,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut // CountTokens counts tokens for the given request using the Gemini CLI API. func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth) if err != nil { return cliproxyexecutor.Response{}, err @@ -443,9 +450,9 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") - models := cliPreviewFallbackOrder(req.Model) - if len(models) == 0 || models[0] != req.Model { - models = append([]string{req.Model}, models...) + models := cliPreviewFallbackOrder(baseModel) + if len(models) == 0 || models[0] != baseModel { + models = append([]string{baseModel}, models...) 
} httpClient := newHTTPClient(ctx, e.cfg, auth, 0) @@ -463,15 +470,15 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. - // The loop variable attemptModel is only used as the concrete model id sent to the upstream - // Gemini CLI endpoint when iterating fallback variants. + // The fallback list now only controls how many attempts are made; each attempt + // sends baseModel as the concrete model id to the upstream Gemini CLI endpoint. - for _, attemptModel := range models { - payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false) - payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model) - payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, payload) + for range models { + payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + payload, _ = thinking.ApplyThinking(payload, req.Model, "gemini-cli") + payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "model") payload = deleteJSONField(payload, "request.safetySettings") - payload = util.StripThinkingConfigIfUnsupported(req.Model, payload) - payload = fixGeminiCLIImageAspectRatio(req.Model, payload) + payload = fixGeminiCLIImageAspectRatio(baseModel, payload) tok, errTok := tokenSource.Token() if errTok != nil { diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index a913a5c0..fd6ec22e 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -13,6 +13,7 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -102,16 +103,13 @@ func (e *GeminiExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut // - cliproxyexecutor.Response: The response from the API // - error: An error if the request fails func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, bearer := geminiCreds(auth) - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) - model := req.Model - if override := e.resolveUpstreamModel(model, auth); override != "" { - model = override - } - // Official Gemini API via API key or OAuth bearer from := opts.SourceFormat to := sdktranslator.FromString("gemini") @@ -119,15 +117,14 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false) - body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false) - body = ApplyThinkingMetadata(body, req.Metadata, model) - body = util.ApplyDefaultThinkingIfNeeded(model, body) - body = util.NormalizeGeminiThinkingBudget(model, body) - body = util.StripThinkingConfigIfUnsupported(model, body) - body = fixGeminiImageAspectRatio(model, body) - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) - body, _ = sjson.SetBytes(body, "model", model) + originalTranslated :=
sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + + body = fixGeminiImageAspectRatio(baseModel, body) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) + body, _ = sjson.SetBytes(body, "model", baseModel) action := "generateContent" if req.Metadata != nil { @@ -136,7 +133,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r } } baseURL := resolveGeminiBaseURL(auth) - url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, action) + url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, action) if opts.Alt != "" && action != "countTokens" { url = url + fmt.Sprintf("?$alt=%s", opts.Alt) } @@ -206,34 +203,30 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r // ExecuteStream performs a streaming request to the Gemini API. func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, bearer := geminiCreds(auth) - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) - model := req.Model - if override := e.resolveUpstreamModel(model, auth); override != "" { - model = override - } - from := opts.SourceFormat to := sdktranslator.FromString("gemini") originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true) - body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true) - body = ApplyThinkingMetadata(body, req.Metadata, model) - body = util.ApplyDefaultThinkingIfNeeded(model, body) - body = util.NormalizeGeminiThinkingBudget(model, body) - body = util.StripThinkingConfigIfUnsupported(model, body) - body = fixGeminiImageAspectRatio(model, body) - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) - body, _ = sjson.SetBytes(body, "model", model) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + + body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + + body = fixGeminiImageAspectRatio(baseModel, body) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) + body, _ = sjson.SetBytes(body, "model", baseModel) baseURL := resolveGeminiBaseURL(auth) - url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, "streamGenerateContent") + url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, "streamGenerateContent") if opts.Alt == "" { url = url + "?alt=sse" } else { @@ -331,27 +324,25 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A // CountTokens counts tokens for the given request using the Gemini API. 
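Throughout these executor diffs the division of labor is consistent: `thinking.ParseSuffix(req.Model).ModelName` is computed once at the top, and the suffix-free base name drives URLs, the payload's `model` field, and usage reporting, while the original `req.Model` is still handed to `thinking.ApplyThinking` so the provider layer can see the suffix. A minimal runnable sketch of that split; the parser below is an illustrative re-implementation, not the `internal/thinking` code, and only the `ParseSuffix(...).ModelName` shape is taken from this patch:

```go
package main

import (
	"fmt"
	"strings"
)

// parsed mirrors the shape the executors rely on: the base model name with
// any trailing "(...)" thinking suffix split off.
type parsed struct {
	ModelName string
	Suffix    string // e.g. "high" or "8192"; empty when absent
}

// parseSuffix is an illustrative stand-in for thinking.ParseSuffix.
func parseSuffix(model string) parsed {
	open := strings.LastIndex(model, "(")
	if open <= 0 || !strings.HasSuffix(model, ")") {
		return parsed{ModelName: model}
	}
	return parsed{ModelName: model[:open], Suffix: model[open+1 : len(model)-1]}
}

func main() {
	for _, m := range []string{"gemini-2.5-pro", "gemini-2.5-pro(8192)", "qwen-max(high)"} {
		p := parseSuffix(m)
		// The base name goes into URLs, payloads, and usage reporting;
		// the original string still reaches ApplyThinking.
		fmt.Printf("%-22s -> base=%q suffix=%q\n", m, p.ModelName, p.Suffix)
	}
}
```

The test files added later in this patch (`iflow_executor_test.go`, `qwen_executor_test.go`) exercise exactly this base-name extraction.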
func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { - apiKey, bearer := geminiCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName - model := req.Model - if override := e.resolveUpstreamModel(model, auth); override != "" { - model = override - } + apiKey, bearer := geminiCreds(auth) from := opts.SourceFormat to := sdktranslator.FromString("gemini") - translatedReq := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false) - translatedReq = ApplyThinkingMetadata(translatedReq, req.Metadata, model) - translatedReq = util.StripThinkingConfigIfUnsupported(model, translatedReq) - translatedReq = fixGeminiImageAspectRatio(model, translatedReq) + translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + translatedReq, _ = thinking.ApplyThinking(translatedReq, req.Model, "gemini") + + translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq) respCtx := context.WithValue(ctx, "alt", opts.Alt) translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings") - translatedReq, _ = sjson.SetBytes(translatedReq, "model", model) + translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel) baseURL := resolveGeminiBaseURL(auth) - url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, "countTokens") + url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, "countTokens") requestBody := bytes.NewReader(translatedReq) @@ -450,51 +441,6 @@ func resolveGeminiBaseURL(auth *cliproxyauth.Auth) string { return base } -func (e *GeminiExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string { - trimmed := strings.TrimSpace(alias) - if trimmed == "" { - return "" - } - - entry := e.resolveGeminiConfig(auth) - if entry == nil { - return "" - } - - normalizedModel, metadata := util.NormalizeThinkingModel(trimmed) - - // Candidate names to match against configured aliases/names. 
- candidates := []string{strings.TrimSpace(normalizedModel)} - if !strings.EqualFold(normalizedModel, trimmed) { - candidates = append(candidates, trimmed) - } - if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) { - candidates = append(candidates, original) - } - - for i := range entry.Models { - model := entry.Models[i] - name := strings.TrimSpace(model.Name) - modelAlias := strings.TrimSpace(model.Alias) - - for _, candidate := range candidates { - if candidate == "" { - continue - } - if modelAlias != "" && strings.EqualFold(modelAlias, candidate) { - if name != "" { - return name - } - return candidate - } - if name != "" && strings.EqualFold(name, candidate) { - return name - } - } - } - return "" -} - func (e *GeminiExecutor) resolveGeminiConfig(auth *cliproxyauth.Auth) *config.GeminiKey { if auth == nil || e.cfg == nil { return nil diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go index eebf6b1b..18d9f8d6 100644 --- a/internal/runtime/executor/gemini_vertex_executor.go +++ b/internal/runtime/executor/gemini_vertex_executor.go @@ -15,7 +15,7 @@ import ( vertexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/vertex" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" @@ -155,30 +155,26 @@ func (e *GeminiVertexExecutor) Refresh(_ context.Context, auth *cliproxyauth.Aut // executeWithServiceAccount handles authentication using service account credentials. // This method contains the original service account authentication logic. 
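Both Gemini executors also lose their per-entry `resolveUpstreamModel` alias lookup in these hunks; with suffix parsing centralized, the base name is sent upstream directly. The replacement plumbing is the format-keyed provider dispatch behind `thinking.ApplyThinking`, wired up by the blank-import file `thinking_providers.go` added near the end of this patch. A sketch of that init-time registration pattern; `Register` and the provider signature are assumptions, only the blank imports are verbatim from the patch:

```go
package main

import "fmt"

// Provider rewrites a payload's thinking configuration for one wire format.
type Provider func(payload []byte, model string) ([]byte, error)

var registry = map[string]Provider{}

// Register is what each provider package would call from its init().
func Register(format string, p Provider) { registry[format] = p }

func init() {
	// In the real tree this registration would live in
	// internal/thinking/provider/gemini, pulled in by the blank import
	// `_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini"`.
	Register("gemini", func(b []byte, model string) ([]byte, error) { return b, nil })
}

func main() {
	fmt.Println(len(registry), "provider(s) registered") // 1 provider(s) registered
}
```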
func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (resp cliproxyexecutor.Response, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false) - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) - budgetOverride = &norm - } - body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) - } - body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) - body = util.NormalizeGeminiThinkingBudget(req.Model, body) - body = util.StripThinkingConfigIfUnsupported(req.Model, body) - body = fixGeminiImageAspectRatio(req.Model, body) - body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) - body, _ = sjson.SetBytes(body, "model", req.Model) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + + body = fixGeminiImageAspectRatio(baseModel, body) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) + body, _ = sjson.SetBytes(body, "model", baseModel) action := "generateContent" if req.Metadata != nil { @@ -187,7 +183,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au } } baseURL := vertexBaseURL(location) - url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, action) + url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, action) if opts.Alt != "" && action != "countTokens" { url = url + fmt.Sprintf("?$alt=%s", opts.Alt) } @@ -258,35 +254,26 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au // executeWithAPIKey handles authentication using API key credentials. 
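One idiom repeated in every executor above is `defer reporter.trackFailure(ctx, &err)` against a named return value, so the reporter observes whatever error the function ultimately returns. A stripped-down, runnable sketch of the mechanism (the `reporter` type here is a stand-in, not the real `usageReporter`):

```go
package main

import (
	"errors"
	"fmt"
)

type reporter struct{ model string }

// trackFailure dereferences the pointer when the deferred call runs, so it
// sees the final value of the function's named error return.
func (r *reporter) trackFailure(err *error) {
	if err != nil && *err != nil {
		fmt.Printf("usage[%s]: recording failure: %v\n", r.model, *err)
	}
}

func execute(fail bool) (resp string, err error) {
	r := &reporter{model: "gemini-2.5-pro"}
	defer r.trackFailure(&err)

	if fail {
		return "", errors.New("upstream 429")
	}
	return "ok", nil
}

func main() {
	fmt.Println(execute(false))
	fmt.Println(execute(true))
}
```

Passing `baseModel` into `newUsageReporter`, as the hunks above now do, means usage is aggregated under the suffix-free name regardless of which thinking variant was requested.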
func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (resp cliproxyexecutor.Response, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) - defer reporter.trackFailure(ctx, &err) + baseModel := thinking.ParseSuffix(req.Model).ModelName - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false) - body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) { - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(model, *budgetOverride) - budgetOverride = &norm - } - body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) - } - body = util.ApplyDefaultThinkingIfNeeded(model, body) - body = util.NormalizeGeminiThinkingBudget(model, body) - body = util.StripThinkingConfigIfUnsupported(model, body) - body = fixGeminiImageAspectRatio(model, body) - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) - body, _ = sjson.SetBytes(body, "model", model) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + + body = fixGeminiImageAspectRatio(baseModel, body) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) + body, _ = sjson.SetBytes(body, "model", baseModel) action := "generateContent" if req.Metadata != nil { @@ -299,7 +286,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip if baseURL == "" { baseURL = "https://generativelanguage.googleapis.com" } - url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, action) + url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, action) if opts.Alt != "" && action != "countTokens" { url = url + fmt.Sprintf("?$alt=%s", opts.Alt) } @@ -367,33 +354,29 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip // executeStreamWithServiceAccount handles streaming authentication using service account credentials. 
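The two Vertex code paths build differently shaped URLs, both now taking `baseModel`: service-account calls address a project and location, while API-key calls use the shorter publishers path with a default endpoint. A runnable sketch of the two formats exactly as they appear in the hunks above (the constant is a stand-in for `vertexAPIVersion`, whose value is not shown in this excerpt):

```go
package main

import "fmt"

const apiVersion = "v1" // stand-in for vertexAPIVersion

func serviceAccountURL(baseURL, projectID, location, model, action string) string {
	return fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s",
		baseURL, apiVersion, projectID, location, model, action)
}

func apiKeyURL(baseURL, model, action string) string {
	if baseURL == "" {
		baseURL = "https://generativelanguage.googleapis.com"
	}
	return fmt.Sprintf("%s/%s/publishers/google/models/%s:%s",
		baseURL, apiVersion, model, action)
}

func main() {
	fmt.Println(serviceAccountURL("https://us-central1-aiplatform.googleapis.com",
		"my-project", "us-central1", "gemini-2.5-pro", "generateContent"))
	fmt.Println(apiKeyURL("", "gemini-2.5-pro", "generateContent"))
}
```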
func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) - budgetOverride = &norm - } - body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) - } - body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) - body = util.NormalizeGeminiThinkingBudget(req.Model, body) - body = util.StripThinkingConfigIfUnsupported(req.Model, body) - body = fixGeminiImageAspectRatio(req.Model, body) - body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) - body, _ = sjson.SetBytes(body, "model", req.Model) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + + body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + + body = fixGeminiImageAspectRatio(baseModel, body) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) + body, _ = sjson.SetBytes(body, "model", baseModel) baseURL := vertexBaseURL(location) - url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "streamGenerateContent") + url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, "streamGenerateContent") if opts.Alt == "" { url = url + "?alt=sse" } else { @@ -487,41 +470,32 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte // executeStreamWithAPIKey handles streaming authentication using API key credentials. 
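For streaming, the URL gains an alt parameter: SSE is the default unless the caller asked for a specific alt. The `else` branch is elided in the hunk above, so the non-default shape below is an assumption modeled on the non-streaming `?$alt=` form:

```go
package main

import "fmt"

func streamQuery(alt string) string {
	if alt == "" {
		return "?alt=sse" // default: server-sent events
	}
	return "?$alt=" + alt // assumed shape; the else branch is not shown in this excerpt
}

func main() {
	fmt.Println(streamQuery(""))     // ?alt=sse
	fmt.Println(streamQuery("json")) // ?$alt=json
}
```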
func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) - defer reporter.trackFailure(ctx, &err) + baseModel := thinking.ParseSuffix(req.Model).ModelName - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true) - body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) { - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(model, *budgetOverride) - budgetOverride = &norm - } - body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) - } - body = util.ApplyDefaultThinkingIfNeeded(model, body) - body = util.NormalizeGeminiThinkingBudget(model, body) - body = util.StripThinkingConfigIfUnsupported(model, body) - body = fixGeminiImageAspectRatio(model, body) - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) - body, _ = sjson.SetBytes(body, "model", model) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + + body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + + body = fixGeminiImageAspectRatio(baseModel, body) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) + body, _ = sjson.SetBytes(body, "model", baseModel) // For API key auth, use simpler URL format without project/location if baseURL == "" { baseURL = "https://generativelanguage.googleapis.com" } - url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, "streamGenerateContent") + url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, "streamGenerateContent") if opts.Alt == "" { url = url + "?alt=sse" } else { @@ -612,26 +586,24 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth // countTokensWithServiceAccount counts tokens using service account credentials. 
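None of these payload edits unmarshal into structs; the executors patch JSON bytes directly with the tidwall libraries. A runnable sketch of the two calls used throughout this patch: `sjson.SetBytes` to pin the upstream `model` field and `sjson.DeleteBytes` to strip fields the countTokens endpoint rejects:

```go
package main

import (
	"fmt"

	"github.com/tidwall/sjson"
)

func main() {
	body := []byte(`{"model":"gemini-2.5-pro(8192)","generationConfig":{"temperature":0.2}}`)

	// Rewrite the model to the suffix-free base name before sending upstream.
	body, _ = sjson.SetBytes(body, "model", "gemini-2.5-pro")

	// countTokens does not accept generation settings, so drop them.
	body, _ = sjson.DeleteBytes(body, "generationConfig")

	fmt.Println(string(body)) // {"model":"gemini-2.5-pro"}
}
```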
func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + from := opts.SourceFormat to := sdktranslator.FromString("gemini") - translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) - budgetOverride = &norm - } - translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) - } - translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq) - translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq) - translatedReq, _ = sjson.SetBytes(translatedReq, "model", req.Model) + + translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + translatedReq, _ = thinking.ApplyThinking(translatedReq, req.Model, "gemini") + + translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq) + translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel) respCtx := context.WithValue(ctx, "alt", opts.Alt) translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings") baseURL := vertexBaseURL(location) - url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "countTokens") + url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, "countTokens") httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq)) if errNewReq != nil { @@ -688,10 +660,6 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context return cliproxyexecutor.Response{}, errRead } appendAPIResponseChunk(ctx, e.cfg, data) - if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { - log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data)) - return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)} - } count := gjson.GetBytes(data, "totalTokens").Int() out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data) return cliproxyexecutor.Response{Payload: []byte(out)}, nil @@ -699,24 +667,17 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context // countTokensWithAPIKey handles token counting using API key credentials. 
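Responses are read the same way: the token count is pulled straight out of the raw bytes with gjson before being translated back to the caller's format. Note that gjson yields the zero value for a missing path, so a body without `totalTokens` reads as a count of 0:

```go
package main

import (
	"fmt"

	"github.com/tidwall/gjson"
)

func main() {
	data := []byte(`{"totalTokens":128}`)
	fmt.Println(gjson.GetBytes(data, "totalTokens").Int()) // 128
	fmt.Println(gjson.GetBytes(data, "missing").Int())     // 0
}
```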
func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) { - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } + baseModel := thinking.ParseSuffix(req.Model).ModelName from := opts.SourceFormat to := sdktranslator.FromString("gemini") - translatedReq := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) { - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(model, *budgetOverride) - budgetOverride = &norm - } - translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) - } - translatedReq = util.StripThinkingConfigIfUnsupported(model, translatedReq) - translatedReq = fixGeminiImageAspectRatio(model, translatedReq) - translatedReq, _ = sjson.SetBytes(translatedReq, "model", model) + + translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + translatedReq, _ = thinking.ApplyThinking(translatedReq, req.Model, "gemini") + + translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq) + translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel) respCtx := context.WithValue(ctx, "alt", opts.Alt) translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") @@ -726,7 +687,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth * if baseURL == "" { baseURL = "https://generativelanguage.googleapis.com" } - url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, "countTokens") + url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, "countTokens") httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq)) if errNewReq != nil { @@ -780,10 +741,6 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth * return cliproxyexecutor.Response{}, errRead } appendAPIResponseChunk(ctx, e.cfg, data) - if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { - log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data)) - return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)} - } count := gjson.GetBytes(data, "totalTokens").Int() out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data) return cliproxyexecutor.Response{Payload: []byte(out)}, nil @@ -870,53 +827,6 @@ func vertexAccessToken(ctx context.Context, cfg *config.Config, auth *cliproxyau return tok.AccessToken, nil } -// resolveUpstreamModel resolves the upstream model name from vertex-api-key configuration. -// It matches the requested model alias against configured models and returns the actual upstream name. 
-func (e *GeminiVertexExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string { - trimmed := strings.TrimSpace(alias) - if trimmed == "" { - return "" - } - - entry := e.resolveVertexConfig(auth) - if entry == nil { - return "" - } - - normalizedModel, metadata := util.NormalizeThinkingModel(trimmed) - - // Candidate names to match against configured aliases/names. - candidates := []string{strings.TrimSpace(normalizedModel)} - if !strings.EqualFold(normalizedModel, trimmed) { - candidates = append(candidates, trimmed) - } - if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) { - candidates = append(candidates, original) - } - - for i := range entry.Models { - model := entry.Models[i] - name := strings.TrimSpace(model.Name) - modelAlias := strings.TrimSpace(model.Alias) - - for _, candidate := range candidates { - if candidate == "" { - continue - } - if modelAlias != "" && strings.EqualFold(modelAlias, candidate) { - if name != "" { - return name - } - return candidate - } - if name != "" && strings.EqualFold(name, candidate) { - return name - } - } - } - return "" -} - // resolveVertexConfig finds the matching vertex-api-key configuration entry for the given auth. func (e *GeminiVertexExecutor) resolveVertexConfig(auth *cliproxyauth.Auth) *config.VertexCompatKey { if auth == nil || e.cfg == nil { diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index c8b7706c..cc158250 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -12,6 +12,7 @@ import ( iflowauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/iflow" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -67,6 +68,8 @@ func (e *IFlowExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth // Execute performs a non-streaming chat completion request. 
func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, baseURL := iflowCreds(auth) if strings.TrimSpace(apiKey) == "" { err = fmt.Errorf("iflow executor: missing api key") @@ -76,7 +79,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re baseURL = iflowauth.DefaultAPIBaseURL } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat @@ -85,17 +88,14 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false) - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) - body, _ = sjson.SetBytes(body, "model", req.Model) - body = NormalizeThinkingConfig(body, req.Model, false) - if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil { - return resp, errValidate - } - body = applyIFlowThinkingConfig(body) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + body, _ = sjson.SetBytes(body, "model", baseModel) + + body, _ = thinking.ApplyThinking(body, req.Model, "iflow") + body = preserveReasoningContentInMessages(body) - body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint @@ -154,6 +154,8 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re reporter.ensurePublished(ctx) var param any + // Note: TranslateNonStream uses req.Model (original with suffix) to preserve + // the original model name in the response for client compatibility. out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, ¶m) resp = cliproxyexecutor.Response{Payload: []byte(out)} return resp, nil @@ -161,6 +163,8 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re // ExecuteStream performs a streaming chat completion request. 
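For iFlow, the model-specific thinking knobs previously applied inline (`applyIFlowThinkingConfig`, removed in the hunks below) map a normalized effort onto GLM's chat-template flag or MiniMax's `reasoning_split`. Presumably the registered `internal/thinking/provider/iflow` package now owns an equivalent mapping; its code is not part of this excerpt, so the sketch below simply restates the outgoing logic in runnable form:

```go
package main

import (
	"fmt"
	"strings"

	"github.com/tidwall/sjson"
)

// applyIFlowStyleThinking mirrors the removed per-model mapping:
// GLM-4.x models take chat_template_kwargs, MiniMax M2 takes reasoning_split.
func applyIFlowStyleThinking(body []byte, model string, enable bool) []byte {
	switch model := strings.ToLower(model); {
	case strings.HasPrefix(model, "glm-4"):
		body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enable)
		if enable {
			body, _ = sjson.SetBytes(body, "chat_template_kwargs.clear_thinking", false)
		}
	case strings.HasPrefix(model, "minimax-m2"):
		body, _ = sjson.SetBytes(body, "reasoning_split", enable)
	}
	return body
}

func main() {
	fmt.Println(string(applyIFlowStyleThinking([]byte(`{}`), "glm-4.6", true)))
	fmt.Println(string(applyIFlowStyleThinking([]byte(`{}`), "minimax-m2.1", true)))
}
```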
func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, baseURL := iflowCreds(auth) if strings.TrimSpace(apiKey) == "" { err = fmt.Errorf("iflow executor: missing api key") @@ -170,7 +174,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au baseURL = iflowauth.DefaultAPIBaseURL } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat @@ -179,23 +183,19 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + body, _ = sjson.SetBytes(body, "model", baseModel) + + body, _ = thinking.ApplyThinking(body, req.Model, "iflow") - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) - body, _ = sjson.SetBytes(body, "model", req.Model) - body = NormalizeThinkingConfig(body, req.Model, false) - if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil { - return nil, errValidate - } - body = applyIFlowThinkingConfig(body) body = preserveReasoningContentInMessages(body) // Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour. toolsResult := gjson.GetBytes(body, "tools") if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 { body = ensureToolsArray(body) } - body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint @@ -278,11 +278,13 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au } func (e *IFlowExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - enc, err := tokenizerForModel(req.Model) + enc, err := tokenizerForModel(baseModel) if err != nil { return cliproxyexecutor.Response{}, fmt.Errorf("iflow executor: tokenizer init failed: %w", err) } @@ -520,41 +522,3 @@ func preserveReasoningContentInMessages(body []byte) []byte { return body } - -// applyIFlowThinkingConfig converts normalized reasoning_effort to model-specific thinking configurations. -// This should be called after NormalizeThinkingConfig has processed the payload. 
-// -// Model-specific handling: -// - GLM-4.6/4.7: Uses chat_template_kwargs.enable_thinking (boolean) and chat_template_kwargs.clear_thinking=false -// - MiniMax M2/M2.1: Uses reasoning_split=true for OpenAI-style reasoning separation -func applyIFlowThinkingConfig(body []byte) []byte { - effort := gjson.GetBytes(body, "reasoning_effort") - if !effort.Exists() { - return body - } - - model := strings.ToLower(gjson.GetBytes(body, "model").String()) - val := strings.ToLower(strings.TrimSpace(effort.String())) - enableThinking := val != "none" && val != "" - - // Remove reasoning_effort as we'll convert to model-specific format - body, _ = sjson.DeleteBytes(body, "reasoning_effort") - body, _ = sjson.DeleteBytes(body, "thinking") - - // GLM-4.6/4.7: Use chat_template_kwargs - if strings.HasPrefix(model, "glm-4") { - body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking) - if enableThinking { - body, _ = sjson.SetBytes(body, "chat_template_kwargs.clear_thinking", false) - } - return body - } - - // MiniMax M2/M2.1: Use reasoning_split - if strings.HasPrefix(model, "minimax-m2") { - body, _ = sjson.SetBytes(body, "reasoning_split", enableThinking) - return body - } - - return body -} diff --git a/internal/runtime/executor/iflow_executor_test.go b/internal/runtime/executor/iflow_executor_test.go new file mode 100644 index 00000000..e588548b --- /dev/null +++ b/internal/runtime/executor/iflow_executor_test.go @@ -0,0 +1,67 @@ +package executor + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" +) + +func TestIFlowExecutorParseSuffix(t *testing.T) { + tests := []struct { + name string + model string + wantBase string + wantLevel string + }{ + {"no suffix", "glm-4", "glm-4", ""}, + {"glm with suffix", "glm-4.1-flash(high)", "glm-4.1-flash", "high"}, + {"minimax no suffix", "minimax-m2", "minimax-m2", ""}, + {"minimax with suffix", "minimax-m2.1(medium)", "minimax-m2.1", "medium"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := thinking.ParseSuffix(tt.model) + if result.ModelName != tt.wantBase { + t.Errorf("ParseSuffix(%q).ModelName = %q, want %q", tt.model, result.ModelName, tt.wantBase) + } + }) + } +} + +func TestPreserveReasoningContentInMessages(t *testing.T) { + tests := []struct { + name string + input []byte + want []byte // nil means output should equal input + }{ + { + "non-glm model passthrough", + []byte(`{"model":"gpt-4","messages":[]}`), + nil, + }, + { + "glm model with empty messages", + []byte(`{"model":"glm-4","messages":[]}`), + nil, + }, + { + "glm model preserves existing reasoning_content", + []byte(`{"model":"glm-4","messages":[{"role":"assistant","content":"hi","reasoning_content":"thinking..."}]}`), + nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := preserveReasoningContentInMessages(tt.input) + want := tt.want + if want == nil { + want = tt.input + } + if string(got) != string(want) { + t.Errorf("preserveReasoningContentInMessages() = %s, want %s", got, want) + } + }) + } +} diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index 04dbf23f..22e8b4c8 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -11,6 +11,7 @@ import ( "time" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" 
"github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -69,7 +70,9 @@ func (e *OpenAICompatExecutor) HttpRequest(ctx context.Context, auth *cliproxyau } func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) baseURL, apiKey := e.resolveCredentials(auth) @@ -85,19 +88,11 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, opts.Stream) - translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), opts.Stream) - modelOverride := e.resolveUpstreamModel(req.Model, auth) - if modelOverride != "" { - translated = e.overrideModel(translated, modelOverride) - } - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated) - allowCompat := e.allowCompatReasoningEffort(req.Model, auth) - translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat) - translated = NormalizeThinkingConfig(translated, req.Model, allowCompat) - if errValidate := ValidateThinkingConfig(translated, req.Model); errValidate != nil { - return resp, errValidate - } + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, opts.Stream) + translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), opts.Stream) + translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated) + + translated, _ = thinking.ApplyThinking(translated, req.Model, "openai") url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated)) @@ -168,7 +163,9 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A } func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) baseURL, apiKey := e.resolveCredentials(auth) @@ -176,25 +173,18 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy err = statusErr{code: http.StatusUnauthorized, msg: "missing provider baseURL"} return nil, err } + from := opts.SourceFormat to := sdktranslator.FromString("openai") originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) - translated := sdktranslator.TranslateRequest(from, to, req.Model, 
bytes.Clone(req.Payload), true) - modelOverride := e.resolveUpstreamModel(req.Model, auth) - if modelOverride != "" { - translated = e.overrideModel(translated, modelOverride) - } - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated) - allowCompat := e.allowCompatReasoningEffort(req.Model, auth) - translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat) - translated = NormalizeThinkingConfig(translated, req.Model, allowCompat) - if errValidate := ValidateThinkingConfig(translated, req.Model); errValidate != nil { - return nil, errValidate - } + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated) + + translated, _ = thinking.ApplyThinking(translated, req.Model, "openai") url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated)) @@ -293,15 +283,15 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy } func (e *OpenAICompatExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + from := opts.SourceFormat to := sdktranslator.FromString("openai") - translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - modelForCounting := req.Model - if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" { - translated = e.overrideModel(translated, modelOverride) - modelForCounting = modelOverride - } + modelForCounting := baseModel + + translated, _ = thinking.ApplyThinking(translated, req.Model, "openai") enc, err := tokenizerForModel(modelForCounting) if err != nil { @@ -336,53 +326,6 @@ func (e *OpenAICompatExecutor) resolveCredentials(auth *cliproxyauth.Auth) (base return } -func (e *OpenAICompatExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string { - if alias == "" || auth == nil || e.cfg == nil { - return "" - } - compat := e.resolveCompatConfig(auth) - if compat == nil { - return "" - } - for i := range compat.Models { - model := compat.Models[i] - if model.Alias != "" { - if strings.EqualFold(model.Alias, alias) { - if model.Name != "" { - return model.Name - } - return alias - } - continue - } - if strings.EqualFold(model.Name, alias) { - return model.Name - } - } - return "" -} - -func (e *OpenAICompatExecutor) allowCompatReasoningEffort(model string, auth *cliproxyauth.Auth) bool { - trimmed := strings.TrimSpace(model) - if trimmed == "" || e == nil || e.cfg == nil { - return false - } - compat := e.resolveCompatConfig(auth) - if compat == nil || len(compat.Models) == 0 { - return false - } - for i := range compat.Models { - entry := compat.Models[i] - if strings.EqualFold(strings.TrimSpace(entry.Alias), trimmed) { - return true - } - if strings.EqualFold(strings.TrimSpace(entry.Name), trimmed) { - return true - } - } - return false -} - func (e *OpenAICompatExecutor) resolveCompatConfig(auth *cliproxyauth.Auth) *config.OpenAICompatibility { if auth == 
nil || e.cfg == nil { return nil diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index e3cfc5d4..9014af87 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -1,109 +1,13 @@ package executor import ( - "fmt" - "net/http" "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) -// ApplyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192)) -// for standard Gemini format payloads. It normalizes the budget when the model supports thinking. -func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte { - // Use the alias from metadata if available, as it's registered in the global registry - // with thinking metadata; the upstream model name may not be registered. - lookupModel := util.ResolveOriginalModel(model, metadata) - - // Determine which model to use for thinking support check. - // If the alias (lookupModel) is not in the registry, fall back to the upstream model. - thinkingModel := lookupModel - if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) { - thinkingModel = model - } - - budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata) - if !ok || (budgetOverride == nil && includeOverride == nil) { - return payload - } - if !util.ModelSupportsThinking(thinkingModel) { - return payload - } - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride) - budgetOverride = &norm - } - return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride) -} - -// ApplyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192)) -// for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking. -func ApplyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte { - // Use the alias from metadata if available, as it's registered in the global registry - // with thinking metadata; the upstream model name may not be registered. - lookupModel := util.ResolveOriginalModel(model, metadata) - - // Determine which model to use for thinking support check. - // If the alias (lookupModel) is not in the registry, fall back to the upstream model. - thinkingModel := lookupModel - if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) { - thinkingModel = model - } - - budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata) - if !ok || (budgetOverride == nil && includeOverride == nil) { - return payload - } - if !util.ModelSupportsThinking(thinkingModel) { - return payload - } - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride) - budgetOverride = &norm - } - return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride) -} - -// ApplyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path. -// Metadata values take precedence over any existing field when the model supports thinking, intentionally -// overwriting caller-provided values to honor suffix/default metadata priority. 
-func ApplyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string, allowCompat bool) []byte { - if len(metadata) == 0 { - return payload - } - if field == "" { - return payload - } - baseModel := util.ResolveOriginalModel(model, metadata) - if baseModel == "" { - baseModel = model - } - if !util.ModelSupportsThinking(baseModel) && !allowCompat { - return payload - } - if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" { - if util.ModelUsesThinkingLevels(baseModel) || allowCompat { - if updated, err := sjson.SetBytes(payload, field, effort); err == nil { - return updated - } - } - } - // Fallback: numeric thinking_budget suffix for level-based (OpenAI-style) models. - if util.ModelUsesThinkingLevels(baseModel) || allowCompat { - if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { - if effort, ok := util.ThinkingBudgetToEffort(baseModel, *budget); ok && effort != "" { - if updated, err := sjson.SetBytes(payload, field, effort); err == nil { - return updated - } - } - } - } - return payload -} - // applyPayloadConfigWithRoot behaves like applyPayloadConfig but treats all parameter // paths as relative to the provided root path (for example, "request" for Gemini CLI) // and restricts matches to the given protocol when supplied. Defaults are checked @@ -256,102 +160,3 @@ func matchModelPattern(pattern, model string) bool { } return pi == len(pattern) } - -// NormalizeThinkingConfig normalizes thinking-related fields in the payload -// based on model capabilities. For models without thinking support, it strips -// reasoning fields. For models with level-based thinking, it validates and -// normalizes the reasoning effort level. For models with numeric budget thinking, -// it strips the effort string fields. -func NormalizeThinkingConfig(payload []byte, model string, allowCompat bool) []byte { - if len(payload) == 0 || model == "" { - return payload - } - - if !util.ModelSupportsThinking(model) { - if allowCompat { - return payload - } - return StripThinkingFields(payload, false) - } - - if util.ModelUsesThinkingLevels(model) { - return NormalizeReasoningEffortLevel(payload, model) - } - - // Model supports thinking but uses numeric budgets, not levels. - // Strip effort string fields since they are not applicable. - return StripThinkingFields(payload, true) -} - -// StripThinkingFields removes thinking-related fields from the payload for -// models that do not support thinking. If effortOnly is true, only removes -// effort string fields (for models using numeric budgets). -func StripThinkingFields(payload []byte, effortOnly bool) []byte { - fieldsToRemove := []string{ - "reasoning_effort", - "reasoning.effort", - } - if !effortOnly { - fieldsToRemove = append([]string{"reasoning", "thinking"}, fieldsToRemove...) - } - out := payload - for _, field := range fieldsToRemove { - if gjson.GetBytes(out, field).Exists() { - out, _ = sjson.DeleteBytes(out, field) - } - } - return out -} - -// NormalizeReasoningEffortLevel validates and normalizes the reasoning_effort -// or reasoning.effort field for level-based thinking models. 
-func NormalizeReasoningEffortLevel(payload []byte, model string) []byte { - out := payload - - if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() { - if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok { - out, _ = sjson.SetBytes(out, "reasoning_effort", normalized) - } - } - - if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() { - if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok { - out, _ = sjson.SetBytes(out, "reasoning.effort", normalized) - } - } - - return out -} - -// ValidateThinkingConfig checks for unsupported reasoning levels on level-based models. -// Returns a statusErr with 400 when an unsupported level is supplied to avoid silently -// downgrading requests. -func ValidateThinkingConfig(payload []byte, model string) error { - if len(payload) == 0 || model == "" { - return nil - } - if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) { - return nil - } - - levels := util.GetModelThinkingLevels(model) - checkField := func(path string) error { - if effort := gjson.GetBytes(payload, path); effort.Exists() { - if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok { - return statusErr{ - code: http.StatusBadRequest, - msg: fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")), - } - } - } - return nil - } - - if err := checkField("reasoning_effort"); err != nil { - return err - } - if err := checkField("reasoning.effort"); err != nil { - return err - } - return nil -} diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index ee014fc7..f7162893 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -12,6 +12,7 @@ import ( qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" @@ -65,12 +66,14 @@ func (e *QwenExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth, } func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { - token, baseURL := qwenCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + token, baseURL := qwenCreds(auth) if baseURL == "" { baseURL = "https://portal.qwen.ai/v1" } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat @@ -79,15 +82,13 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false) - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) - body, _ = sjson.SetBytes(body, "model", req.Model) - body = 
NormalizeThinkingConfig(body, req.Model, false) - if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil { - return resp, errValidate - } - body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + body, _ = sjson.SetBytes(body, "model", baseModel) + + body, _ = thinking.ApplyThinking(body, req.Model, "openai") + + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) @@ -140,18 +141,22 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req appendAPIResponseChunk(ctx, e.cfg, data) reporter.publish(ctx, parseOpenAIUsage(data)) var param any + // Note: TranslateNonStream uses req.Model (original with suffix) to preserve + // the original model name in the response for client compatibility. out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, ¶m) resp = cliproxyexecutor.Response{Payload: []byte(out)} return resp, nil } func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - token, baseURL := qwenCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + token, baseURL := qwenCreds(auth) if baseURL == "" { baseURL = "https://portal.qwen.ai/v1" } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat @@ -160,15 +165,12 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + body, _ = sjson.SetBytes(body, "model", baseModel) + + body, _ = thinking.ApplyThinking(body, req.Model, "openai") - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) - body, _ = sjson.SetBytes(body, "model", req.Model) - body = NormalizeThinkingConfig(body, req.Model, false) - if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil { - return nil, errValidate - } toolsResult := gjson.GetBytes(body, "tools") // I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response. // This will have no real consequences. It's just to scare Qwen3. 
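The comment above documents why both Qwen and iFlow guard against an empty `tools` array: with no tool defined, Qwen3 randomly inserts tokens into its streaming response, so the hunk below injects a sentinel tool the model is told never to call. A runnable sketch of the guard (the sentinel here is abbreviated; the real definition is in the hunk that follows):

```go
package main

import (
	"fmt"

	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

// ensureNonEmptyTools injects a placeholder tool only when the payload
// carries an explicitly empty tools array.
func ensureNonEmptyTools(body []byte) []byte {
	tools := gjson.GetBytes(body, "tools")
	if tools.Exists() && tools.IsArray() && len(tools.Array()) == 0 {
		body, _ = sjson.SetRawBytes(body, "tools",
			[]byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances."}}]`))
	}
	return body
}

func main() {
	fmt.Println(string(ensureNonEmptyTools([]byte(`{"tools":[]}`)))) // sentinel injected
	fmt.Println(string(ensureNonEmptyTools([]byte(`{}`))))           // untouched
}
```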
@@ -176,7 +178,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 		body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
 	}
 	body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
-	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 
 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -256,13 +258,15 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 }
 
 func (e *QwenExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
 	modelName := gjson.GetBytes(body, "model").String()
 	if strings.TrimSpace(modelName) == "" {
-		modelName = req.Model
+		modelName = baseModel
 	}
 
 	enc, err := tokenizerForModel(modelName)
diff --git a/internal/runtime/executor/qwen_executor_test.go b/internal/runtime/executor/qwen_executor_test.go
new file mode 100644
index 00000000..6a777c53
--- /dev/null
+++ b/internal/runtime/executor/qwen_executor_test.go
@@ -0,0 +1,33 @@
+package executor
+
+import (
+	"testing"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+)
+
+func TestQwenExecutorParseSuffix(t *testing.T) {
+	tests := []struct {
+		name      string
+		model     string
+		wantBase  string
+		wantLevel string
+	}{
+		{"no suffix", "qwen-max", "qwen-max", ""},
+		{"with level suffix", "qwen-max(high)", "qwen-max", "high"},
+		{"with budget suffix", "qwen-max(16384)", "qwen-max", "16384"},
+		{"complex model name", "qwen-plus-latest(medium)", "qwen-plus-latest", "medium"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := thinking.ParseSuffix(tt.model)
+			if result.ModelName != tt.wantBase {
+				t.Errorf("ParseSuffix(%q).ModelName = %q, want %q", tt.model, result.ModelName, tt.wantBase)
+			}
+			if result.RawSuffix != tt.wantLevel {
+				t.Errorf("ParseSuffix(%q).RawSuffix = %q, want %q", tt.model, result.RawSuffix, tt.wantLevel)
+			}
+		})
+	}
+}
diff --git a/internal/runtime/executor/thinking_providers.go b/internal/runtime/executor/thinking_providers.go
new file mode 100644
index 00000000..99ac468d
--- /dev/null
+++ b/internal/runtime/executor/thinking_providers.go
@@ -0,0 +1,10 @@
+package executor
+
+import (
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/claude"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/codex"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai"
+)
diff --git a/internal/thinking/apply.go b/internal/thinking/apply.go
new file mode 100644
index
00000000..8ee60b8d --- /dev/null +++ b/internal/thinking/apply.go @@ -0,0 +1,430 @@ +// Package thinking provides unified thinking configuration processing. +package thinking + +import ( + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + log "github.com/sirupsen/logrus" + "github.com/tidwall/gjson" +) + +// providerAppliers maps provider names to their ProviderApplier implementations. +var providerAppliers = map[string]ProviderApplier{ + "gemini": nil, + "gemini-cli": nil, + "claude": nil, + "openai": nil, + "codex": nil, + "iflow": nil, + "antigravity": nil, +} + +// GetProviderApplier returns the ProviderApplier for the given provider name. +// Returns nil if the provider is not registered. +func GetProviderApplier(provider string) ProviderApplier { + return providerAppliers[provider] +} + +// RegisterProvider registers a provider applier by name. +func RegisterProvider(name string, applier ProviderApplier) { + providerAppliers[name] = applier +} + +// IsUserDefinedModel reports whether the model is a user-defined model that should +// have thinking configuration passed through without validation. +// +// User-defined models are configured via config file's models[] array +// (e.g., openai-compatibility.*.models[], *-api-key.models[]). These models +// are marked with UserDefined=true at registration time. +// +// User-defined models should have their thinking configuration applied directly, +// letting the upstream service validate the configuration. +func IsUserDefinedModel(modelInfo *registry.ModelInfo) bool { + if modelInfo == nil { + return false + } + return modelInfo.UserDefined +} + +// ApplyThinking applies thinking configuration to a request body. +// +// This is the unified entry point for all providers. It follows the processing +// order defined in FR25: route check → model capability query → config extraction +// → validation → application. +// +// Suffix Priority: When the model name includes a thinking suffix (e.g., "gemini-2.5-pro(8192)"), +// the suffix configuration takes priority over any thinking parameters in the request body. +// This enables users to override thinking settings via the model name without modifying their +// request payload. +// +// Parameters: +// - body: Original request body JSON +// - model: Model name, optionally with thinking suffix (e.g., "claude-sonnet-4-5(16384)") +// - provider: Provider name (gemini, gemini-cli, antigravity, claude, openai, codex, iflow) +// +// Returns: +// - Modified request body JSON with thinking configuration applied +// - Error if validation fails (ThinkingError). On error, the original body +// is returned (not nil) to enable defensive programming patterns. +// +// Passthrough behavior (returns original body without error): +// - Unknown provider (not in providerAppliers map) +// - modelInfo is nil (model not found in registry) +// - modelInfo.Thinking is nil (model doesn't support thinking) +// +// Example: +// +// // With suffix - suffix config takes priority +// result, err := thinking.ApplyThinking(body, "gemini-2.5-pro(8192)", "gemini") +// +// // Without suffix - uses body config +// result, err := thinking.ApplyThinking(body, "gemini-2.5-pro", "gemini") +func ApplyThinking(body []byte, model string, provider string) ([]byte, error) { + // 1. Route check: Get provider applier + applier := GetProviderApplier(provider) + if applier == nil { + log.WithField("provider", provider).Debug("thinking: unknown provider, passthrough") + return body, nil + } + + // 2. 
Parse suffix and get modelInfo + suffixResult := ParseSuffix(model) + baseModel := suffixResult.ModelName + modelInfo := registry.GetGlobalRegistry().GetModelInfo(baseModel) + + // 3. Model capability check + if modelInfo == nil { + log.WithField("model", model).Debug("thinking: nil modelInfo, passthrough") + return body, nil + } + if modelInfo.Thinking == nil { + if IsUserDefinedModel(modelInfo) { + return applyUserDefinedModel(body, modelInfo, provider, suffixResult) + } + config := extractThinkingConfig(body, provider) + if hasThinkingConfig(config) { + log.WithFields(log.Fields{ + "model": modelInfo.ID, + "provider": provider, + }).Debug("thinking: model does not support thinking, stripping config") + return StripThinkingConfig(body, provider), nil + } + log.WithField("model", modelInfo.ID).Debug("thinking: model does not support thinking, passthrough") + return body, nil + } + + // 4. Get config: suffix priority over body + var config ThinkingConfig + if suffixResult.HasSuffix { + config = parseSuffixToConfig(suffixResult.RawSuffix) + log.WithFields(log.Fields{ + "provider": provider, + "model": model, + "raw_suffix": suffixResult.RawSuffix, + "config": config, + }).Debug("thinking: using suffix config (priority)") + } else { + config = extractThinkingConfig(body, provider) + log.WithFields(log.Fields{ + "provider": provider, + "model": modelInfo.ID, + "config": config, + }).Debug("thinking: extracted config from request body") + } + + if !hasThinkingConfig(config) { + log.WithFields(log.Fields{ + "provider": provider, + "model": modelInfo.ID, + }).Debug("thinking: no config found, passthrough") + return body, nil + } + + // 5. Validate and normalize configuration + validated, err := ValidateConfig(config, modelInfo.Thinking) + if err != nil { + log.WithFields(log.Fields{ + "provider": provider, + "model": modelInfo.ID, + "error": err.Error(), + }).Warn("thinking: validation failed, returning original body") + // Return original body on validation failure (defensive programming). + // This ensures callers who ignore the error won't receive nil body. + // The upstream service will decide how to handle the unmodified request. + return body, err + } + + // Defensive check: ValidateConfig should never return (nil, nil) + if validated == nil { + log.WithFields(log.Fields{ + "provider": provider, + "model": modelInfo.ID, + }).Warn("thinking: ValidateConfig returned nil config without error, passthrough") + return body, nil + } + + log.WithFields(log.Fields{ + "provider": provider, + "model": modelInfo.ID, + "validated": *validated, + }).Debug("thinking: applying validated config") + + // 6. Apply configuration using provider-specific applier + return applier.Apply(body, *validated, modelInfo) +} + +// parseSuffixToConfig converts a raw suffix string to ThinkingConfig. +// +// Parsing priority: +// 1. Special values: "none" → ModeNone, "auto"/"-1" → ModeAuto +// 2. Level names: "minimal", "low", "medium", "high", "xhigh" → ModeLevel +// 3. Numeric values: positive integers → ModeBudget, 0 → ModeNone +// +// If none of the above match, returns empty ThinkingConfig (treated as no config). +func parseSuffixToConfig(rawSuffix string) ThinkingConfig { + // 1. Try special values first (none, auto, -1) + if mode, ok := ParseSpecialSuffix(rawSuffix); ok { + switch mode { + case ModeNone: + return ThinkingConfig{Mode: ModeNone, Budget: 0} + case ModeAuto: + return ThinkingConfig{Mode: ModeAuto, Budget: -1} + } + } + + // 2. 
Try level parsing (minimal, low, medium, high, xhigh) + if level, ok := ParseLevelSuffix(rawSuffix); ok { + return ThinkingConfig{Mode: ModeLevel, Level: level} + } + + // 3. Try numeric parsing + if budget, ok := ParseNumericSuffix(rawSuffix); ok { + if budget == 0 { + return ThinkingConfig{Mode: ModeNone, Budget: 0} + } + return ThinkingConfig{Mode: ModeBudget, Budget: budget} + } + + // Unknown suffix format - return empty config + log.WithField("raw_suffix", rawSuffix).Debug("thinking: unknown suffix format, treating as no config") + return ThinkingConfig{} +} + +// applyUserDefinedModel applies thinking configuration for user-defined models +// without ThinkingSupport validation. +func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, provider string, suffixResult SuffixResult) ([]byte, error) { + // Get config: suffix priority over body + var config ThinkingConfig + if suffixResult.HasSuffix { + config = parseSuffixToConfig(suffixResult.RawSuffix) + } else { + config = extractThinkingConfig(body, provider) + } + + if !hasThinkingConfig(config) { + log.WithFields(log.Fields{ + "model": modelInfo.ID, + "provider": provider, + "user_defined": true, + "passthrough": true, + }).Debug("thinking: user-defined model, no config, passthrough") + return body, nil + } + + applier := GetProviderApplier(provider) + if applier == nil { + log.WithFields(log.Fields{ + "model": modelInfo.ID, + "provider": provider, + "user_defined": true, + "passthrough": true, + }).Debug("thinking: user-defined model, unknown provider, passthrough") + return body, nil + } + + log.WithFields(log.Fields{ + "model": modelInfo.ID, + "provider": provider, + "user_defined": true, + "passthrough": false, + "config": config, + }).Debug("thinking: applying config for user-defined model (skip validation)") + + return applier.Apply(body, config, modelInfo) +} + +// extractThinkingConfig extracts provider-specific thinking config from request body. +func extractThinkingConfig(body []byte, provider string) ThinkingConfig { + if len(body) == 0 || !gjson.ValidBytes(body) { + return ThinkingConfig{} + } + + switch provider { + case "claude": + return extractClaudeConfig(body) + case "gemini", "gemini-cli", "antigravity": + return extractGeminiConfig(body, provider) + case "openai": + return extractOpenAIConfig(body) + case "codex": + return extractCodexConfig(body) + case "iflow": + return extractIFlowConfig(body) + default: + return ThinkingConfig{} + } +} + +func hasThinkingConfig(config ThinkingConfig) bool { + return config.Mode != ModeBudget || config.Budget != 0 || config.Level != "" +} + +// extractClaudeConfig extracts thinking configuration from Claude format request body. +// +// Claude API format: +// - thinking.type: "enabled" or "disabled" +// - thinking.budget_tokens: integer (-1=auto, 0=disabled, >0=budget) +// +// Priority: thinking.type="disabled" takes precedence over budget_tokens. +// When type="enabled" without budget_tokens, returns ModeAuto to indicate +// the user wants thinking enabled but didn't specify a budget. 
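+//
+// For example (values follow directly from the rules above; see extract_test.go):
+//
+//	{"thinking":{"type":"disabled","budget_tokens":8192}} → Mode: ModeNone, Budget: 0
+//	{"thinking":{"budget_tokens":-1}}                     → Mode: ModeAuto, Budget: -1
+//	{"thinking":{"budget_tokens":16384}}                  → Mode: ModeBudget, Budget: 16384
+//	{"thinking":{"type":"enabled"}}                       → Mode: ModeAuto, Budget: -1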
+func extractClaudeConfig(body []byte) ThinkingConfig { + thinkingType := gjson.GetBytes(body, "thinking.type").String() + if thinkingType == "disabled" { + return ThinkingConfig{Mode: ModeNone, Budget: 0} + } + + // Check budget_tokens + if budget := gjson.GetBytes(body, "thinking.budget_tokens"); budget.Exists() { + value := int(budget.Int()) + switch value { + case 0: + return ThinkingConfig{Mode: ModeNone, Budget: 0} + case -1: + return ThinkingConfig{Mode: ModeAuto, Budget: -1} + default: + return ThinkingConfig{Mode: ModeBudget, Budget: value} + } + } + + // If type="enabled" but no budget_tokens, treat as auto (user wants thinking but no budget specified) + if thinkingType == "enabled" { + return ThinkingConfig{Mode: ModeAuto, Budget: -1} + } + + return ThinkingConfig{} +} + +// extractGeminiConfig extracts thinking configuration from Gemini format request body. +// +// Gemini API format: +// - generationConfig.thinkingConfig.thinkingLevel: "none", "auto", or level name (Gemini 3) +// - generationConfig.thinkingConfig.thinkingBudget: integer (Gemini 2.5) +// +// For gemini-cli and antigravity providers, the path is prefixed with "request.". +// +// Priority: thinkingLevel is checked first (Gemini 3 format), then thinkingBudget (Gemini 2.5 format). +// This allows newer Gemini 3 level-based configs to take precedence. +func extractGeminiConfig(body []byte, provider string) ThinkingConfig { + prefix := "generationConfig.thinkingConfig" + if provider == "gemini-cli" || provider == "antigravity" { + prefix = "request.generationConfig.thinkingConfig" + } + + // Check thinkingLevel first (Gemini 3 format takes precedence) + if level := gjson.GetBytes(body, prefix+".thinkingLevel"); level.Exists() { + value := level.String() + switch value { + case "none": + return ThinkingConfig{Mode: ModeNone, Budget: 0} + case "auto": + return ThinkingConfig{Mode: ModeAuto, Budget: -1} + default: + return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(value)} + } + } + + // Check thinkingBudget (Gemini 2.5 format) + if budget := gjson.GetBytes(body, prefix+".thinkingBudget"); budget.Exists() { + value := int(budget.Int()) + switch value { + case 0: + return ThinkingConfig{Mode: ModeNone, Budget: 0} + case -1: + return ThinkingConfig{Mode: ModeAuto, Budget: -1} + default: + return ThinkingConfig{Mode: ModeBudget, Budget: value} + } + } + + return ThinkingConfig{} +} + +// extractOpenAIConfig extracts thinking configuration from OpenAI format request body. +// +// OpenAI API format: +// - reasoning_effort: "none", "low", "medium", "high" (discrete levels) +// +// OpenAI uses level-based thinking configuration only, no numeric budget support. +// The "none" value is treated specially to return ModeNone. +func extractOpenAIConfig(body []byte) ThinkingConfig { + // Check reasoning_effort (OpenAI Chat Completions format) + if effort := gjson.GetBytes(body, "reasoning_effort"); effort.Exists() { + value := effort.String() + if value == "none" { + return ThinkingConfig{Mode: ModeNone, Budget: 0} + } + return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(value)} + } + + return ThinkingConfig{} +} + +// extractCodexConfig extracts thinking configuration from Codex format request body. +// +// Codex API format (OpenAI Responses API): +// - reasoning.effort: "none", "low", "medium", "high" +// +// This is similar to OpenAI but uses nested field "reasoning.effort" instead of "reasoning_effort". 
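+//
+// For example (mirroring the codex cases in extract_test.go):
+//
+//	{"reasoning":{"effort":"high"}} → Mode: ModeLevel, Level: "high"
+//	{"reasoning":{"effort":"none"}} → Mode: ModeNone, Budget: 0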
+func extractCodexConfig(body []byte) ThinkingConfig { + // Check reasoning.effort (Codex / OpenAI Responses API format) + if effort := gjson.GetBytes(body, "reasoning.effort"); effort.Exists() { + value := effort.String() + if value == "none" { + return ThinkingConfig{Mode: ModeNone, Budget: 0} + } + return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(value)} + } + + return ThinkingConfig{} +} + +// extractIFlowConfig extracts thinking configuration from iFlow format request body. +// +// iFlow API format (supports multiple model families): +// - GLM format: chat_template_kwargs.enable_thinking (boolean) +// - MiniMax format: reasoning_split (boolean) +// +// Returns ModeBudget with Budget=1 as a sentinel value indicating "enabled". +// The actual budget/configuration is determined by the iFlow applier based on model capabilities. +// Budget=1 is used because iFlow models don't use numeric budgets; they only support on/off. +func extractIFlowConfig(body []byte) ThinkingConfig { + // GLM format: chat_template_kwargs.enable_thinking + if enabled := gjson.GetBytes(body, "chat_template_kwargs.enable_thinking"); enabled.Exists() { + if enabled.Bool() { + // Budget=1 is a sentinel meaning "enabled" (iFlow doesn't use numeric budgets) + return ThinkingConfig{Mode: ModeBudget, Budget: 1} + } + return ThinkingConfig{Mode: ModeNone, Budget: 0} + } + + // MiniMax format: reasoning_split + if split := gjson.GetBytes(body, "reasoning_split"); split.Exists() { + if split.Bool() { + // Budget=1 is a sentinel meaning "enabled" (iFlow doesn't use numeric budgets) + return ThinkingConfig{Mode: ModeBudget, Budget: 1} + } + return ThinkingConfig{Mode: ModeNone, Budget: 0} + } + + return ThinkingConfig{} +} diff --git a/internal/thinking/apply_main_test.go b/internal/thinking/apply_main_test.go new file mode 100644 index 00000000..93346109 --- /dev/null +++ b/internal/thinking/apply_main_test.go @@ -0,0 +1,144 @@ +// Package thinking provides unified thinking configuration processing logic. +package thinking + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/tidwall/gjson" +) + +// setupTestModels registers test models in the global registry for testing. +// This is required because ApplyThinking now looks up models by name. 
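+//
+// Typical usage, as in the tests below:
+//
+//	cleanup := setupTestModels(t)
+//	defer cleanup()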
+func setupTestModels(t *testing.T) func() { + t.Helper() + reg := registry.GetGlobalRegistry() + + // Register test models via RegisterClient (the correct API) + clientID := "test-thinking-client" + testModels := []*registry.ModelInfo{ + {ID: "test-thinking-model", Thinking: ®istry.ThinkingSupport{Min: 1, Max: 10}}, + {ID: "test-no-thinking", Type: "gemini"}, + {ID: "gpt-5.2-test", Thinking: ®istry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "medium", "high"}}}, + } + + reg.RegisterClient(clientID, "test", testModels) + + // Return cleanup function + return func() { + reg.UnregisterClient(clientID) + } +} + +func TestApplyThinkingPassthrough(t *testing.T) { + cleanup := setupTestModels(t) + defer cleanup() + + tests := []struct { + name string + body string + model string + provider string + }{ + {"unknown provider", `{"a":1}`, "test-thinking-model", "unknown"}, + {"unknown model", `{"a":1}`, "nonexistent-model", "gemini"}, + {"nil thinking support", `{"a":1}`, "test-no-thinking", "gemini"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ApplyThinking([]byte(tt.body), tt.model, tt.provider) + if err != nil { + t.Fatalf("ApplyThinking() error = %v", err) + } + if string(got) != tt.body { + t.Fatalf("ApplyThinking() = %s, want %s", string(got), tt.body) + } + }) + } +} + +func TestApplyThinkingValidationError(t *testing.T) { + cleanup := setupTestModels(t) + defer cleanup() + + tests := []struct { + name string + body string + model string + provider string + }{ + {"unsupported level", `{"reasoning_effort":"ultra"}`, "gpt-5.2-test", "openai"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ApplyThinking([]byte(tt.body), tt.model, tt.provider) + if err == nil { + t.Fatalf("ApplyThinking() error = nil, want error") + } + // On validation error, ApplyThinking returns original body (defensive programming) + if string(got) != tt.body { + t.Fatalf("ApplyThinking() body = %s, want original body %s", string(got), tt.body) + } + }) + } +} + +func TestApplyThinkingSuffixPriority(t *testing.T) { + cleanup := setupTestModels(t) + defer cleanup() + + // Register a model that supports thinking with budget + reg := registry.GetGlobalRegistry() + suffixClientID := "test-suffix-client" + testModels := []*registry.ModelInfo{ + { + ID: "gemini-2.5-pro-suffix-test", + Thinking: ®istry.ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: true}, + }, + } + reg.RegisterClient(suffixClientID, "gemini", testModels) + defer reg.UnregisterClient(suffixClientID) + + tests := []struct { + name string + body string + model string + provider string + checkPath string + expectedValue int + }{ + { + "suffix overrides body config", + `{"generationConfig":{"thinkingConfig":{"thinkingBudget":1000}}}`, + "gemini-2.5-pro-suffix-test(8192)", + "gemini", + "generationConfig.thinkingConfig.thinkingBudget", + 8192, + }, + { + "suffix none disables thinking", + `{"generationConfig":{"thinkingConfig":{"thinkingBudget":1000}}}`, + "gemini-2.5-pro-suffix-test(none)", + "gemini", + "generationConfig.thinkingConfig.thinkingBudget", + 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ApplyThinking([]byte(tt.body), tt.model, tt.provider) + if err != nil { + t.Fatalf("ApplyThinking() error = %v", err) + } + + // Use gjson to check the value + result := int(gjson.GetBytes(got, tt.checkPath).Int()) + if result != tt.expectedValue { + t.Fatalf("ApplyThinking() %s = %v, want %v", tt.checkPath, result, 
tt.expectedValue) + } + }) + } +} diff --git a/internal/thinking/apply_test.go b/internal/thinking/apply_test.go new file mode 100644 index 00000000..d89fff32 --- /dev/null +++ b/internal/thinking/apply_test.go @@ -0,0 +1,501 @@ +// Package thinking_test provides external tests for the thinking package. +package thinking_test + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" + + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/claude" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai" +) + +// registerTestModels sets up test models in the registry and returns a cleanup function. +func registerTestModels(t *testing.T) func() { + t.Helper() + reg := registry.GetGlobalRegistry() + + testModels := []*registry.ModelInfo{ + geminiBudgetModel(), + geminiLevelModel(), + claudeBudgetModel(), + openAILevelModel(), + iFlowModel(), + {ID: "claude-3"}, + {ID: "gemini-2.5-pro-strip"}, + {ID: "glm-4.6-strip"}, + } + + clientID := "test-thinking-models" + reg.RegisterClient(clientID, "test", testModels) + + return func() { + reg.UnregisterClient(clientID) + } +} + +// TestApplyThinking tests the main ApplyThinking entry point. +// +// ApplyThinking is the unified entry point for applying thinking configuration. +// It routes to the appropriate provider-specific applier based on model. +// +// Depends on: Epic 10 Story 10-2 (apply-thinking main entry) +func TestApplyThinking(t *testing.T) { + cleanup := registerTestModels(t) + defer cleanup() + + tests := []struct { + name string + body string + model string + provider string + check string + }{ + {"gemini budget", `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, "gemini-2.5-pro-test", "gemini", "geminiBudget"}, + {"gemini level", `{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}`, "gemini-3-pro-preview-test", "gemini", "geminiLevel"}, + {"gemini-cli budget", `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`, "gemini-2.5-pro-test", "gemini-cli", "geminiCliBudget"}, + {"antigravity budget", `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`, "gemini-2.5-pro-test", "antigravity", "geminiCliBudget"}, + {"claude budget", `{"thinking":{"budget_tokens":16384}}`, "claude-sonnet-4-5-test", "claude", "claudeBudget"}, + {"claude enabled type auto", `{"thinking":{"type":"enabled"}}`, "claude-sonnet-4-5-test", "claude", "claudeAuto"}, + {"openai level", `{"reasoning_effort":"high"}`, "gpt-5.2-test", "openai", "openaiLevel"}, + {"iflow enable", `{"chat_template_kwargs":{"enable_thinking":true}}`, "glm-4.6-test", "iflow", "iflowEnable"}, + {"unknown provider passthrough", `{"a":1}`, "gemini-2.5-pro-test", "unknown", "passthrough"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := thinking.ApplyThinking([]byte(tt.body), tt.model, tt.provider) + if err != nil { + t.Fatalf("ApplyThinking() error = %v", err) + } + assertApplyThinkingCheck(t, tt.check, tt.body, got) + }) + } +} + +func TestApplyThinkingErrors(t *testing.T) { + cleanup := registerTestModels(t) + defer cleanup() + + tests := []struct { + name string + body string + model 
string + provider string + }{ + {"unsupported level openai", `{"reasoning_effort":"ultra"}`, "gpt-5.2-test", "openai"}, + {"unsupported level gemini", `{"generationConfig":{"thinkingConfig":{"thinkingLevel":"ultra"}}}`, "gemini-3-pro-preview-test", "gemini"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := thinking.ApplyThinking([]byte(tt.body), tt.model, tt.provider) + if err == nil { + t.Fatalf("ApplyThinking() error = nil, want error") + } + // On validation error, ApplyThinking returns original body (defensive programming) + if string(got) != tt.body { + t.Fatalf("ApplyThinking() body = %s, want original body %s", string(got), tt.body) + } + }) + } +} + +func TestApplyThinkingStripOnUnsupportedModel(t *testing.T) { + cleanup := registerTestModels(t) + defer cleanup() + + tests := []struct { + name string + body string + model string + provider string + stripped []string + preserved []string + }{ + {"claude strip", `{"thinking":{"budget_tokens":8192},"model":"claude-3"}`, "claude-3", "claude", []string{"thinking"}, []string{"model"}}, + {"gemini strip", `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192},"temperature":0.7}}`, "gemini-2.5-pro-strip", "gemini", []string{"generationConfig.thinkingConfig"}, []string{"generationConfig.temperature"}}, + {"iflow strip", `{"chat_template_kwargs":{"enable_thinking":true,"clear_thinking":false,"other":"value"}}`, "glm-4.6-strip", "iflow", []string{"chat_template_kwargs.enable_thinking", "chat_template_kwargs.clear_thinking"}, []string{"chat_template_kwargs.other"}}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := thinking.ApplyThinking([]byte(tt.body), tt.model, tt.provider) + if err != nil { + t.Fatalf("ApplyThinking() error = %v", err) + } + + for _, path := range tt.stripped { + if gjson.GetBytes(got, path).Exists() { + t.Fatalf("expected %s to be stripped, got %s", path, string(got)) + } + } + for _, path := range tt.preserved { + if !gjson.GetBytes(got, path).Exists() { + t.Fatalf("expected %s to be preserved, got %s", path, string(got)) + } + } + }) + } +} + +func TestIsUserDefinedModel(t *testing.T) { + tests := []struct { + name string + modelInfo *registry.ModelInfo + want bool + }{ + {"nil modelInfo", nil, false}, + {"not user-defined no flag", ®istry.ModelInfo{ID: "test"}, false}, + {"not user-defined with type", ®istry.ModelInfo{ID: "test", Type: "openai"}, false}, + {"user-defined with flag", ®istry.ModelInfo{ID: "test", Type: "openai", UserDefined: true}, true}, + {"user-defined flag only", ®istry.ModelInfo{ID: "test", UserDefined: true}, true}, + {"has thinking not user-defined", ®istry.ModelInfo{ID: "test", Type: "openai", Thinking: ®istry.ThinkingSupport{Min: 1024}}, false}, + {"has thinking with user-defined flag", ®istry.ModelInfo{ID: "test", Type: "openai", Thinking: ®istry.ThinkingSupport{Min: 1024}, UserDefined: true}, true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := thinking.IsUserDefinedModel(tt.modelInfo); got != tt.want { + t.Fatalf("IsUserDefinedModel() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestApplyThinking_UserDefinedModel(t *testing.T) { + // Register user-defined test models + reg := registry.GetGlobalRegistry() + userDefinedModels := []*registry.ModelInfo{ + {ID: "custom-gpt", Type: "openai", UserDefined: true}, + {ID: "or-claude", Type: "openai", UserDefined: true}, + {ID: "custom-gemini", Type: "gemini", UserDefined: true}, + {ID: "vertex-flash", Type: "gemini", 
UserDefined: true}, + {ID: "cli-gemini", Type: "gemini", UserDefined: true}, + {ID: "ag-gemini", Type: "gemini", UserDefined: true}, + {ID: "custom-claude", Type: "claude", UserDefined: true}, + {ID: "unknown"}, + } + clientID := "test-user-defined-models" + reg.RegisterClient(clientID, "test", userDefinedModels) + defer reg.UnregisterClient(clientID) + + tests := []struct { + name string + body string + model string + provider string + check string + }{ + { + "openai user-defined with reasoning_effort", + `{"model":"custom-gpt","reasoning_effort":"high"}`, + "custom-gpt", + "openai", + "openaiCompatible", + }, + { + "openai-compatibility model with reasoning_effort", + `{"model":"or-claude","reasoning_effort":"high"}`, + "or-claude", + "openai", + "openaiCompatible", + }, + { + "gemini user-defined with thinkingBudget", + `{"model":"custom-gemini","generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, + "custom-gemini", + "gemini", + "geminiCompatibleBudget", + }, + { + "vertex user-defined with thinkingBudget", + `{"model":"vertex-flash","generationConfig":{"thinkingConfig":{"thinkingBudget":16384}}}`, + "vertex-flash", + "gemini", + "geminiCompatibleBudget16384", + }, + { + "gemini-cli user-defined with thinkingBudget", + `{"model":"cli-gemini","request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`, + "cli-gemini", + "gemini-cli", + "geminiCliCompatibleBudget", + }, + { + "antigravity user-defined with thinkingBudget", + `{"model":"ag-gemini","request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`, + "ag-gemini", + "antigravity", + "geminiCliCompatibleBudget", + }, + { + "claude user-defined with thinking", + `{"model":"custom-claude","thinking":{"type":"enabled","budget_tokens":8192}}`, + "custom-claude", + "claude", + "claudeCompatibleBudget", + }, + { + "user-defined model no config", + `{"model":"custom-gpt","messages":[]}`, + "custom-gpt", + "openai", + "passthrough", + }, + { + "non-user-defined model strips config", + `{"model":"unknown","reasoning_effort":"high"}`, + "unknown", + "openai", + "stripReasoning", + }, + { + "user-defined model unknown provider", + `{"model":"custom-gpt","reasoning_effort":"high"}`, + "custom-gpt", + "unknown", + "passthrough", + }, + { + "unknown model passthrough", + `{"model":"nonexistent","reasoning_effort":"high"}`, + "nonexistent", + "openai", + "passthrough", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := thinking.ApplyThinking([]byte(tt.body), tt.model, tt.provider) + if err != nil { + t.Fatalf("ApplyThinking() error = %v", err) + } + assertCompatibleModelCheck(t, tt.check, tt.body, got) + }) + } +} + +// TestApplyThinkingSuffixPriority tests suffix priority over body config. 
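+//
+// Sketch of the behavior under test (model and budgets as registered below):
+//
+//	"gemini-suffix-test(8192)" → suffix wins: thinkingBudget set to 8192
+//	"gemini-suffix-test(none)" → suffix wins: thinkingBudget set to 0
+//	"gemini-suffix-test"       → no suffix: body thinkingBudget (5000) kept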
+func TestApplyThinkingSuffixPriority(t *testing.T) { + // Register test model + reg := registry.GetGlobalRegistry() + testModels := []*registry.ModelInfo{ + { + ID: "gemini-suffix-test", + Thinking: ®istry.ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: true}, + }, + } + clientID := "test-suffix-priority" + reg.RegisterClient(clientID, "gemini", testModels) + defer reg.UnregisterClient(clientID) + + tests := []struct { + name string + body string + model string + provider string + checkPath string + expectedValue int + }{ + { + "suffix overrides body budget", + `{"generationConfig":{"thinkingConfig":{"thinkingBudget":1000}}}`, + "gemini-suffix-test(8192)", + "gemini", + "generationConfig.thinkingConfig.thinkingBudget", + 8192, + }, + { + "suffix none sets budget to 0", + `{"generationConfig":{"thinkingConfig":{"thinkingBudget":1000}}}`, + "gemini-suffix-test(none)", + "gemini", + "generationConfig.thinkingConfig.thinkingBudget", + 0, + }, + { + "no suffix uses body config", + `{"generationConfig":{"thinkingConfig":{"thinkingBudget":5000}}}`, + "gemini-suffix-test", + "gemini", + "generationConfig.thinkingConfig.thinkingBudget", + 5000, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := thinking.ApplyThinking([]byte(tt.body), tt.model, tt.provider) + if err != nil { + t.Fatalf("ApplyThinking() error = %v", err) + } + + result := int(gjson.GetBytes(got, tt.checkPath).Int()) + if result != tt.expectedValue { + t.Fatalf("ApplyThinking() %s = %v, want %v\nbody: %s", tt.checkPath, result, tt.expectedValue, string(got)) + } + }) + } +} + +func assertApplyThinkingCheck(t *testing.T, checkName, input string, body []byte) { + t.Helper() + + switch checkName { + case "geminiBudget": + assertJSONInt(t, body, "generationConfig.thinkingConfig.thinkingBudget", 8192) + assertJSONBool(t, body, "generationConfig.thinkingConfig.includeThoughts", true) + case "geminiLevel": + assertJSONString(t, body, "generationConfig.thinkingConfig.thinkingLevel", "high") + assertJSONBool(t, body, "generationConfig.thinkingConfig.includeThoughts", true) + case "geminiCliBudget": + assertJSONInt(t, body, "request.generationConfig.thinkingConfig.thinkingBudget", 8192) + assertJSONBool(t, body, "request.generationConfig.thinkingConfig.includeThoughts", true) + case "claudeBudget": + assertJSONString(t, body, "thinking.type", "enabled") + assertJSONInt(t, body, "thinking.budget_tokens", 16384) + case "claudeAuto": + // When type=enabled without budget, auto mode is applied using mid-range budget + assertJSONString(t, body, "thinking.type", "enabled") + // Budget should be mid-range: (1024 + 128000) / 2 = 64512 + assertJSONInt(t, body, "thinking.budget_tokens", 64512) + case "openaiLevel": + assertJSONString(t, body, "reasoning_effort", "high") + case "iflowEnable": + assertJSONBool(t, body, "chat_template_kwargs.enable_thinking", true) + assertJSONBool(t, body, "chat_template_kwargs.clear_thinking", false) + case "passthrough": + if string(body) != input { + t.Fatalf("ApplyThinking() = %s, want %s", string(body), input) + } + default: + t.Fatalf("unknown check: %s", checkName) + } +} + +func assertCompatibleModelCheck(t *testing.T, checkName, input string, body []byte) { + t.Helper() + + switch checkName { + case "openaiCompatible": + assertJSONString(t, body, "reasoning_effort", "high") + case "geminiCompatibleBudget": + assertJSONInt(t, body, "generationConfig.thinkingConfig.thinkingBudget", 8192) + assertJSONBool(t, body, "generationConfig.thinkingConfig.includeThoughts", true) + 
case "geminiCompatibleBudget16384": + assertJSONInt(t, body, "generationConfig.thinkingConfig.thinkingBudget", 16384) + assertJSONBool(t, body, "generationConfig.thinkingConfig.includeThoughts", true) + case "geminiCliCompatibleBudget": + assertJSONInt(t, body, "request.generationConfig.thinkingConfig.thinkingBudget", 8192) + assertJSONBool(t, body, "request.generationConfig.thinkingConfig.includeThoughts", true) + case "claudeCompatibleBudget": + assertJSONString(t, body, "thinking.type", "enabled") + assertJSONInt(t, body, "thinking.budget_tokens", 8192) + case "stripReasoning": + if gjson.GetBytes(body, "reasoning_effort").Exists() { + t.Fatalf("expected reasoning_effort to be stripped, got %s", string(body)) + } + case "passthrough": + if string(body) != input { + t.Fatalf("ApplyThinking() = %s, want %s", string(body), input) + } + default: + t.Fatalf("unknown check: %s", checkName) + } +} + +func assertJSONString(t *testing.T, body []byte, path, want string) { + t.Helper() + value := gjson.GetBytes(body, path) + if !value.Exists() { + t.Fatalf("expected %s to exist", path) + } + if value.String() != want { + t.Fatalf("value at %s = %s, want %s", path, value.String(), want) + } +} + +func assertJSONInt(t *testing.T, body []byte, path string, want int) { + t.Helper() + value := gjson.GetBytes(body, path) + if !value.Exists() { + t.Fatalf("expected %s to exist", path) + } + if int(value.Int()) != want { + t.Fatalf("value at %s = %d, want %d", path, value.Int(), want) + } +} + +func assertJSONBool(t *testing.T, body []byte, path string, want bool) { + t.Helper() + value := gjson.GetBytes(body, path) + if !value.Exists() { + t.Fatalf("expected %s to exist", path) + } + if value.Bool() != want { + t.Fatalf("value at %s = %t, want %t", path, value.Bool(), want) + } +} + +func geminiBudgetModel() *registry.ModelInfo { + return ®istry.ModelInfo{ + ID: "gemini-2.5-pro-test", + Thinking: ®istry.ThinkingSupport{ + Min: 128, + Max: 32768, + ZeroAllowed: true, + }, + } +} + +func geminiLevelModel() *registry.ModelInfo { + return ®istry.ModelInfo{ + ID: "gemini-3-pro-preview-test", + Thinking: ®istry.ThinkingSupport{ + Min: 128, + Max: 32768, + Levels: []string{"minimal", "low", "medium", "high"}, + }, + } +} + +func claudeBudgetModel() *registry.ModelInfo { + return ®istry.ModelInfo{ + ID: "claude-sonnet-4-5-test", + Thinking: ®istry.ThinkingSupport{ + Min: 1024, + Max: 128000, + ZeroAllowed: true, + }, + } +} + +func openAILevelModel() *registry.ModelInfo { + return ®istry.ModelInfo{ + ID: "gpt-5.2-test", + Thinking: ®istry.ThinkingSupport{ + Min: 128, + Max: 32768, + ZeroAllowed: true, + Levels: []string{"low", "medium", "high"}, + }, + } +} + +func iFlowModel() *registry.ModelInfo { + return ®istry.ModelInfo{ + ID: "glm-4.6-test", + Thinking: ®istry.ThinkingSupport{ + Min: 1, + Max: 10, + ZeroAllowed: true, + }, + } +} diff --git a/internal/thinking/convert.go b/internal/thinking/convert.go new file mode 100644 index 00000000..92e54120 --- /dev/null +++ b/internal/thinking/convert.go @@ -0,0 +1,233 @@ +package thinking + +import ( + "fmt" + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" +) + +// levelToBudgetMap defines the standard Level → Budget mapping. +// All keys are lowercase; lookups should use strings.ToLower. +var levelToBudgetMap = map[string]int{ + "none": 0, + "auto": -1, + "minimal": 512, + "low": 1024, + "medium": 8192, + "high": 24576, + "xhigh": 32768, +} + +// ConvertLevelToBudget converts a thinking level to a budget value. 
+// +// This is a semantic conversion that maps discrete levels to numeric budgets. +// Level matching is case-insensitive. +// +// Level → Budget mapping: +// - none → 0 +// - auto → -1 +// - minimal → 512 +// - low → 1024 +// - medium → 8192 +// - high → 24576 +// - xhigh → 32768 +// +// Returns: +// - budget: The converted budget value +// - ok: true if level is valid, false otherwise +func ConvertLevelToBudget(level string) (int, bool) { + budget, ok := levelToBudgetMap[strings.ToLower(level)] + return budget, ok +} + +// BudgetThreshold constants define the upper bounds for each thinking level. +// These are used by ConvertBudgetToLevel for range-based mapping. +const ( + // ThresholdMinimal is the upper bound for "minimal" level (1-512) + ThresholdMinimal = 512 + // ThresholdLow is the upper bound for "low" level (513-1024) + ThresholdLow = 1024 + // ThresholdMedium is the upper bound for "medium" level (1025-8192) + ThresholdMedium = 8192 + // ThresholdHigh is the upper bound for "high" level (8193-24576) + ThresholdHigh = 24576 +) + +// ConvertBudgetToLevel converts a budget value to the nearest thinking level. +// +// This is a semantic conversion that maps numeric budgets to discrete levels. +// Uses threshold-based mapping for range conversion. +// +// Budget → Level thresholds: +// - -1 → auto +// - 0 → none +// - 1-512 → minimal +// - 513-1024 → low +// - 1025-8192 → medium +// - 8193-24576 → high +// - 24577+ → xhigh +// +// Returns: +// - level: The converted thinking level string +// - ok: true if budget is valid, false for invalid negatives (< -1) +func ConvertBudgetToLevel(budget int) (string, bool) { + switch { + case budget < -1: + // Invalid negative values + return "", false + case budget == -1: + return string(LevelAuto), true + case budget == 0: + return string(LevelNone), true + case budget <= ThresholdMinimal: + return string(LevelMinimal), true + case budget <= ThresholdLow: + return string(LevelLow), true + case budget <= ThresholdMedium: + return string(LevelMedium), true + case budget <= ThresholdHigh: + return string(LevelHigh), true + default: + return string(LevelXHigh), true + } +} + +// ModelCapability describes the thinking format support of a model. +type ModelCapability int + +const ( + // CapabilityUnknown indicates modelInfo is nil (passthrough behavior, internal use). + CapabilityUnknown ModelCapability = iota - 1 + // CapabilityNone indicates model doesn't support thinking (Thinking is nil). + CapabilityNone + // CapabilityBudgetOnly indicates the model supports numeric budgets only. + CapabilityBudgetOnly + // CapabilityLevelOnly indicates the model supports discrete levels only. + CapabilityLevelOnly + // CapabilityHybrid indicates the model supports both budgets and levels. + CapabilityHybrid +) + +// detectModelCapability determines the thinking format capability of a model. +// +// This is an internal function used by NormalizeForModel to decide conversion strategy. +// It analyzes the model's ThinkingSupport configuration to classify the model: +// - CapabilityNone: modelInfo.Thinking is nil (model doesn't support thinking) +// - CapabilityBudgetOnly: Has Min/Max but no Levels (Claude, Gemini 2.5) +// - CapabilityLevelOnly: Has Levels but no Min/Max (OpenAI, iFlow) +// - CapabilityHybrid: Has both Min/Max and Levels (Gemini 3) +// +// Note: Returns a special sentinel value when modelInfo itself is nil (unknown model). 
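+//
+// For example (ThinkingSupport shapes as exercised in convert_test.go):
+//
+//	&registry.ThinkingSupport{Min: 1024, Max: 128000}                    → CapabilityBudgetOnly
+//	&registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}} → CapabilityLevelOnly
+//	&registry.ThinkingSupport{Min: 128, Max: 32768, Levels: [...]}       → CapabilityHybrid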
+func detectModelCapability(modelInfo *registry.ModelInfo) ModelCapability { + if modelInfo == nil { + return CapabilityUnknown // sentinel for "passthrough" behavior + } + if modelInfo.Thinking == nil { + return CapabilityNone + } + support := modelInfo.Thinking + hasBudget := support.Min > 0 || support.Max > 0 + hasLevels := len(support.Levels) > 0 + + switch { + case hasBudget && hasLevels: + return CapabilityHybrid + case hasBudget: + return CapabilityBudgetOnly + case hasLevels: + return CapabilityLevelOnly + default: + return CapabilityNone + } +} + +// normalizeMixedConfig resolves a thinking configuration when both budget and level +// might be present, applying priority rules. +// +// Priority rules (Level takes precedence over Budget): +// - If level is non-empty: use level (special handling for "auto" and "none") +// - If level is empty and budget is set: use budget +// - If neither is set (budget=0, level=""): return ModeNone +// +// This function is used internally to handle ambiguous input configurations. +func normalizeMixedConfig(budget int, level string) ThinkingConfig { + normalizedLevel := strings.ToLower(strings.TrimSpace(level)) + if normalizedLevel != "" { + switch normalizedLevel { + case string(LevelAuto): + return ThinkingConfig{Mode: ModeAuto, Budget: -1, Level: ThinkingLevel(normalizedLevel)} + case string(LevelNone): + return ThinkingConfig{Mode: ModeNone, Budget: 0, Level: ThinkingLevel(normalizedLevel)} + default: + return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(normalizedLevel)} + } + } + switch budget { + case -1: + return ThinkingConfig{Mode: ModeAuto, Budget: -1} + case 0: + return ThinkingConfig{Mode: ModeNone, Budget: 0} + default: + return ThinkingConfig{Mode: ModeBudget, Budget: budget} + } +} + +// NormalizeForModel normalizes a thinking configuration for a specific model. 
+// +// This function converts the configuration format based on model capabilities: +// - Budget-only models (Claude, Gemini 2.5): Level → Budget conversion +// - Level-only models (OpenAI, iFlow): Budget → Level conversion +// - Hybrid models (Gemini 3): preserve the original format +// - No thinking support (Thinking is nil): degrade to ModeNone +// - Unknown model (modelInfo is nil): passthrough (preserve original format) +// +// Parameters: +// - config: The thinking configuration to normalize (must not be nil) +// - modelInfo: Model registry information containing ThinkingSupport properties +// +// Returns: +// - Normalized ThinkingConfig suitable for the model +// - Error if conversion fails (e.g., unsupported level or invalid budget) +func NormalizeForModel(config *ThinkingConfig, modelInfo *registry.ModelInfo) (*ThinkingConfig, error) { + if config == nil { + return nil, fmt.Errorf("thinking config is nil") + } + + normalized := *config + capability := detectModelCapability(modelInfo) + + // If model doesn't support thinking, degrade to ModeNone + if capability == CapabilityNone && config.Mode != ModeNone && config.Mode != ModeAuto { + return &ThinkingConfig{Mode: ModeNone, Budget: 0}, nil + } + + switch config.Mode { + case ModeAuto, ModeNone: + return &normalized, nil + case ModeBudget: + if capability == CapabilityLevelOnly { + level, ok := ConvertBudgetToLevel(config.Budget) + if !ok { + return nil, fmt.Errorf("invalid budget: %d", config.Budget) + } + normalized.Mode = ModeLevel + normalized.Level = ThinkingLevel(level) + normalized.Budget = 0 + } + return &normalized, nil + case ModeLevel: + if capability == CapabilityBudgetOnly { + budget, ok := ConvertLevelToBudget(string(config.Level)) + if !ok { + return nil, fmt.Errorf("unknown level: %s", config.Level) + } + normalized.Mode = ModeBudget + normalized.Budget = budget + normalized.Level = "" + } + return &normalized, nil + default: + return &normalized, nil + } +} diff --git a/internal/thinking/convert_test.go b/internal/thinking/convert_test.go new file mode 100644 index 00000000..eacc2532 --- /dev/null +++ b/internal/thinking/convert_test.go @@ -0,0 +1,277 @@ +// Package thinking provides unified thinking configuration processing logic. +package thinking + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" +) + +// TestConvertLevelToBudget tests the ConvertLevelToBudget function. +// +// ConvertLevelToBudget converts a thinking level to a budget value. +// This is a semantic conversion - it does NOT apply clamping. 
+// +// Level → Budget mapping: +// - none → 0 +// - auto → -1 +// - minimal → 512 +// - low → 1024 +// - medium → 8192 +// - high → 24576 +// - xhigh → 32768 +func TestConvertLevelToBudget(t *testing.T) { + tests := []struct { + name string + level string + want int + wantOK bool + }{ + // Standard levels + {"none", "none", 0, true}, + {"auto", "auto", -1, true}, + {"minimal", "minimal", 512, true}, + {"low", "low", 1024, true}, + {"medium", "medium", 8192, true}, + {"high", "high", 24576, true}, + {"xhigh", "xhigh", 32768, true}, + + // Case insensitive + {"case insensitive HIGH", "HIGH", 24576, true}, + {"case insensitive High", "High", 24576, true}, + {"case insensitive NONE", "NONE", 0, true}, + {"case insensitive Auto", "Auto", -1, true}, + + // Invalid levels + {"invalid ultra", "ultra", 0, false}, + {"invalid maximum", "maximum", 0, false}, + {"empty string", "", 0, false}, + {"whitespace", " ", 0, false}, + {"numeric string", "1000", 0, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + budget, ok := ConvertLevelToBudget(tt.level) + if ok != tt.wantOK { + t.Errorf("ConvertLevelToBudget(%q) ok = %v, want %v", tt.level, ok, tt.wantOK) + } + if budget != tt.want { + t.Errorf("ConvertLevelToBudget(%q) = %d, want %d", tt.level, budget, tt.want) + } + }) + } +} + +// TestConvertBudgetToLevel tests the ConvertBudgetToLevel function. +// +// ConvertBudgetToLevel converts a budget value to the nearest level. +// Uses threshold-based mapping for range conversion. +// +// Budget → Level thresholds: +// - -1 → auto +// - 0 → none +// - 1-512 → minimal +// - 513-1024 → low +// - 1025-8192 → medium +// - 8193-24576 → high +// - 24577+ → xhigh +// +// Depends on: Epic 4 Story 4-2 (budget to level conversion) +func TestConvertBudgetToLevel(t *testing.T) { + tests := []struct { + name string + budget int + want string + wantOK bool + }{ + // Special values + {"auto", -1, "auto", true}, + {"none", 0, "none", true}, + + // Invalid negative values + {"invalid negative -2", -2, "", false}, + {"invalid negative -100", -100, "", false}, + {"invalid negative extreme", -999999, "", false}, + + // Minimal range (1-512) + {"minimal min", 1, "minimal", true}, + {"minimal mid", 256, "minimal", true}, + {"minimal max", 512, "minimal", true}, + + // Low range (513-1024) + {"low start", 513, "low", true}, + {"low boundary", 1024, "low", true}, + + // Medium range (1025-8192) + {"medium start", 1025, "medium", true}, + {"medium mid", 4096, "medium", true}, + {"medium boundary", 8192, "medium", true}, + + // High range (8193-24576) + {"high start", 8193, "high", true}, + {"high mid", 16384, "high", true}, + {"high boundary", 24576, "high", true}, + + // XHigh range (24577+) + {"xhigh start", 24577, "xhigh", true}, + {"xhigh mid", 32768, "xhigh", true}, + {"xhigh large", 100000, "xhigh", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + level, ok := ConvertBudgetToLevel(tt.budget) + if ok != tt.wantOK { + t.Errorf("ConvertBudgetToLevel(%d) ok = %v, want %v", tt.budget, ok, tt.wantOK) + } + if level != tt.want { + t.Errorf("ConvertBudgetToLevel(%d) = %q, want %q", tt.budget, level, tt.want) + } + }) + } +} + +// TestConvertMixedFormat tests mixed format handling. +// +// Tests scenarios where both level and budget might be present, +// or where format conversion requires special handling. 
+// +// Depends on: Epic 4 Story 4-3 (mixed format handling) +func TestConvertMixedFormat(t *testing.T) { + tests := []struct { + name string + inputBudget int + inputLevel string + wantMode ThinkingMode + wantBudget int + wantLevel ThinkingLevel + }{ + // Level takes precedence when both present + {"level and budget - level wins", 8192, "high", ModeLevel, 0, LevelHigh}, + {"level and zero budget", 0, "high", ModeLevel, 0, LevelHigh}, + + // Budget only + {"budget only", 16384, "", ModeBudget, 16384, ""}, + + // Level only + {"level only", 0, "medium", ModeLevel, 0, LevelMedium}, + + // Neither (default) + {"neither", 0, "", ModeNone, 0, ""}, + + // Special values + {"auto level", 0, "auto", ModeAuto, -1, LevelAuto}, + {"none level", 0, "none", ModeNone, 0, LevelNone}, + {"auto budget", -1, "", ModeAuto, -1, ""}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := normalizeMixedConfig(tt.inputBudget, tt.inputLevel) + if got.Mode != tt.wantMode { + t.Errorf("normalizeMixedConfig(%d, %q) Mode = %v, want %v", tt.inputBudget, tt.inputLevel, got.Mode, tt.wantMode) + } + if got.Budget != tt.wantBudget { + t.Errorf("normalizeMixedConfig(%d, %q) Budget = %d, want %d", tt.inputBudget, tt.inputLevel, got.Budget, tt.wantBudget) + } + if got.Level != tt.wantLevel { + t.Errorf("normalizeMixedConfig(%d, %q) Level = %q, want %q", tt.inputBudget, tt.inputLevel, got.Level, tt.wantLevel) + } + }) + } +} + +// TestNormalizeForModel tests model-aware format normalization. +func TestNormalizeForModel(t *testing.T) { + budgetOnlyModel := ®istry.ModelInfo{ + Thinking: ®istry.ThinkingSupport{ + Min: 1024, + Max: 128000, + }, + } + levelOnlyModel := ®istry.ModelInfo{ + Thinking: ®istry.ThinkingSupport{ + Levels: []string{"low", "medium", "high"}, + }, + } + hybridModel := ®istry.ModelInfo{ + Thinking: ®istry.ThinkingSupport{ + Min: 128, + Max: 32768, + Levels: []string{"minimal", "low", "medium", "high"}, + }, + } + + tests := []struct { + name string + config ThinkingConfig + model *registry.ModelInfo + want ThinkingConfig + wantErr bool + }{ + {"budget-only keeps budget", ThinkingConfig{Mode: ModeBudget, Budget: 8192}, budgetOnlyModel, ThinkingConfig{Mode: ModeBudget, Budget: 8192}, false}, + {"budget-only converts level", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}, budgetOnlyModel, ThinkingConfig{Mode: ModeBudget, Budget: 24576}, false}, + {"level-only converts budget", ThinkingConfig{Mode: ModeBudget, Budget: 8192}, levelOnlyModel, ThinkingConfig{Mode: ModeLevel, Level: LevelMedium}, false}, + {"level-only keeps level", ThinkingConfig{Mode: ModeLevel, Level: LevelLow}, levelOnlyModel, ThinkingConfig{Mode: ModeLevel, Level: LevelLow}, false}, + {"hybrid keeps budget", ThinkingConfig{Mode: ModeBudget, Budget: 16384}, hybridModel, ThinkingConfig{Mode: ModeBudget, Budget: 16384}, false}, + {"hybrid keeps level", ThinkingConfig{Mode: ModeLevel, Level: LevelMinimal}, hybridModel, ThinkingConfig{Mode: ModeLevel, Level: LevelMinimal}, false}, + {"auto passthrough", ThinkingConfig{Mode: ModeAuto, Budget: -1}, levelOnlyModel, ThinkingConfig{Mode: ModeAuto, Budget: -1}, false}, + {"none passthrough", ThinkingConfig{Mode: ModeNone, Budget: 0}, budgetOnlyModel, ThinkingConfig{Mode: ModeNone, Budget: 0}, false}, + {"invalid level", ThinkingConfig{Mode: ModeLevel, Level: "ultra"}, budgetOnlyModel, ThinkingConfig{}, true}, + {"invalid budget", ThinkingConfig{Mode: ModeBudget, Budget: -2}, levelOnlyModel, ThinkingConfig{}, true}, + {"nil modelInfo passthrough budget", ThinkingConfig{Mode: 
ModeBudget, Budget: 8192}, nil, ThinkingConfig{Mode: ModeBudget, Budget: 8192}, false}, + {"nil modelInfo passthrough level", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}, nil, ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}, false}, + {"nil thinking degrades to none", ThinkingConfig{Mode: ModeBudget, Budget: 4096}, ®istry.ModelInfo{}, ThinkingConfig{Mode: ModeNone, Budget: 0}, false}, + {"nil thinking level degrades to none", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}, ®istry.ModelInfo{}, ThinkingConfig{Mode: ModeNone, Budget: 0}, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := NormalizeForModel(&tt.config, tt.model) + if (err != nil) != tt.wantErr { + t.Fatalf("NormalizeForModel(%+v) error = %v, wantErr %v", tt.config, err, tt.wantErr) + } + if tt.wantErr { + return + } + if got == nil { + t.Fatalf("NormalizeForModel(%+v) returned nil config", tt.config) + } + if got.Mode != tt.want.Mode { + t.Errorf("NormalizeForModel(%+v) Mode = %v, want %v", tt.config, got.Mode, tt.want.Mode) + } + if got.Budget != tt.want.Budget { + t.Errorf("NormalizeForModel(%+v) Budget = %d, want %d", tt.config, got.Budget, tt.want.Budget) + } + if got.Level != tt.want.Level { + t.Errorf("NormalizeForModel(%+v) Level = %q, want %q", tt.config, got.Level, tt.want.Level) + } + }) + } +} + +// TestLevelToBudgetRoundTrip tests level → budget → level round trip. +// +// Verifies that converting level to budget and back produces consistent results. +// +// Depends on: Epic 4 Story 4-1, 4-2 +func TestLevelToBudgetRoundTrip(t *testing.T) { + levels := []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"} + + for _, level := range levels { + t.Run(level, func(t *testing.T) { + budget, ok := ConvertLevelToBudget(level) + if !ok { + t.Fatalf("ConvertLevelToBudget(%q) returned ok=false", level) + } + resultLevel, ok := ConvertBudgetToLevel(budget) + if !ok { + t.Fatalf("ConvertBudgetToLevel(%d) returned ok=false", budget) + } + if resultLevel != level { + t.Errorf("round trip: %q → %d → %q, want %q", level, budget, resultLevel, level) + } + }) + } +} diff --git a/internal/thinking/errors.go b/internal/thinking/errors.go new file mode 100644 index 00000000..56f82c68 --- /dev/null +++ b/internal/thinking/errors.go @@ -0,0 +1,71 @@ +// Package thinking provides unified thinking configuration processing logic. +package thinking + +// ErrorCode represents the type of thinking configuration error. +type ErrorCode string + +// Error codes for thinking configuration processing. +const ( + // ErrInvalidSuffix indicates the suffix format cannot be parsed. + // Example: "model(abc" (missing closing parenthesis) + ErrInvalidSuffix ErrorCode = "INVALID_SUFFIX" + + // ErrUnknownLevel indicates the level value is not in the valid list. + // Example: "model(ultra)" where "ultra" is not a valid level + ErrUnknownLevel ErrorCode = "UNKNOWN_LEVEL" + + // ErrThinkingNotSupported indicates the model does not support thinking. + // Example: claude-haiku-4-5 does not have thinking capability + ErrThinkingNotSupported ErrorCode = "THINKING_NOT_SUPPORTED" + + // ErrLevelNotSupported indicates the model does not support level mode. + // Example: using level with a budget-only model + ErrLevelNotSupported ErrorCode = "LEVEL_NOT_SUPPORTED" + + // ErrProviderMismatch indicates the provider does not match the model. 
+ // Example: applying Claude format to a Gemini model + ErrProviderMismatch ErrorCode = "PROVIDER_MISMATCH" +) + +// ThinkingError represents an error that occurred during thinking configuration processing. +// +// This error type provides structured information about the error, including: +// - Code: A machine-readable error code for programmatic handling +// - Message: A human-readable description of the error +// - Model: The model name related to the error (optional) +// - Details: Additional context information (optional) +type ThinkingError struct { + // Code is the machine-readable error code + Code ErrorCode + // Message is the human-readable error description. + // Should be lowercase, no trailing period, with context if applicable. + Message string + // Model is the model name related to this error (optional) + Model string + // Details contains additional context information (optional) + Details map[string]interface{} +} + +// Error implements the error interface. +// Returns the message directly without code prefix. +// Use Code field for programmatic error handling. +func (e *ThinkingError) Error() string { + return e.Message +} + +// NewThinkingError creates a new ThinkingError with the given code and message. +func NewThinkingError(code ErrorCode, message string) *ThinkingError { + return &ThinkingError{ + Code: code, + Message: message, + } +} + +// NewThinkingErrorWithModel creates a new ThinkingError with model context. +func NewThinkingErrorWithModel(code ErrorCode, message, model string) *ThinkingError { + return &ThinkingError{ + Code: code, + Message: message, + Model: model, + } +} diff --git a/internal/thinking/errors_test.go b/internal/thinking/errors_test.go new file mode 100644 index 00000000..5ed2d0d0 --- /dev/null +++ b/internal/thinking/errors_test.go @@ -0,0 +1,34 @@ +// Package thinking provides unified thinking configuration processing logic. +package thinking + +import "testing" + +// TestThinkingErrorError tests the Error() method of ThinkingError. +// +// Error() returns the message directly without code prefix. +// Use Code field for programmatic error handling. 
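+//
+// A caller-side sketch (errors.As is from the standard library; the variable
+// names are illustrative only):
+//
+//	var te *ThinkingError
+//	if errors.As(err, &te) && te.Code == ErrUnknownLevel {
+//		// fall back to a default level
+//	}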
+func TestThinkingErrorError(t *testing.T) { + tests := []struct { + name string + err *ThinkingError + wantMsg string + wantCode ErrorCode + }{ + {"invalid suffix format", NewThinkingError(ErrInvalidSuffix, "invalid suffix format: model(abc"), "invalid suffix format: model(abc", ErrInvalidSuffix}, + {"unknown level", NewThinkingError(ErrUnknownLevel, "unknown level: ultra"), "unknown level: ultra", ErrUnknownLevel}, + {"level not supported", NewThinkingError(ErrLevelNotSupported, "level \"xhigh\" not supported, valid levels: low, medium, high"), "level \"xhigh\" not supported, valid levels: low, medium, high", ErrLevelNotSupported}, + {"thinking not supported", NewThinkingErrorWithModel(ErrThinkingNotSupported, "thinking not supported for this model", "claude-haiku"), "thinking not supported for this model", ErrThinkingNotSupported}, + {"provider mismatch", NewThinkingError(ErrProviderMismatch, "provider mismatch: expected claude, got gemini"), "provider mismatch: expected claude, got gemini", ErrProviderMismatch}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.err.Error(); got != tt.wantMsg { + t.Errorf("Error() = %q, want %q", got, tt.wantMsg) + } + if tt.err.Code != tt.wantCode { + t.Errorf("Code = %q, want %q", tt.err.Code, tt.wantCode) + } + }) + } +} diff --git a/internal/thinking/extract_test.go b/internal/thinking/extract_test.go new file mode 100644 index 00000000..c697e130 --- /dev/null +++ b/internal/thinking/extract_test.go @@ -0,0 +1,42 @@ +// Package thinking provides unified thinking configuration processing logic. +package thinking + +import "testing" + +func TestExtractThinkingConfig(t *testing.T) { + tests := []struct { + name string + body string + provider string + want ThinkingConfig + }{ + {"claude budget", `{"thinking":{"budget_tokens":16384}}`, "claude", ThinkingConfig{Mode: ModeBudget, Budget: 16384}}, + {"claude disabled type", `{"thinking":{"type":"disabled"}}`, "claude", ThinkingConfig{Mode: ModeNone, Budget: 0}}, + {"claude auto budget", `{"thinking":{"budget_tokens":-1}}`, "claude", ThinkingConfig{Mode: ModeAuto, Budget: -1}}, + {"claude enabled type without budget", `{"thinking":{"type":"enabled"}}`, "claude", ThinkingConfig{Mode: ModeAuto, Budget: -1}}, + {"claude enabled type with budget", `{"thinking":{"type":"enabled","budget_tokens":8192}}`, "claude", ThinkingConfig{Mode: ModeBudget, Budget: 8192}}, + {"claude disabled type overrides budget", `{"thinking":{"type":"disabled","budget_tokens":8192}}`, "claude", ThinkingConfig{Mode: ModeNone, Budget: 0}}, + {"gemini budget", `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, "gemini", ThinkingConfig{Mode: ModeBudget, Budget: 8192}}, + {"gemini level", `{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}`, "gemini", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}}, + {"gemini cli auto", `{"request":{"generationConfig":{"thinkingConfig":{"thinkingLevel":"auto"}}}}`, "gemini-cli", ThinkingConfig{Mode: ModeAuto, Budget: -1}}, + {"openai level", `{"reasoning_effort":"medium"}`, "openai", ThinkingConfig{Mode: ModeLevel, Level: LevelMedium}}, + {"openai none", `{"reasoning_effort":"none"}`, "openai", ThinkingConfig{Mode: ModeNone, Budget: 0}}, + {"codex effort high", `{"reasoning":{"effort":"high"}}`, "codex", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}}, + {"codex effort none", `{"reasoning":{"effort":"none"}}`, "codex", ThinkingConfig{Mode: ModeNone, Budget: 0}}, + {"iflow enable", `{"chat_template_kwargs":{"enable_thinking":true}}`, 
"iflow", ThinkingConfig{Mode: ModeBudget, Budget: 1}}, + {"iflow disable", `{"reasoning_split":false}`, "iflow", ThinkingConfig{Mode: ModeNone, Budget: 0}}, + {"unknown provider", `{"thinking":{"budget_tokens":123}}`, "unknown", ThinkingConfig{}}, + {"invalid json", `{"thinking":`, "claude", ThinkingConfig{}}, + {"empty body", "", "claude", ThinkingConfig{}}, + {"no config", `{}`, "gemini", ThinkingConfig{}}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := extractThinkingConfig([]byte(tt.body), tt.provider) + if got != tt.want { + t.Fatalf("extractThinkingConfig() = %+v, want %+v", got, tt.want) + } + }) + } +} diff --git a/internal/thinking/provider/claude/apply.go b/internal/thinking/provider/claude/apply.go new file mode 100644 index 00000000..e1409389 --- /dev/null +++ b/internal/thinking/provider/claude/apply.go @@ -0,0 +1,116 @@ +// Package claude implements thinking configuration scaffolding for Claude models. +// +// Claude models use the thinking.budget_tokens format with values in the range +// 1024-128000. Some Claude models support ZeroAllowed (sonnet-4-5, opus-4-5), +// while older models do not. +// See: _bmad-output/planning-artifacts/architecture.md#Epic-6 +package claude + +import ( + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// Applier implements thinking.ProviderApplier for Claude models. +// This applier is stateless and holds no configuration. +type Applier struct{} + +// NewApplier creates a new Claude thinking applier. +func NewApplier() *Applier { + return &Applier{} +} + +func init() { + thinking.RegisterProvider("claude", NewApplier()) +} + +// Apply applies thinking configuration to Claude request body. +// +// IMPORTANT: This method expects config to be pre-validated by thinking.ValidateConfig. +// ValidateConfig handles: +// - Mode conversion (Level→Budget, Auto→Budget) +// - Budget clamping to model range +// - ZeroAllowed constraint enforcement +// +// Apply only processes ModeBudget and ModeNone; other modes are passed through unchanged. 
+// +// Expected output format when enabled: +// +// { +// "thinking": { +// "type": "enabled", +// "budget_tokens": 16384 +// } +// } +// +// Expected output format when disabled: +// +// { +// "thinking": { +// "type": "disabled" +// } +// } +func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { + if modelInfo == nil { + return body, nil + } + if modelInfo.Thinking == nil { + if modelInfo.Type == "" { + modelID := modelInfo.ID + if modelID == "" { + modelID = "unknown" + } + return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID) + } + return applyCompatibleClaude(body, config) + } + + // Only process ModeBudget and ModeNone; other modes pass through + // (caller should use ValidateConfig first to normalize modes) + if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + // Budget is expected to be pre-validated by ValidateConfig (clamped, ZeroAllowed enforced) + // Decide enabled/disabled based on budget value + if config.Budget == 0 { + result, _ := sjson.SetBytes(body, "thinking.type", "disabled") + result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens") + return result, nil + } + + result, _ := sjson.SetBytes(body, "thinking.type", "enabled") + result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget) + return result, nil +} + +func applyCompatibleClaude(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + switch config.Mode { + case thinking.ModeNone: + result, _ := sjson.SetBytes(body, "thinking.type", "disabled") + result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens") + return result, nil + case thinking.ModeAuto: + result, _ := sjson.SetBytes(body, "thinking.type", "enabled") + result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens") + return result, nil + default: + result, _ := sjson.SetBytes(body, "thinking.type", "enabled") + result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget) + return result, nil + } +} diff --git a/internal/thinking/provider/claude/apply_test.go b/internal/thinking/provider/claude/apply_test.go new file mode 100644 index 00000000..769a17c4 --- /dev/null +++ b/internal/thinking/provider/claude/apply_test.go @@ -0,0 +1,288 @@ +// Package claude implements thinking configuration for Claude models. 
+package claude + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" +) + +// ============================================================================= +// Unit Tests: Applier Creation and Interface +// ============================================================================= + +func TestNewApplier(t *testing.T) { + applier := NewApplier() + if applier == nil { + t.Fatal("NewApplier() returned nil") + } +} + +func TestApplierImplementsInterface(t *testing.T) { + var _ thinking.ProviderApplier = (*Applier)(nil) +} + +// ============================================================================= +// Unit Tests: Budget and Disable Logic (Pre-validated Config) +// ============================================================================= + +// TestClaudeApplyBudgetAndNone tests budget values and disable modes. +// NOTE: These tests assume config has been pre-validated by ValidateConfig. +// Apply trusts the input and does not perform clamping. +func TestClaudeApplyBudgetAndNone(t *testing.T) { + applier := NewApplier() + modelInfo := buildClaudeModelInfo() + + tests := []struct { + name string + config thinking.ThinkingConfig + wantType string + wantBudget int + wantBudgetOK bool + }{ + // Valid pre-validated budget values + {"budget 16k", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}, "enabled", 16384, true}, + {"budget min", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1024}, "enabled", 1024, true}, + {"budget max", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 128000}, "enabled", 128000, true}, + {"budget mid", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 50000}, "enabled", 50000, true}, + // Disable cases + {"budget zero disables", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, "disabled", 0, false}, + {"mode none disables", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "disabled", 0, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := applier.Apply([]byte(`{}`), tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + thinkingType := gjson.GetBytes(result, "thinking.type").String() + if thinkingType != tt.wantType { + t.Fatalf("thinking.type = %q, want %q", thinkingType, tt.wantType) + } + + budgetValue := gjson.GetBytes(result, "thinking.budget_tokens") + if budgetValue.Exists() != tt.wantBudgetOK { + t.Fatalf("thinking.budget_tokens exists = %v, want %v", budgetValue.Exists(), tt.wantBudgetOK) + } + if tt.wantBudgetOK { + if got := int(budgetValue.Int()); got != tt.wantBudget { + t.Fatalf("thinking.budget_tokens = %d, want %d", got, tt.wantBudget) + } + } + }) + } +} + +// TestClaudeApplyPassthroughBudget tests that Apply trusts pre-validated budget values. +// It does NOT perform clamping - that's ValidateConfig's responsibility. 
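+// An out-of-range budget reaching Apply therefore indicates a caller that
+// skipped ValidateConfig, not a defect in the applier.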
+func TestClaudeApplyPassthroughBudget(t *testing.T) { + applier := NewApplier() + modelInfo := buildClaudeModelInfo() + + tests := []struct { + name string + config thinking.ThinkingConfig + wantBudget int + }{ + // Apply should pass through the budget value as-is + // (ValidateConfig would have clamped these, but Apply trusts the input) + {"passes through any budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 500}, 500}, + {"passes through large budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 200000}, 200000}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := applier.Apply([]byte(`{}`), tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + if got := int(gjson.GetBytes(result, "thinking.budget_tokens").Int()); got != tt.wantBudget { + t.Fatalf("thinking.budget_tokens = %d, want %d (passthrough)", got, tt.wantBudget) + } + }) + } +} + +// ============================================================================= +// Unit Tests: Mode Passthrough (Strict Layering) +// ============================================================================= + +// TestClaudeApplyModePassthrough tests that non-Budget/None modes pass through unchanged. +// Apply expects ValidateConfig to have already converted Level/Auto to Budget. +func TestClaudeApplyModePassthrough(t *testing.T) { + applier := NewApplier() + modelInfo := buildClaudeModelInfo() + + tests := []struct { + name string + config thinking.ThinkingConfig + body string + }{ + {"ModeLevel passes through", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: "high"}, `{"model":"test"}`}, + {"ModeAuto passes through", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, `{"model":"test"}`}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := applier.Apply([]byte(tt.body), tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + // Should return body unchanged + if string(result) != tt.body { + t.Fatalf("Apply() = %s, want %s (passthrough)", string(result), tt.body) + } + }) + } +} + +// ============================================================================= +// Unit Tests: Output Format +// ============================================================================= + +// TestClaudeApplyOutputFormat tests the exact JSON output format. 
+// +// Claude expects: +// +// { +// "thinking": { +// "type": "enabled", +// "budget_tokens": 16384 +// } +// } +func TestClaudeApplyOutputFormat(t *testing.T) { + tests := []struct { + name string + config thinking.ThinkingConfig + wantJSON string + }{ + { + "enabled with budget", + thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}, + `{"thinking":{"type":"enabled","budget_tokens":16384}}`, + }, + { + "disabled", + thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, + `{"thinking":{"type":"disabled"}}`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + applier := NewApplier() + modelInfo := buildClaudeModelInfo() + + result, err := applier.Apply([]byte(`{}`), tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + if string(result) != tt.wantJSON { + t.Fatalf("Apply() = %s, want %s", result, tt.wantJSON) + } + }) + } +} + +// ============================================================================= +// Unit Tests: Body Merging +// ============================================================================= + +// TestClaudeApplyWithExistingBody tests applying config to existing request body. +func TestClaudeApplyWithExistingBody(t *testing.T) { + tests := []struct { + name string + body string + config thinking.ThinkingConfig + wantBody string + }{ + { + "add to empty body", + `{}`, + thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}, + `{"thinking":{"type":"enabled","budget_tokens":16384}}`, + }, + { + "preserve existing fields", + `{"model":"claude-sonnet-4-5","messages":[]}`, + thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, + `{"model":"claude-sonnet-4-5","messages":[],"thinking":{"type":"enabled","budget_tokens":8192}}`, + }, + { + "override existing thinking", + `{"thinking":{"type":"enabled","budget_tokens":1000}}`, + thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}, + `{"thinking":{"type":"enabled","budget_tokens":16384}}`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + applier := NewApplier() + modelInfo := buildClaudeModelInfo() + + result, err := applier.Apply([]byte(tt.body), tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + if string(result) != tt.wantBody { + t.Fatalf("Apply() = %s, want %s", result, tt.wantBody) + } + }) + } +} + +// TestClaudeApplyWithNilBody tests handling of nil/empty body. 
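+// Apply treats nil, empty, and invalid bodies as `{}`, so all three cases
+// below should produce identical output.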
+func TestClaudeApplyWithNilBody(t *testing.T) {
+	applier := NewApplier()
+	modelInfo := buildClaudeModelInfo()
+
+	tests := []struct {
+		name       string
+		body       []byte
+		wantBudget int
+	}{
+		{"nil body", nil, 16384},
+		{"empty body", []byte{}, 16384},
+		{"empty object", []byte(`{}`), 16384},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}
+			result, err := applier.Apply(tt.body, config, modelInfo)
+			if err != nil {
+				t.Fatalf("Apply() error = %v", err)
+			}
+
+			if got := gjson.GetBytes(result, "thinking.type").String(); got != "enabled" {
+				t.Fatalf("thinking.type = %q, want %q", got, "enabled")
+			}
+			if got := int(gjson.GetBytes(result, "thinking.budget_tokens").Int()); got != tt.wantBudget {
+				t.Fatalf("thinking.budget_tokens = %d, want %d", got, tt.wantBudget)
+			}
+		})
+	}
+}
+
+// =============================================================================
+// Helper Functions
+// =============================================================================
+
+func buildClaudeModelInfo() *registry.ModelInfo {
+	return &registry.ModelInfo{
+		ID: "claude-sonnet-4-5",
+		Thinking: &registry.ThinkingSupport{
+			Min:            1024,
+			Max:            128000,
+			ZeroAllowed:    true,
+			DynamicAllowed: false,
+		},
+	}
+}
diff --git a/internal/thinking/provider/codex/apply.go b/internal/thinking/provider/codex/apply.go
new file mode 100644
index 00000000..386185a6
--- /dev/null
+++ b/internal/thinking/provider/codex/apply.go
@@ -0,0 +1,138 @@
+// Package codex implements thinking configuration for Codex (OpenAI Responses API) models.
+//
+// Codex models use the reasoning.effort format with discrete levels
+// (low/medium/high). This is similar to OpenAI but uses nested field
+// "reasoning.effort" instead of "reasoning_effort".
+// See: _bmad-output/planning-artifacts/architecture.md#Epic-8
+package codex
+
+import (
+	"strings"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// Applier implements thinking.ProviderApplier for Codex models.
+//
+// Codex-specific behavior:
+// - Output format: reasoning.effort (string: low/medium/high/xhigh)
+// - Level-only mode: no numeric budget support
+// - Some models support ZeroAllowed (gpt-5.1, gpt-5.2)
+type Applier struct{}
+
+var _ thinking.ProviderApplier = (*Applier)(nil)
+
+// NewApplier creates a new Codex thinking applier.
+func NewApplier() *Applier {
+	return &Applier{}
+}
+
+func init() {
+	thinking.RegisterProvider("codex", NewApplier())
+}
+
+// Apply applies thinking configuration to Codex request body.
+//
+// Expected output format:
+//
+//	{
+//	  "reasoning": {
+//	    "effort": "high"
+//	  }
+//	}
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
+	if modelInfo == nil {
+		return body, nil
+	}
+	if modelInfo.Thinking == nil {
+		if modelInfo.Type == "" {
+			modelID := modelInfo.ID
+			if modelID == "" {
+				modelID = "unknown"
+			}
+			return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID)
+		}
+		return applyCompatibleCodex(body, config)
+	}
+
+	// Only handle ModeLevel and ModeNone; other modes pass through unchanged.
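+	// (For level-only models the validation layer is expected to have already
+	// converted numeric budgets into a level; Apply does not convert here.)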
+ if config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + if config.Mode == thinking.ModeLevel { + result, _ := sjson.SetBytes(body, "reasoning.effort", string(config.Level)) + return result, nil + } + + effort := "" + support := modelInfo.Thinking + if config.Budget == 0 { + if support.ZeroAllowed || hasLevel(support.Levels, string(thinking.LevelNone)) { + effort = string(thinking.LevelNone) + } + } + if effort == "" && config.Level != "" { + effort = string(config.Level) + } + if effort == "" && len(support.Levels) > 0 { + effort = support.Levels[0] + } + if effort == "" { + return body, nil + } + + result, _ := sjson.SetBytes(body, "reasoning.effort", effort) + return result, nil +} + +func applyCompatibleCodex(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + var effort string + switch config.Mode { + case thinking.ModeLevel: + if config.Level == "" { + return body, nil + } + effort = string(config.Level) + case thinking.ModeNone: + effort = string(thinking.LevelNone) + if config.Level != "" { + effort = string(config.Level) + } + case thinking.ModeAuto: + // Auto mode for user-defined models: pass through as "auto" + effort = string(thinking.LevelAuto) + case thinking.ModeBudget: + // Budget mode: convert budget to level using threshold mapping + level, ok := thinking.ConvertBudgetToLevel(config.Budget) + if !ok { + return body, nil + } + effort = level + default: + return body, nil + } + + result, _ := sjson.SetBytes(body, "reasoning.effort", effort) + return result, nil +} + +func hasLevel(levels []string, target string) bool { + for _, level := range levels { + if strings.EqualFold(strings.TrimSpace(level), target) { + return true + } + } + return false +} diff --git a/internal/thinking/provider/gemini/apply.go b/internal/thinking/provider/gemini/apply.go new file mode 100644 index 00000000..7de48919 --- /dev/null +++ b/internal/thinking/provider/gemini/apply.go @@ -0,0 +1,172 @@ +// Package gemini implements thinking configuration for Gemini models. +// +// Gemini models have two formats: +// - Gemini 2.5: Uses thinkingBudget (numeric) +// - Gemini 3.x: Uses thinkingLevel (string: minimal/low/medium/high) +// or thinkingBudget=-1 for auto/dynamic mode +// +// Output format is determined by ThinkingConfig.Mode and ThinkingSupport.Levels: +// - ModeAuto: Always uses thinkingBudget=-1 (both Gemini 2.5 and 3.x) +// - len(Levels) > 0: Uses thinkingLevel (Gemini 3.x discrete levels) +// - len(Levels) == 0: Uses thinkingBudget (Gemini 2.5) +package gemini + +import ( + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// Applier applies thinking configuration for Gemini models. +// +// Gemini-specific behavior: +// - Gemini 2.5: thinkingBudget format, flash series supports ZeroAllowed +// - Gemini 3.x: thinkingLevel format, cannot be disabled +// - Use ThinkingSupport.Levels to decide output format +type Applier struct{} + +// NewApplier creates a new Gemini thinking applier. +func NewApplier() *Applier { + return &Applier{} +} + +func init() { + thinking.RegisterProvider("gemini", NewApplier()) +} + +// Apply applies thinking configuration to Gemini request body. 
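+// The output field is chosen per model: thinkingLevel when the model defines
+// Levels, thinkingBudget otherwise; ModeAuto always emits thinkingBudget=-1.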
+// +// Expected output format (Gemini 2.5): +// +// { +// "generationConfig": { +// "thinkingConfig": { +// "thinkingBudget": 8192, +// "includeThoughts": true +// } +// } +// } +// +// Expected output format (Gemini 3.x): +// +// { +// "generationConfig": { +// "thinkingConfig": { +// "thinkingLevel": "high", +// "includeThoughts": true +// } +// } +// } +func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { + if modelInfo == nil { + return body, nil + } + if modelInfo.Thinking == nil { + if modelInfo.Type == "" { + modelID := modelInfo.ID + if modelID == "" { + modelID = "unknown" + } + return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID) + } + return a.applyCompatible(body, config) + } + + if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + // Choose format based on config.Mode and model capabilities: + // - ModeLevel: use Level format (validation will reject unsupported levels) + // - ModeNone: use Level format if model has Levels, else Budget format + // - ModeBudget/ModeAuto: use Budget format + switch config.Mode { + case thinking.ModeLevel: + return a.applyLevelFormat(body, config) + case thinking.ModeNone: + // ModeNone: route based on model capability (has Levels or not) + if len(modelInfo.Thinking.Levels) > 0 { + return a.applyLevelFormat(body, config) + } + return a.applyBudgetFormat(body, config) + default: + return a.applyBudgetFormat(body, config) + } +} + +func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + if config.Mode == thinking.ModeAuto { + return a.applyBudgetFormat(body, config) + } + + if config.Mode == thinking.ModeLevel || (config.Mode == thinking.ModeNone && config.Level != "") { + return a.applyLevelFormat(body, config) + } + + return a.applyBudgetFormat(body, config) +} + +func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + // ModeNone semantics: + // - ModeNone + Budget=0: completely disable thinking (not possible for Level-only models) + // - ModeNone + Budget>0: forced to think but hide output (includeThoughts=false) + // ValidateConfig sets config.Level to the lowest level when ModeNone + Budget > 0. 
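+	// For example, gemini-3-flash-preview's lowest level is "minimal", so a (0)
+	// or (none) suffix ends up as thinkingLevel="minimal" with includeThoughts=false.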
+ + // Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output + result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingBudget") + + if config.Mode == thinking.ModeNone { + result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", false) + if config.Level != "" { + result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingLevel", string(config.Level)) + } + return result, nil + } + + // Only handle ModeLevel - budget conversion should be done by upper layer + if config.Mode != thinking.ModeLevel { + return body, nil + } + + level := string(config.Level) + result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingLevel", level) + result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", true) + return result, nil +} + +func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + // Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output + result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingLevel") + + budget := config.Budget + // ModeNone semantics: + // - ModeNone + Budget=0: completely disable thinking + // - ModeNone + Budget>0: forced to think but hide output (includeThoughts=false) + // When ZeroAllowed=false, ValidateConfig clamps Budget to Min while preserving ModeNone. + includeThoughts := false + switch config.Mode { + case thinking.ModeNone: + includeThoughts = false + case thinking.ModeAuto: + includeThoughts = true + default: + includeThoughts = budget > 0 + } + + result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingBudget", budget) + result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", includeThoughts) + return result, nil +} diff --git a/internal/thinking/provider/gemini/apply_test.go b/internal/thinking/provider/gemini/apply_test.go new file mode 100644 index 00000000..5f762a2f --- /dev/null +++ b/internal/thinking/provider/gemini/apply_test.go @@ -0,0 +1,526 @@ +// Package gemini implements thinking configuration for Gemini models. +package gemini + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" +) + +func TestNewApplier(t *testing.T) { + applier := NewApplier() + if applier == nil { + t.Fatal("NewApplier() returned nil") + } +} + +// parseConfigFromSuffix parses a raw suffix into a ThinkingConfig. +// This helper reduces code duplication in end-to-end tests (L1 fix). +func parseConfigFromSuffix(rawSuffix string) (thinking.ThinkingConfig, bool) { + if budget, ok := thinking.ParseNumericSuffix(rawSuffix); ok { + return thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: budget}, true + } + if level, ok := thinking.ParseLevelSuffix(rawSuffix); ok { + return thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: level}, true + } + if mode, ok := thinking.ParseSpecialSuffix(rawSuffix); ok { + config := thinking.ThinkingConfig{Mode: mode} + if mode == thinking.ModeAuto { + config.Budget = -1 + } + return config, true + } + return thinking.ThinkingConfig{}, false +} + +func TestApplierImplementsInterface(t *testing.T) { + // Compile-time check: if Applier doesn't implement the interface, this won't compile + var _ thinking.ProviderApplier = (*Applier)(nil) +} + +// TestGeminiApply tests the Gemini thinking applier. 
+// +// Gemini-specific behavior: +// - Gemini 2.5: thinkingBudget format (numeric) +// - Gemini 3.x: thinkingLevel format (string) +// - Flash series: ZeroAllowed=true +// - Pro series: ZeroAllowed=false, Min=128 +// - CRITICAL: When budget=0/none, set includeThoughts=false +// +// Depends on: Epic 7 Story 7-2, 7-3 +func TestGeminiApply(t *testing.T) { + applier := NewApplier() + tests := []struct { + name string + model string + config thinking.ThinkingConfig + wantField string + wantValue interface{} + wantIncludeThoughts bool // CRITICAL: includeThoughts field + }{ + // Gemini 2.5 Flash (ZeroAllowed=true) + {"flash budget 8k", "gemini-2.5-flash", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, "thinkingBudget", 8192, true}, + {"flash zero", "gemini-2.5-flash", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, "thinkingBudget", 0, false}, + {"flash none", "gemini-2.5-flash", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "thinkingBudget", 0, false}, + + // Gemini 2.5 Pro (ZeroAllowed=false, Min=128) + {"pro budget 8k", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, "thinkingBudget", 8192, true}, + {"pro zero - clamp", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, "thinkingBudget", 128, false}, + {"pro none - clamp", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "thinkingBudget", 128, false}, + {"pro below min", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 50}, "thinkingBudget", 128, true}, + {"pro above max", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 50000}, "thinkingBudget", 32768, true}, + {"pro auto", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, "thinkingBudget", -1, true}, + + // Gemini 3 Pro (Level mode, ZeroAllowed=false) + {"g3-pro high", "gemini-3-pro-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "thinkingLevel", "high", true}, + {"g3-pro low", "gemini-3-pro-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, "thinkingLevel", "low", true}, + {"g3-pro auto", "gemini-3-pro-preview", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, "thinkingBudget", -1, true}, + + // Gemini 3 Flash (Level mode, minimal is lowest) + {"g3-flash high", "gemini-3-flash-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "thinkingLevel", "high", true}, + {"g3-flash medium", "gemini-3-flash-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, "thinkingLevel", "medium", true}, + {"g3-flash minimal", "gemini-3-flash-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, "thinkingLevel", "minimal", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + modelInfo := buildGeminiModelInfo(tt.model) + normalized, err := thinking.ValidateConfig(tt.config, modelInfo.Thinking) + if err != nil { + t.Fatalf("ValidateConfig() error = %v", err) + } + + result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + gotField := gjson.GetBytes(result, "generationConfig.thinkingConfig."+tt.wantField) + switch want := tt.wantValue.(type) { + case int: + if int(gotField.Int()) != want { + t.Fatalf("%s = %d, want %d", tt.wantField, gotField.Int(), want) + } + case string: + if 
gotField.String() != want { + t.Fatalf("%s = %q, want %q", tt.wantField, gotField.String(), want) + } + case bool: + if gotField.Bool() != want { + t.Fatalf("%s = %v, want %v", tt.wantField, gotField.Bool(), want) + } + default: + t.Fatalf("unsupported wantValue type %T", tt.wantValue) + } + + gotIncludeThoughts := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts").Bool() + if gotIncludeThoughts != tt.wantIncludeThoughts { + t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, tt.wantIncludeThoughts) + } + }) + } +} + +// TestGeminiApplyEndToEndBudgetZero tests suffix parsing + validation + apply for budget=0. +// +// This test covers the complete flow from suffix parsing to Apply output: +// - AC#1: ModeBudget+Budget=0 → ModeNone conversion +// - AC#3: Gemini 3 ModeNone+Budget>0 → includeThoughts=false + thinkingLevel=low +// - AC#4: Gemini 2.5 Pro (0) → clamped to 128 + includeThoughts=false +func TestGeminiApplyEndToEndBudgetZero(t *testing.T) { + tests := []struct { + name string + model string + wantModel string + wantField string // "thinkingBudget" or "thinkingLevel" + wantValue interface{} + wantIncludeThoughts bool + }{ + // AC#4: Gemini 2.5 Pro - Budget format + {"gemini-25-pro zero", "gemini-2.5-pro(0)", "gemini-2.5-pro", "thinkingBudget", 128, false}, + // AC#3: Gemini 3 Pro - Level format, ModeNone clamped to Budget=128, uses lowest level + {"gemini-3-pro zero", "gemini-3-pro-preview(0)", "gemini-3-pro-preview", "thinkingLevel", "low", false}, + {"gemini-3-pro none", "gemini-3-pro-preview(none)", "gemini-3-pro-preview", "thinkingLevel", "low", false}, + // Gemini 3 Flash - Level format, lowest level is "minimal" + {"gemini-3-flash zero", "gemini-3-flash-preview(0)", "gemini-3-flash-preview", "thinkingLevel", "minimal", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + suffix := thinking.ParseSuffix(tt.model) + if !suffix.HasSuffix { + t.Fatalf("ParseSuffix(%q) HasSuffix = false, want true", tt.model) + } + if suffix.ModelName != tt.wantModel { + t.Fatalf("ParseSuffix(%q) ModelName = %q, want %q", tt.model, suffix.ModelName, tt.wantModel) + } + + // Parse suffix value using helper function (L1 fix) + config, ok := parseConfigFromSuffix(suffix.RawSuffix) + if !ok { + t.Fatalf("ParseSuffix(%q) RawSuffix = %q is not a valid suffix", tt.model, suffix.RawSuffix) + } + + modelInfo := buildGeminiModelInfo(suffix.ModelName) + normalized, err := thinking.ValidateConfig(config, modelInfo.Thinking) + if err != nil { + t.Fatalf("ValidateConfig() error = %v", err) + } + + applier := NewApplier() + result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + // Verify the output field value + gotField := gjson.GetBytes(result, "generationConfig.thinkingConfig."+tt.wantField) + switch want := tt.wantValue.(type) { + case int: + if int(gotField.Int()) != want { + t.Fatalf("%s = %d, want %d", tt.wantField, gotField.Int(), want) + } + case string: + if gotField.String() != want { + t.Fatalf("%s = %q, want %q", tt.wantField, gotField.String(), want) + } + } + + gotIncludeThoughts := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts").Bool() + if gotIncludeThoughts != tt.wantIncludeThoughts { + t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, tt.wantIncludeThoughts) + } + }) + } +} + +// TestGeminiApplyEndToEndAuto tests auto mode through both suffix parsing and direct config. 
+// +// This test covers: +// - AC#2: Gemini 2.5 auto uses thinkingBudget=-1 +// - AC#3: Gemini 3 auto uses thinkingBudget=-1 (not thinkingLevel) +// - Suffix parsing path: (auto) and (-1) suffixes +// - Direct config path: ModeLevel + Level=auto → ModeAuto conversion +func TestGeminiApplyEndToEndAuto(t *testing.T) { + tests := []struct { + name string + model string // model name (with suffix for parsing, or plain for direct config) + directConfig *thinking.ThinkingConfig // if not nil, use direct config instead of suffix parsing + wantField string + wantValue int + wantIncludeThoughts bool + }{ + // Suffix parsing path - Budget-only model (Gemini 2.5) + {"suffix auto g25", "gemini-2.5-pro(auto)", nil, "thinkingBudget", -1, true}, + {"suffix -1 g25", "gemini-2.5-pro(-1)", nil, "thinkingBudget", -1, true}, + // Suffix parsing path - Hybrid model (Gemini 3) + {"suffix auto g3", "gemini-3-pro-preview(auto)", nil, "thinkingBudget", -1, true}, + {"suffix -1 g3", "gemini-3-pro-preview(-1)", nil, "thinkingBudget", -1, true}, + // Direct config path - Level=auto → ModeAuto conversion + {"direct level=auto g25", "gemini-2.5-pro", &thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelAuto}, "thinkingBudget", -1, true}, + {"direct level=auto g3", "gemini-3-pro-preview", &thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelAuto}, "thinkingBudget", -1, true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var config thinking.ThinkingConfig + var modelName string + + if tt.directConfig != nil { + // Direct config path + config = *tt.directConfig + modelName = tt.model + } else { + // Suffix parsing path + suffix := thinking.ParseSuffix(tt.model) + if !suffix.HasSuffix { + t.Fatalf("ParseSuffix(%q) HasSuffix = false", tt.model) + } + modelName = suffix.ModelName + var ok bool + config, ok = parseConfigFromSuffix(suffix.RawSuffix) + if !ok { + t.Fatalf("parseConfigFromSuffix(%q) failed", suffix.RawSuffix) + } + } + + modelInfo := buildGeminiModelInfo(modelName) + normalized, err := thinking.ValidateConfig(config, modelInfo.Thinking) + if err != nil { + t.Fatalf("ValidateConfig() error = %v", err) + } + + // Verify ModeAuto after validation + if normalized.Mode != thinking.ModeAuto { + t.Fatalf("ValidateConfig() Mode = %v, want ModeAuto", normalized.Mode) + } + + applier := NewApplier() + result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + gotField := gjson.GetBytes(result, "generationConfig.thinkingConfig."+tt.wantField) + if int(gotField.Int()) != tt.wantValue { + t.Fatalf("%s = %d, want %d", tt.wantField, gotField.Int(), tt.wantValue) + } + + gotIncludeThoughts := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts").Bool() + if gotIncludeThoughts != tt.wantIncludeThoughts { + t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, tt.wantIncludeThoughts) + } + }) + } +} + +func TestGeminiApplyInvalidBody(t *testing.T) { + applier := NewApplier() + modelInfo := buildGeminiModelInfo("gemini-2.5-flash") + config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192} + normalized, err := thinking.ValidateConfig(config, modelInfo.Thinking) + if err != nil { + t.Fatalf("ValidateConfig() error = %v", err) + } + + tests := []struct { + name string + body []byte + }{ + {"nil body", nil}, + {"empty body", []byte{}}, + {"invalid json", []byte("{\"not json\"")}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { 
+ result, err := applier.Apply(tt.body, *normalized, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + gotBudget := int(gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget").Int()) + if gotBudget != 8192 { + t.Fatalf("thinkingBudget = %d, want %d", gotBudget, 8192) + } + + gotIncludeThoughts := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts").Bool() + if !gotIncludeThoughts { + t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, true) + } + }) + } +} + +// TestGeminiApplyConflictingFields tests that conflicting fields are removed. +// +// When applying Budget format, any existing thinkingLevel should be removed. +// When applying Level format, any existing thinkingBudget should be removed. +func TestGeminiApplyConflictingFields(t *testing.T) { + applier := NewApplier() + + tests := []struct { + name string + model string + config thinking.ThinkingConfig + existingBody string + wantField string // expected field to exist + wantNoField string // expected field to NOT exist + }{ + // Budget format should remove existing thinkingLevel + { + "budget removes level", + "gemini-2.5-pro", + thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, + `{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}`, + "thinkingBudget", + "thinkingLevel", + }, + // Level format should remove existing thinkingBudget + { + "level removes budget", + "gemini-3-pro-preview", + thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, + `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, + "thinkingLevel", + "thinkingBudget", + }, + // ModeAuto uses budget format, should remove thinkingLevel + { + "auto removes level", + "gemini-3-pro-preview", + thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, + `{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}`, + "thinkingBudget", + "thinkingLevel", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + modelInfo := buildGeminiModelInfo(tt.model) + result, err := applier.Apply([]byte(tt.existingBody), tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + // Verify expected field exists + wantPath := "generationConfig.thinkingConfig." + tt.wantField + if !gjson.GetBytes(result, wantPath).Exists() { + t.Fatalf("%s should exist in result: %s", tt.wantField, string(result)) + } + + // Verify conflicting field was removed + noPath := "generationConfig.thinkingConfig." + tt.wantNoField + if gjson.GetBytes(result, noPath).Exists() { + t.Fatalf("%s should NOT exist in result: %s", tt.wantNoField, string(result)) + } + }) + } +} + +// TestGeminiApplyThinkingNotSupported tests error handling when modelInfo.Thinking is nil. 
+func TestGeminiApplyThinkingNotSupported(t *testing.T) {
+	applier := NewApplier()
+	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
+
+	// Model with nil Thinking support
+	modelInfo := &registry.ModelInfo{ID: "gemini-unknown", Thinking: nil}
+
+	_, err := applier.Apply([]byte(`{}`), config, modelInfo)
+	if err == nil {
+		t.Fatal("Apply() expected error for nil Thinking, got nil")
+	}
+
+	// Verify it's the correct error type
+	thinkErr, ok := err.(*thinking.ThinkingError)
+	if !ok {
+		t.Fatalf("Apply() error type = %T, want *thinking.ThinkingError", err)
+	}
+	if thinkErr.Code != thinking.ErrThinkingNotSupported {
+		t.Fatalf("Apply() error code = %v, want %v", thinkErr.Code, thinking.ErrThinkingNotSupported)
+	}
+}
+
+func buildGeminiModelInfo(modelID string) *registry.ModelInfo {
+	support := &registry.ThinkingSupport{}
+	switch modelID {
+	case "gemini-2.5-pro":
+		support.Min = 128
+		support.Max = 32768
+		support.ZeroAllowed = false
+		support.DynamicAllowed = true
+	case "gemini-2.5-flash", "gemini-2.5-flash-lite":
+		support.Min = 0
+		support.Max = 24576
+		support.ZeroAllowed = true
+		support.DynamicAllowed = true
+	case "gemini-3-pro-preview":
+		support.Min = 128
+		support.Max = 32768
+		support.ZeroAllowed = false
+		support.DynamicAllowed = true
+		support.Levels = []string{"low", "high"}
+	case "gemini-3-flash-preview":
+		support.Min = 128
+		support.Max = 32768
+		support.ZeroAllowed = false
+		support.DynamicAllowed = true
+		support.Levels = []string{"minimal", "low", "medium", "high"}
+	default:
+		// Unknown model - return nil Thinking to trigger error path
+		return &registry.ModelInfo{ID: modelID, Thinking: nil}
+	}
+	return &registry.ModelInfo{
+		ID:       modelID,
+		Thinking: support,
+	}
+}
+
+// TestGeminiApplyNilModelInfo tests Apply behavior when modelInfo is nil.
+// Coverage: apply.go:56-58 (H1)
+func TestGeminiApplyNilModelInfo(t *testing.T) {
+	applier := NewApplier()
+	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
+	body := []byte(`{"existing": "data"}`)
+
+	result, err := applier.Apply(body, config, nil)
+	if err != nil {
+		t.Fatalf("Apply() with nil modelInfo should not error, got: %v", err)
+	}
+	if string(result) != string(body) {
+		t.Fatalf("Apply() with nil modelInfo should return original body, got: %s", result)
+	}
+}
+
+// TestGeminiApplyEmptyModelID tests Apply when modelID is empty.
+// Coverage: apply.go:61-63 (H2)
+func TestGeminiApplyEmptyModelID(t *testing.T) {
+	applier := NewApplier()
+	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
+	modelInfo := &registry.ModelInfo{ID: "", Thinking: nil}
+
+	_, err := applier.Apply([]byte(`{}`), config, modelInfo)
+	if err == nil {
+		t.Fatal("Apply() with empty modelID and nil Thinking should error")
+	}
+	thinkErr, ok := err.(*thinking.ThinkingError)
+	if !ok {
+		t.Fatalf("Apply() error type = %T, want *thinking.ThinkingError", err)
+	}
+	if thinkErr.Model != "unknown" {
+		t.Fatalf("Apply() error model = %q, want %q", thinkErr.Model, "unknown")
+	}
+}
+
+// TestGeminiApplyModeBudgetWithLevels tests that ModeBudget is applied with budget format
+// even for models with Levels. The Apply layer handles ModeBudget by applying thinkingBudget.
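+// (buildGeminiModelInfo marks gemini-3-flash-preview as hybrid: it defines both
+// a budget range and Levels, so ModeBudget is legal input here.)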
+// Coverage: apply.go:88-90 +func TestGeminiApplyModeBudgetWithLevels(t *testing.T) { + applier := NewApplier() + modelInfo := buildGeminiModelInfo("gemini-3-flash-preview") + config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192} + body := []byte(`{"existing": "data"}`) + + result, err := applier.Apply(body, config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + // ModeBudget applies budget format + budget := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget").Int() + if budget != 8192 { + t.Fatalf("Apply() expected thinkingBudget=8192, got: %d", budget) + } +} + +// TestGeminiApplyUnsupportedMode tests behavior with unsupported Mode types. +// Coverage: apply.go:67-69 and 97-98 (H5, L2) +func TestGeminiApplyUnsupportedMode(t *testing.T) { + applier := NewApplier() + body := []byte(`{"existing": "data"}`) + + tests := []struct { + name string + model string + config thinking.ThinkingConfig + }{ + {"unknown mode with budget model", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ThinkingMode(99), Budget: 8192}}, + {"unknown mode with level model", "gemini-3-pro-preview", thinking.ThinkingConfig{Mode: thinking.ThinkingMode(99), Level: thinking.LevelHigh}}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + modelInfo := buildGeminiModelInfo(tt.model) + result, err := applier.Apply(body, tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + // Unsupported modes return original body unchanged + if string(result) != string(body) { + t.Fatalf("Apply() with unsupported mode should return original body, got: %s", result) + } + }) + } +} diff --git a/internal/thinking/provider/geminicli/apply.go b/internal/thinking/provider/geminicli/apply.go new file mode 100644 index 00000000..b076b7a6 --- /dev/null +++ b/internal/thinking/provider/geminicli/apply.go @@ -0,0 +1,128 @@ +// Package geminicli implements thinking configuration for Gemini CLI API format. +// +// Gemini CLI uses request.generationConfig.thinkingConfig.* path instead of +// generationConfig.thinkingConfig.* used by standard Gemini API. +package geminicli + +import ( + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// Applier applies thinking configuration for Gemini CLI API format. +type Applier struct{} + +var _ thinking.ProviderApplier = (*Applier)(nil) + +// NewApplier creates a new Gemini CLI thinking applier. +func NewApplier() *Applier { + return &Applier{} +} + +func init() { + applier := NewApplier() + thinking.RegisterProvider("gemini-cli", applier) + thinking.RegisterProvider("antigravity", applier) +} + +// Apply applies thinking configuration to Gemini CLI request body. 
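+//
+// Semantics mirror the gemini applier; only the JSON path prefix differs.
+// A sketch of the resulting envelope (values illustrative):
+//
+//	{
+//	  "request": {
+//	    "generationConfig": {
+//	      "thinkingConfig": {"thinkingBudget": 8192, "includeThoughts": true}
+//	    }
+//	  }
+//	}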
+func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { + if modelInfo == nil { + return body, nil + } + if modelInfo.Thinking == nil { + if modelInfo.Type == "" { + modelID := modelInfo.ID + if modelID == "" { + modelID = "unknown" + } + return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID) + } + return a.applyCompatible(body, config) + } + + if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + // ModeAuto: Always use Budget format with thinkingBudget=-1 + if config.Mode == thinking.ModeAuto { + return a.applyBudgetFormat(body, config) + } + + // For non-auto modes, choose format based on model capabilities + support := modelInfo.Thinking + if len(support.Levels) > 0 { + return a.applyLevelFormat(body, config) + } + return a.applyBudgetFormat(body, config) +} + +func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + if config.Mode == thinking.ModeAuto { + return a.applyBudgetFormat(body, config) + } + + if config.Mode == thinking.ModeLevel || (config.Mode == thinking.ModeNone && config.Level != "") { + return a.applyLevelFormat(body, config) + } + + return a.applyBudgetFormat(body, config) +} + +func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + // Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output + result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget") + + if config.Mode == thinking.ModeNone { + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", false) + if config.Level != "" { + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", string(config.Level)) + } + return result, nil + } + + // Only handle ModeLevel - budget conversion should be done by upper layer + if config.Mode != thinking.ModeLevel { + return body, nil + } + + level := string(config.Level) + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", level) + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", true) + return result, nil +} + +func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + // Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output + result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingLevel") + + budget := config.Budget + includeThoughts := false + switch config.Mode { + case thinking.ModeNone: + includeThoughts = false + case thinking.ModeAuto: + includeThoughts = true + default: + includeThoughts = budget > 0 + } + + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget", budget) + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", includeThoughts) + return result, nil +} diff --git 
a/internal/thinking/provider/geminicli/apply_test.go b/internal/thinking/provider/geminicli/apply_test.go new file mode 100644 index 00000000..a606457c --- /dev/null +++ b/internal/thinking/provider/geminicli/apply_test.go @@ -0,0 +1,382 @@ +// Package geminicli implements thinking configuration for Gemini CLI API format. +package geminicli + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" +) + +func TestNewApplier(t *testing.T) { + applier := NewApplier() + if applier == nil { + t.Fatal("NewApplier() returned nil") + } +} + +func TestApplierImplementsInterface(t *testing.T) { + // Compile-time check: if Applier doesn't implement the interface, this won't compile + var _ thinking.ProviderApplier = (*Applier)(nil) +} + +// TestGeminiCLIApply tests the Gemini CLI thinking applier. +// +// Gemini CLI uses request.generationConfig.thinkingConfig.* path. +// Behavior mirrors Gemini applier but with different JSON path prefix. +func TestGeminiCLIApply(t *testing.T) { + applier := NewApplier() + tests := []struct { + name string + model string + config thinking.ThinkingConfig + wantField string + wantValue interface{} + wantIncludeThoughts bool + }{ + // Budget mode (no Levels) + {"budget 8k", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, "thinkingBudget", 8192, true}, + {"budget zero", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, "thinkingBudget", 0, false}, + {"none mode", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "thinkingBudget", 0, false}, + {"auto mode", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, "thinkingBudget", -1, true}, + + // Level mode (has Levels) + {"level high", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "thinkingLevel", "high", true}, + {"level low", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, "thinkingLevel", "low", true}, + {"level minimal", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, "thinkingLevel", "minimal", true}, + // ModeAuto with Levels model still uses thinkingBudget=-1 + {"auto with levels", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, "thinkingBudget", -1, true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + modelInfo := buildGeminiCLIModelInfo(tt.model) + result, err := applier.Apply([]byte(`{}`), tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + gotField := gjson.GetBytes(result, "request.generationConfig.thinkingConfig."+tt.wantField) + switch want := tt.wantValue.(type) { + case int: + if int(gotField.Int()) != want { + t.Fatalf("%s = %d, want %d", tt.wantField, gotField.Int(), want) + } + case string: + if gotField.String() != want { + t.Fatalf("%s = %q, want %q", tt.wantField, gotField.String(), want) + } + case bool: + if gotField.Bool() != want { + t.Fatalf("%s = %v, want %v", tt.wantField, gotField.Bool(), want) + } + default: + t.Fatalf("unsupported wantValue type %T", tt.wantValue) + } + + gotIncludeThoughts := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts").Bool() + if gotIncludeThoughts != tt.wantIncludeThoughts { + t.Fatalf("includeThoughts = %v, want %v", 
gotIncludeThoughts, tt.wantIncludeThoughts) + } + }) + } +} + +// TestGeminiCLIApplyModeNoneWithLevel tests ModeNone with Level model. +// When ModeNone is used with a model that has Levels, includeThoughts should be false. +func TestGeminiCLIApplyModeNoneWithLevel(t *testing.T) { + applier := NewApplier() + modelInfo := buildGeminiCLIModelInfo("gemini-cli-level") + config := thinking.ThinkingConfig{Mode: thinking.ModeNone, Level: thinking.LevelLow} + + result, err := applier.Apply([]byte(`{}`), config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + gotIncludeThoughts := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts").Bool() + if gotIncludeThoughts != false { + t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, false) + } + + gotLevel := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel").String() + if gotLevel != "low" { + t.Fatalf("thinkingLevel = %q, want %q", gotLevel, "low") + } +} + +// TestGeminiCLIApplyInvalidBody tests Apply behavior with invalid body inputs. +func TestGeminiCLIApplyInvalidBody(t *testing.T) { + applier := NewApplier() + modelInfo := buildGeminiCLIModelInfo("gemini-cli-budget") + config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192} + + tests := []struct { + name string + body []byte + }{ + {"nil body", nil}, + {"empty body", []byte{}}, + {"invalid json", []byte("{\"not json\"")}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := applier.Apply(tt.body, config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + gotBudget := int(gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget").Int()) + if gotBudget != 8192 { + t.Fatalf("thinkingBudget = %d, want %d", gotBudget, 8192) + } + + gotIncludeThoughts := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts").Bool() + if !gotIncludeThoughts { + t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, true) + } + }) + } +} + +// TestGeminiCLIApplyConflictingFields tests that conflicting fields are removed. +// +// When applying Budget format, any existing thinkingLevel should be removed. +// When applying Level format, any existing thinkingBudget should be removed. 
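+//
+// For illustration, a hypothetical pre-populated body such as
+//
+//	{"request":{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}}
+//
+// applied with a budget-style config should come back with thinkingBudget set
+// and thinkingLevel deleted.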
+func TestGeminiCLIApplyConflictingFields(t *testing.T) {
+	applier := NewApplier()
+
+	tests := []struct {
+		name         string
+		model        string
+		config       thinking.ThinkingConfig
+		existingBody string
+		wantField    string // expected field to exist
+		wantNoField  string // expected field to NOT exist
+	}{
+		// Budget format should remove existing thinkingLevel
+		{
+			"budget removes level",
+			"gemini-cli-budget",
+			thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192},
+			`{"request":{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}}`,
+			"thinkingBudget",
+			"thinkingLevel",
+		},
+		// Level format should remove existing thinkingBudget
+		{
+			"level removes budget",
+			"gemini-cli-level",
+			thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh},
+			`{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`,
+			"thinkingLevel",
+			"thinkingBudget",
+		},
+		// ModeAuto uses budget format, should remove thinkingLevel
+		{
+			"auto removes level",
+			"gemini-cli-level",
+			thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1},
+			`{"request":{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}}`,
+			"thinkingBudget",
+			"thinkingLevel",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			modelInfo := buildGeminiCLIModelInfo(tt.model)
+			result, err := applier.Apply([]byte(tt.existingBody), tt.config, modelInfo)
+			if err != nil {
+				t.Fatalf("Apply() error = %v", err)
+			}
+
+			// Verify expected field exists
+			wantPath := "request.generationConfig.thinkingConfig." + tt.wantField
+			if !gjson.GetBytes(result, wantPath).Exists() {
+				t.Fatalf("%s should exist in result: %s", tt.wantField, string(result))
+			}
+
+			// Verify conflicting field was removed
+			noPath := "request.generationConfig.thinkingConfig." + tt.wantNoField
+			if gjson.GetBytes(result, noPath).Exists() {
+				t.Fatalf("%s should NOT exist in result: %s", tt.wantNoField, string(result))
+			}
+		})
+	}
+}
+
+// TestGeminiCLIApplyThinkingNotSupported tests error handling when modelInfo.Thinking is nil.
+func TestGeminiCLIApplyThinkingNotSupported(t *testing.T) {
+	applier := NewApplier()
+	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
+
+	// Model with nil Thinking support
+	modelInfo := &registry.ModelInfo{ID: "gemini-cli-unknown", Thinking: nil}
+
+	_, err := applier.Apply([]byte(`{}`), config, modelInfo)
+	if err == nil {
+		t.Fatal("Apply() expected error for nil Thinking, got nil")
+	}
+
+	// Verify it's the correct error type
+	thinkErr, ok := err.(*thinking.ThinkingError)
+	if !ok {
+		t.Fatalf("Apply() error type = %T, want *thinking.ThinkingError", err)
+	}
+	if thinkErr.Code != thinking.ErrThinkingNotSupported {
+		t.Fatalf("Apply() error code = %v, want %v", thinkErr.Code, thinking.ErrThinkingNotSupported)
+	}
+}
+
+// TestGeminiCLIApplyNilModelInfo tests Apply behavior when modelInfo is nil.
+func TestGeminiCLIApplyNilModelInfo(t *testing.T) {
+	applier := NewApplier()
+	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
+	body := []byte(`{"existing": "data"}`)
+
+	result, err := applier.Apply(body, config, nil)
+	if err != nil {
+		t.Fatalf("Apply() with nil modelInfo should not error, got: %v", err)
+	}
+	if string(result) != string(body) {
+		t.Fatalf("Apply() with nil modelInfo should return original body, got: %s", result)
+	}
+}
+
+// TestGeminiCLIApplyEmptyModelID tests Apply when modelID is empty.
+func TestGeminiCLIApplyEmptyModelID(t *testing.T) {
+	applier := NewApplier()
+	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
+	modelInfo := &registry.ModelInfo{ID: "", Thinking: nil}
+
+	_, err := applier.Apply([]byte(`{}`), config, modelInfo)
+	if err == nil {
+		t.Fatal("Apply() with empty modelID and nil Thinking should error")
+	}
+	thinkErr, ok := err.(*thinking.ThinkingError)
+	if !ok {
+		t.Fatalf("Apply() error type = %T, want *thinking.ThinkingError", err)
+	}
+	if thinkErr.Model != "unknown" {
+		t.Fatalf("Apply() error model = %q, want %q", thinkErr.Model, "unknown")
+	}
+}
+
+// TestGeminiCLIApplyModeBudgetWithLevels tests that ModeBudget with Levels model passes through.
+// Apply layer doesn't convert - upper layer should handle Budget→Level conversion.
+func TestGeminiCLIApplyModeBudgetWithLevels(t *testing.T) {
+	applier := NewApplier()
+	modelInfo := buildGeminiCLIModelInfo("gemini-cli-level")
+	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
+	body := []byte(`{"existing": "data"}`)
+
+	result, err := applier.Apply(body, config, modelInfo)
+	if err != nil {
+		t.Fatalf("Apply() error = %v", err)
+	}
+	// ModeBudget with Levels model: Apply returns body unchanged (conversion is upper layer's job)
+	if string(result) != string(body) {
+		t.Fatalf("Apply() ModeBudget with Levels should return original body, got: %s", result)
+	}
+}
+
+// TestGeminiCLIApplyUnsupportedMode tests behavior with unsupported Mode types.
+func TestGeminiCLIApplyUnsupportedMode(t *testing.T) {
+	applier := NewApplier()
+	body := []byte(`{"existing": "data"}`)
+
+	tests := []struct {
+		name   string
+		model  string
+		config thinking.ThinkingConfig
+	}{
+		{"unknown mode with budget model", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ThinkingMode(99), Budget: 8192}},
+		{"unknown mode with level model", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ThinkingMode(99), Level: thinking.LevelHigh}},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			modelInfo := buildGeminiCLIModelInfo(tt.model)
+			result, err := applier.Apply(body, tt.config, modelInfo)
+			if err != nil {
+				t.Fatalf("Apply() error = %v", err)
+			}
+			// Unsupported modes return original body unchanged
+			if string(result) != string(body) {
+				t.Fatalf("Apply() with unsupported mode should return original body, got: %s", result)
+			}
+		})
+	}
+}
+
+// TestAntigravityUsesGeminiCLIFormat tests that antigravity provider uses gemini-cli format.
+// Antigravity is registered with the same applier as gemini-cli.
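+// (The registration itself is assumed to live in the provider packages via
+// thinking.RegisterProvider("antigravity", NewApplier()); this test only
+// exercises the shared Apply behavior.)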
+func TestAntigravityUsesGeminiCLIFormat(t *testing.T) {
+	applier := NewApplier()
+
+	tests := []struct {
+		name      string
+		config    thinking.ThinkingConfig
+		modelInfo *registry.ModelInfo
+		wantField string
+	}{
+		{
+			"claude model budget",
+			thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384},
+			&registry.ModelInfo{ID: "gemini-claude-sonnet-4-5-thinking", Thinking: &registry.ThinkingSupport{Min: 1024, Max: 200000}},
+			"request.generationConfig.thinkingConfig.thinkingBudget",
+		},
+		{
+			"opus model budget",
+			thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 32768},
+			&registry.ModelInfo{ID: "gemini-claude-opus-4-5-thinking", Thinking: &registry.ThinkingSupport{Min: 1024, Max: 200000}},
+			"request.generationConfig.thinkingConfig.thinkingBudget",
+		},
+		{
+			"model with levels",
+			thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh},
+			&registry.ModelInfo{ID: "some-model-with-levels", Thinking: &registry.ThinkingSupport{Min: 1024, Max: 200000, Levels: []string{"low", "high"}}},
+			"request.generationConfig.thinkingConfig.thinkingLevel",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := applier.Apply([]byte(`{}`), tt.config, tt.modelInfo)
+			if err != nil {
+				t.Fatalf("Apply() error = %v", err)
+			}
+
+			if !gjson.GetBytes(got, tt.wantField).Exists() {
+				t.Fatalf("expected field %s in output: %s", tt.wantField, string(got))
+			}
+		})
+	}
+}
+
+func buildGeminiCLIModelInfo(modelID string) *registry.ModelInfo {
+	support := &registry.ThinkingSupport{}
+	switch modelID {
+	case "gemini-cli-budget":
+		support.Min = 0
+		support.Max = 32768
+		support.ZeroAllowed = true
+		support.DynamicAllowed = true
+	case "gemini-cli-level":
+		support.Min = 128
+		support.Max = 32768
+		support.ZeroAllowed = false
+		support.DynamicAllowed = true
+		support.Levels = []string{"minimal", "low", "medium", "high"}
+	default:
+		// Unknown model - return nil Thinking to trigger error path
+		return &registry.ModelInfo{ID: modelID, Thinking: nil}
+	}
+	return &registry.ModelInfo{
+		ID:       modelID,
+		Thinking: support,
+	}
+}
diff --git a/internal/thinking/provider/iflow/apply.go b/internal/thinking/provider/iflow/apply.go
new file mode 100644
index 00000000..5bca94f2
--- /dev/null
+++ b/internal/thinking/provider/iflow/apply.go
@@ -0,0 +1,160 @@
+// Package iflow implements thinking configuration for iFlow models (GLM, MiniMax).
+//
+// iFlow models use boolean toggle semantics:
+// - GLM models: chat_template_kwargs.enable_thinking (boolean)
+// - MiniMax models: reasoning_split (boolean)
+//
+// Level values are converted to boolean: none=false, all others=true
+// See: _bmad-output/planning-artifacts/architecture.md#Epic-9
+package iflow
+
+import (
+	"strings"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// Applier implements thinking.ProviderApplier for iFlow models.
+//
+// iFlow-specific behavior:
+// - GLM models: enable_thinking boolean + clear_thinking=false
+// - MiniMax models: reasoning_split boolean
+// - Level to boolean: none=false, others=true
+// - No quantized support (only on/off)
+type Applier struct{}
+
+var _ thinking.ProviderApplier = (*Applier)(nil)
+
+// NewApplier creates a new iFlow thinking applier.
+func NewApplier() *Applier {
+	return &Applier{}
+}
+
+func init() {
+	thinking.RegisterProvider("iflow", NewApplier())
+}
+
+// Apply applies thinking configuration to iFlow request body.
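+//
+// Dispatch is by model-ID prefix: glm* models get chat_template_kwargs,
+// minimax* models get reasoning_split, and any other model ID passes the
+// body through unchanged.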
+// +// Expected output format (GLM): +// +// { +// "chat_template_kwargs": { +// "enable_thinking": true, +// "clear_thinking": false +// } +// } +// +// Expected output format (MiniMax): +// +// { +// "reasoning_split": true +// } +func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { + if modelInfo == nil { + return body, nil + } + if modelInfo.Thinking == nil { + modelID := modelInfo.ID + if modelID == "" { + modelID = "unknown" + } + return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID) + } + + if isGLMModel(modelInfo.ID) { + return applyGLM(body, config), nil + } + + if isMiniMaxModel(modelInfo.ID) { + return applyMiniMax(body, config), nil + } + + return body, nil +} + +// configToBoolean converts ThinkingConfig to boolean for iFlow models. +// +// Conversion rules: +// - ModeNone: false +// - ModeAuto: true +// - ModeBudget + Budget=0: false +// - ModeBudget + Budget>0: true +// - ModeLevel + Level="none": false +// - ModeLevel + any other level: true +// - Default (unknown mode): true +func configToBoolean(config thinking.ThinkingConfig) bool { + switch config.Mode { + case thinking.ModeNone: + return false + case thinking.ModeAuto: + return true + case thinking.ModeBudget: + return config.Budget > 0 + case thinking.ModeLevel: + return config.Level != thinking.LevelNone + default: + return true + } +} + +// applyGLM applies thinking configuration for GLM models. +// +// Output format when enabled: +// +// {"chat_template_kwargs": {"enable_thinking": true, "clear_thinking": false}} +// +// Output format when disabled: +// +// {"chat_template_kwargs": {"enable_thinking": false}} +// +// Note: clear_thinking is only set when thinking is enabled, to preserve +// thinking output in the response. +func applyGLM(body []byte, config thinking.ThinkingConfig) []byte { + enableThinking := configToBoolean(config) + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + result, _ := sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking) + + // clear_thinking only needed when thinking is enabled + if enableThinking { + result, _ = sjson.SetBytes(result, "chat_template_kwargs.clear_thinking", false) + } + + return result +} + +// applyMiniMax applies thinking configuration for MiniMax models. +// +// Output format: +// +// {"reasoning_split": true/false} +func applyMiniMax(body []byte, config thinking.ThinkingConfig) []byte { + reasoningSplit := configToBoolean(config) + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + result, _ := sjson.SetBytes(body, "reasoning_split", reasoningSplit) + + return result +} + +// isGLMModel determines if the model is a GLM series model. +// GLM models use chat_template_kwargs.enable_thinking format. +func isGLMModel(modelID string) bool { + return strings.HasPrefix(strings.ToLower(modelID), "glm") +} + +// isMiniMaxModel determines if the model is a MiniMax series model. +// MiniMax models use reasoning_split format. +func isMiniMaxModel(modelID string) bool { + return strings.HasPrefix(strings.ToLower(modelID), "minimax") +} diff --git a/internal/thinking/provider/iflow/apply_test.go b/internal/thinking/provider/iflow/apply_test.go new file mode 100644 index 00000000..f0c2a35b --- /dev/null +++ b/internal/thinking/provider/iflow/apply_test.go @@ -0,0 +1,328 @@ +// Package iflow implements thinking configuration for iFlow models (GLM, MiniMax). 
+package iflow
+
+import (
+	"bytes"
+	"testing"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	"github.com/tidwall/gjson"
+)
+
+func TestNewApplier(t *testing.T) {
+	tests := []struct {
+		name string
+	}{
+		{"default"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			applier := NewApplier()
+			if applier == nil {
+				t.Fatalf("expected non-nil applier")
+			}
+		})
+	}
+}
+
+func TestApplierImplementsInterface(t *testing.T) {
+	tests := []struct {
+		name    string
+		applier thinking.ProviderApplier
+	}{
+		{"default", NewApplier()},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if tt.applier == nil {
+				t.Fatalf("expected thinking.ProviderApplier implementation")
+			}
+		})
+	}
+}
+
+func TestApplyNilModelInfo(t *testing.T) {
+	applier := NewApplier()
+
+	tests := []struct {
+		name string
+		body []byte
+	}{
+		{"nil body", nil},
+		{"empty body", []byte{}},
+		{"json body", []byte(`{"model":"glm-4.6"}`)},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := applier.Apply(tt.body, thinking.ThinkingConfig{}, nil)
+			if err != nil {
+				t.Fatalf("expected nil error, got %v", err)
+			}
+			if !bytes.Equal(got, tt.body) {
+				t.Fatalf("expected body unchanged, got %s", string(got))
+			}
+		})
+	}
+}
+
+func TestApplyMissingThinkingSupport(t *testing.T) {
+	applier := NewApplier()
+
+	tests := []struct {
+		name      string
+		modelID   string
+		wantModel string
+	}{
+		{"model id", "glm-4.6", "glm-4.6"},
+		{"empty model id", "", "unknown"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			modelInfo := &registry.ModelInfo{ID: tt.modelID}
+			got, err := applier.Apply([]byte(`{"model":"`+tt.modelID+`"}`), thinking.ThinkingConfig{}, modelInfo)
+			if err == nil {
+				t.Fatalf("expected error, got nil")
+			}
+			if got != nil {
+				t.Fatalf("expected nil body on error, got %s", string(got))
+			}
+			thinkingErr, ok := err.(*thinking.ThinkingError)
+			if !ok {
+				t.Fatalf("expected ThinkingError, got %T", err)
+			}
+			if thinkingErr.Code != thinking.ErrThinkingNotSupported {
+				t.Fatalf("expected code %s, got %s", thinking.ErrThinkingNotSupported, thinkingErr.Code)
+			}
+			if thinkingErr.Model != tt.wantModel {
+				t.Fatalf("expected model %s, got %s", tt.wantModel, thinkingErr.Model)
+			}
+		})
+	}
+}
+
+func TestConfigToBoolean(t *testing.T) {
+	tests := []struct {
+		name   string
+		config thinking.ThinkingConfig
+		want   bool
+	}{
+		{"mode none", thinking.ThinkingConfig{Mode: thinking.ModeNone}, false},
+		{"mode auto", thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true},
+		{"budget zero", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, false},
+		{"budget positive", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1000}, true},
+		{"level none", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelNone}, false},
+		{"level minimal", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, true},
+		{"level low", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, true},
+		{"level medium", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, true},
+		{"level high", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, true},
+		{"level xhigh", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelXHigh}, true},
+		{"zero value config", thinking.ThinkingConfig{}, false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := configToBoolean(tt.config); got != tt.want {
+				t.Fatalf("configToBoolean(%+v) = %v, want %v", tt.config, got, tt.want)
+			}
+		})
+	}
+}
+
+func TestApplyGLM(t *testing.T) {
+	applier := NewApplier()
+
+	tests := []struct {
+		name         string
+		modelID      string
+		body         []byte
+		config       thinking.ThinkingConfig
+		wantEnable   bool
+		wantPreserve string
+	}{
+		{"mode none", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeNone}, false, ""},
+		{"level none", "glm-4.7", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelNone}, false, ""},
+		{"mode auto", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, ""},
+		{"level minimal", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, true, ""},
+		{"level low", "glm-4.7", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, true, ""},
+		{"level medium", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, true, ""},
+		{"level high", "GLM-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, true, ""},
+		{"level xhigh", "glm-z1-preview", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelXHigh}, true, ""},
+		{"budget zero", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, false, ""},
+		{"budget 1000", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1000}, true, ""},
+		{"preserve fields", "glm-4.6", []byte(`{"model":"glm-4.6","extra":{"keep":true}}`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "glm-4.6"},
+		{"empty body", "glm-4.6", nil, thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, ""},
+		{"malformed json", "glm-4.6", []byte(`{invalid`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, ""},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			modelInfo := &registry.ModelInfo{
+				ID:       tt.modelID,
+				Thinking: &registry.ThinkingSupport{},
+			}
+			got, err := applier.Apply(tt.body, tt.config, modelInfo)
+			if err != nil {
+				t.Fatalf("Apply() error = %v", err)
+			}
+			if !gjson.ValidBytes(got) {
+				t.Fatalf("expected valid JSON, got %s", string(got))
+			}
+
+			enableResult := gjson.GetBytes(got, "chat_template_kwargs.enable_thinking")
+			if !enableResult.Exists() {
+				t.Fatalf("enable_thinking missing")
+			}
+			gotEnable := enableResult.Bool()
+			if gotEnable != tt.wantEnable {
+				t.Fatalf("enable_thinking = %v, want %v", gotEnable, tt.wantEnable)
+			}
+
+			// clear_thinking only set when enable_thinking=true
+			clearResult := gjson.GetBytes(got, "chat_template_kwargs.clear_thinking")
+			if tt.wantEnable {
+				if !clearResult.Exists() {
+					t.Fatalf("clear_thinking missing when enable_thinking=true")
+				}
+				if clearResult.Bool() {
+					t.Fatalf("clear_thinking = %v, want false", clearResult.Bool())
+				}
+			} else {
+				if clearResult.Exists() {
+					t.Fatalf("clear_thinking should not exist when enable_thinking=false")
+				}
+			}
+
+			if tt.wantPreserve != "" {
+				gotModel := gjson.GetBytes(got, "model").String()
+				if gotModel != tt.wantPreserve {
+					t.Fatalf("model = %q, want %q", gotModel, tt.wantPreserve)
+				}
+				if !gjson.GetBytes(got, "extra.keep").Bool() {
+					t.Fatalf("expected extra.keep preserved")
+				}
+			}
+		})
+	}
+}
+
+func TestApplyMiniMax(t *testing.T) {
+	applier := NewApplier()
+
+	tests := []struct {
+		name    string
+		modelID string
+		body      []byte
+		config    thinking.ThinkingConfig
+		wantSplit bool
+		wantModel string
+		wantKeep  bool
+	}{
+		{"mode none", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeNone}, false, "", false},
+		{"level none", "minimax-m2.1", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelNone}, false, "", false},
+		{"mode auto", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "", false},
+		{"level high", "MINIMAX-M2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, true, "", false},
+		{"level low", "minimax-m2.1", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, true, "", false},
+		{"level minimal", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, true, "", false},
+		{"level medium", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, true, "", false},
+		{"level xhigh", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelXHigh}, true, "", false},
+		{"budget zero", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, false, "", false},
+		{"budget 1000", "minimax-m2.1", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1000}, true, "", false},
+		{"unknown level", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: "unknown"}, true, "", false},
+		{"preserve fields", "minimax-m2", []byte(`{"model":"minimax-m2","extra":{"keep":true}}`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "minimax-m2", true},
+		{"empty body", "minimax-m2", nil, thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "", false},
+		{"malformed json", "minimax-m2", []byte(`{invalid`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "", false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			modelInfo := &registry.ModelInfo{
+				ID:       tt.modelID,
+				Thinking: &registry.ThinkingSupport{},
+			}
+			got, err := applier.Apply(tt.body, tt.config, modelInfo)
+			if err != nil {
+				t.Fatalf("Apply() error = %v", err)
+			}
+			if !gjson.ValidBytes(got) {
+				t.Fatalf("expected valid JSON, got %s", string(got))
+			}
+
+			splitResult := gjson.GetBytes(got, "reasoning_split")
+			if !splitResult.Exists() {
+				t.Fatalf("reasoning_split missing")
+			}
+			// Verify JSON type is boolean, not string
+			if splitResult.Type != gjson.True && splitResult.Type != gjson.False {
+				t.Fatalf("reasoning_split should be boolean, got type %v", splitResult.Type)
+			}
+			gotSplit := splitResult.Bool()
+			if gotSplit != tt.wantSplit {
+				t.Fatalf("reasoning_split = %v, want %v", gotSplit, tt.wantSplit)
+			}
+
+			if tt.wantModel != "" {
+				gotModel := gjson.GetBytes(got, "model").String()
+				if gotModel != tt.wantModel {
+					t.Fatalf("model = %q, want %q", gotModel, tt.wantModel)
+				}
+				if tt.wantKeep && !gjson.GetBytes(got, "extra.keep").Bool() {
+					t.Fatalf("expected extra.keep preserved")
+				}
+			}
+		})
+	}
+}
+
+// TestIsGLMModel tests the GLM model detection.
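+// Detection is a case-insensitive prefix match, so "GLM-4.7" and
+// "glm-z1-preview" both qualify.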
+// +// Depends on: Epic 9 Story 9-1 +func TestIsGLMModel(t *testing.T) { + tests := []struct { + name string + model string + wantGLM bool + }{ + {"glm-4.6", "glm-4.6", true}, + {"glm-z1-preview", "glm-z1-preview", true}, + {"glm uppercase", "GLM-4.7", true}, + {"minimax-01", "minimax-01", false}, + {"gpt-5.2", "gpt-5.2", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := isGLMModel(tt.model); got != tt.wantGLM { + t.Fatalf("isGLMModel(%q) = %v, want %v", tt.model, got, tt.wantGLM) + } + }) + } +} + +// TestIsMiniMaxModel tests the MiniMax model detection. +// +// Depends on: Epic 9 Story 9-1 +func TestIsMiniMaxModel(t *testing.T) { + tests := []struct { + name string + model string + wantMiniMax bool + }{ + {"minimax-01", "minimax-01", true}, + {"minimax uppercase", "MINIMAX-M2", true}, + {"glm-4.6", "glm-4.6", false}, + {"gpt-5.2", "gpt-5.2", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := isMiniMaxModel(tt.model); got != tt.wantMiniMax { + t.Fatalf("isMiniMaxModel(%q) = %v, want %v", tt.model, got, tt.wantMiniMax) + } + }) + } +} diff --git a/internal/thinking/provider/openai/apply.go b/internal/thinking/provider/openai/apply.go new file mode 100644 index 00000000..810faf34 --- /dev/null +++ b/internal/thinking/provider/openai/apply.go @@ -0,0 +1,135 @@ +// Package openai implements thinking configuration for OpenAI/Codex models. +// +// OpenAI models use the reasoning_effort format with discrete levels +// (low/medium/high). Some models support xhigh and none levels. +// See: _bmad-output/planning-artifacts/architecture.md#Epic-8 +package openai + +import ( + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// Applier implements thinking.ProviderApplier for OpenAI models. +// +// OpenAI-specific behavior: +// - Output format: reasoning_effort (string: low/medium/high/xhigh) +// - Level-only mode: no numeric budget support +// - Some models support ZeroAllowed (gpt-5.1, gpt-5.2) +type Applier struct{} + +var _ thinking.ProviderApplier = (*Applier)(nil) + +// NewApplier creates a new OpenAI thinking applier. +func NewApplier() *Applier { + return &Applier{} +} + +func init() { + thinking.RegisterProvider("openai", NewApplier()) +} + +// Apply applies thinking configuration to OpenAI request body. +// +// Expected output format: +// +// { +// "reasoning_effort": "high" +// } +func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { + if modelInfo == nil { + return body, nil + } + if modelInfo.Thinking == nil { + if modelInfo.Type == "" { + modelID := modelInfo.ID + if modelID == "" { + modelID = "unknown" + } + return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID) + } + return applyCompatibleOpenAI(body, config) + } + + // Only handle ModeLevel and ModeNone; other modes pass through unchanged. 
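+	// (A ModeBudget config is assumed to be normalized to a level by the
+	// upper layer, e.g. thinking.ValidateConfig as exercised in the E2E
+	// tests in apply_test.go.)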
+ if config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + if config.Mode == thinking.ModeLevel { + result, _ := sjson.SetBytes(body, "reasoning_effort", string(config.Level)) + return result, nil + } + + effort := "" + support := modelInfo.Thinking + if config.Budget == 0 { + if support.ZeroAllowed || hasLevel(support.Levels, string(thinking.LevelNone)) { + effort = string(thinking.LevelNone) + } + } + if effort == "" && config.Level != "" { + effort = string(config.Level) + } + if effort == "" && len(support.Levels) > 0 { + effort = support.Levels[0] + } + if effort == "" { + return body, nil + } + + result, _ := sjson.SetBytes(body, "reasoning_effort", effort) + return result, nil +} + +func applyCompatibleOpenAI(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + var effort string + switch config.Mode { + case thinking.ModeLevel: + if config.Level == "" { + return body, nil + } + effort = string(config.Level) + case thinking.ModeNone: + effort = string(thinking.LevelNone) + if config.Level != "" { + effort = string(config.Level) + } + case thinking.ModeAuto: + // Auto mode for user-defined models: pass through as "auto" + effort = string(thinking.LevelAuto) + case thinking.ModeBudget: + // Budget mode: convert budget to level using threshold mapping + level, ok := thinking.ConvertBudgetToLevel(config.Budget) + if !ok { + return body, nil + } + effort = level + default: + return body, nil + } + + result, _ := sjson.SetBytes(body, "reasoning_effort", effort) + return result, nil +} + +func hasLevel(levels []string, target string) bool { + for _, level := range levels { + if strings.EqualFold(strings.TrimSpace(level), target) { + return true + } + } + return false +} diff --git a/internal/thinking/provider/openai/apply_test.go b/internal/thinking/provider/openai/apply_test.go new file mode 100644 index 00000000..88c1800a --- /dev/null +++ b/internal/thinking/provider/openai/apply_test.go @@ -0,0 +1,343 @@ +// Package openai implements thinking configuration for OpenAI/Codex models. 
+package openai
+
+import (
+	"testing"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	"github.com/tidwall/gjson"
+)
+
+func buildOpenAIModelInfo(modelID string) *registry.ModelInfo {
+	info := registry.LookupStaticModelInfo(modelID)
+	if info != nil {
+		return info
+	}
+	// Fallback with complete ThinkingSupport matching real OpenAI model capabilities
+	return &registry.ModelInfo{
+		ID: modelID,
+		Thinking: &registry.ThinkingSupport{
+			Min:         1024,
+			Max:         32768,
+			ZeroAllowed: true,
+			Levels:      []string{"none", "low", "medium", "high", "xhigh"},
+		},
+	}
+}
+
+func TestNewApplier(t *testing.T) {
+	applier := NewApplier()
+	if applier == nil {
+		t.Fatalf("expected non-nil applier")
+	}
+}
+
+func TestApplierImplementsInterface(t *testing.T) {
+	_, ok := interface{}(NewApplier()).(thinking.ProviderApplier)
+	if !ok {
+		t.Fatalf("expected Applier to implement thinking.ProviderApplier")
+	}
+}
+
+func TestApplyNilModelInfo(t *testing.T) {
+	applier := NewApplier()
+	body := []byte(`{"model":"gpt-5.2"}`)
+	got, err := applier.Apply(body, thinking.ThinkingConfig{}, nil)
+	if err != nil {
+		t.Fatalf("expected nil error, got %v", err)
+	}
+	if string(got) != string(body) {
+		t.Fatalf("expected body unchanged, got %s", string(got))
+	}
+}
+
+func TestApplyMissingThinkingSupport(t *testing.T) {
+	applier := NewApplier()
+	modelInfo := &registry.ModelInfo{ID: "gpt-5.2"}
+	got, err := applier.Apply([]byte(`{"model":"gpt-5.2"}`), thinking.ThinkingConfig{}, modelInfo)
+	if err == nil {
+		t.Fatalf("expected error, got nil")
+	}
+	if got != nil {
+		t.Fatalf("expected nil body on error, got %s", string(got))
+	}
+	thinkingErr, ok := err.(*thinking.ThinkingError)
+	if !ok {
+		t.Fatalf("expected ThinkingError, got %T", err)
+	}
+	if thinkingErr.Code != thinking.ErrThinkingNotSupported {
+		t.Fatalf("expected code %s, got %s", thinking.ErrThinkingNotSupported, thinkingErr.Code)
+	}
+	if thinkingErr.Model != "gpt-5.2" {
+		t.Fatalf("expected model gpt-5.2, got %s", thinkingErr.Model)
+	}
+}
+
+// TestApplyLevel tests Apply with ModeLevel (unit test, no ValidateConfig).
+func TestApplyLevel(t *testing.T) {
+	applier := NewApplier()
+	modelInfo := buildOpenAIModelInfo("gpt-5.2")
+
+	tests := []struct {
+		name  string
+		level thinking.ThinkingLevel
+		want  string
+	}{
+		{"high", thinking.LevelHigh, "high"},
+		{"medium", thinking.LevelMedium, "medium"},
+		{"low", thinking.LevelLow, "low"},
+		{"xhigh", thinking.LevelXHigh, "xhigh"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := applier.Apply([]byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: tt.level}, modelInfo)
+			if err != nil {
+				t.Fatalf("Apply() error = %v", err)
+			}
+			if got := gjson.GetBytes(result, "reasoning_effort").String(); got != tt.want {
+				t.Fatalf("reasoning_effort = %q, want %q", got, tt.want)
+			}
+		})
+	}
+}
+
+// TestApplyModeNone tests Apply with ModeNone (unit test).
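+// With Budget == 0 the applier emits "none" when the model allows it
+// (ZeroAllowed or a "none" entry in Levels); otherwise it falls back to
+// config.Level and then to the first entry in support.Levels.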
+func TestApplyModeNone(t *testing.T) {
+	applier := NewApplier()
+
+	tests := []struct {
+		name      string
+		config    thinking.ThinkingConfig
+		modelInfo *registry.ModelInfo
+		want      string
+	}{
+		{"zero allowed", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, &registry.ModelInfo{ID: "gpt-5.2", Thinking: &registry.ThinkingSupport{ZeroAllowed: true, Levels: []string{"none", "low"}}}, "none"},
+		{"clamped to level", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 128, Level: thinking.LevelLow}, &registry.ModelInfo{ID: "gpt-5", Thinking: &registry.ThinkingSupport{Levels: []string{"minimal", "low"}}}, "low"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := applier.Apply([]byte(`{}`), tt.config, tt.modelInfo)
+			if err != nil {
+				t.Fatalf("Apply() error = %v", err)
+			}
+			if got := gjson.GetBytes(result, "reasoning_effort").String(); got != tt.want {
+				t.Fatalf("reasoning_effort = %q, want %q", got, tt.want)
+			}
+		})
+	}
+}
+
+// TestApplyPassthrough tests that unsupported modes pass through unchanged.
+func TestApplyPassthrough(t *testing.T) {
+	applier := NewApplier()
+	modelInfo := buildOpenAIModelInfo("gpt-5.2")
+
+	tests := []struct {
+		name   string
+		config thinking.ThinkingConfig
+	}{
+		{"mode auto", thinking.ThinkingConfig{Mode: thinking.ModeAuto}},
+		{"mode budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			body := []byte(`{"model":"gpt-5.2"}`)
+			result, err := applier.Apply(body, tt.config, modelInfo)
+			if err != nil {
+				t.Fatalf("Apply() error = %v", err)
+			}
+			if string(result) != string(body) {
+				t.Fatalf("Apply() result = %s, want %s", string(result), string(body))
+			}
+		})
+	}
+}
+
+// TestApplyInvalidBody tests Apply with invalid body input.
+func TestApplyInvalidBody(t *testing.T) {
+	applier := NewApplier()
+	modelInfo := buildOpenAIModelInfo("gpt-5.2")
+
+	tests := []struct {
+		name string
+		body []byte
+	}{
+		{"nil body", nil},
+		{"empty body", []byte{}},
+		{"invalid json", []byte(`{"not json"`)},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := applier.Apply(tt.body, thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, modelInfo)
+			if err != nil {
+				t.Fatalf("Apply() error = %v", err)
+			}
+			if !gjson.ValidBytes(result) {
+				t.Fatalf("Apply() result is not valid JSON: %s", string(result))
+			}
+			if got := gjson.GetBytes(result, "reasoning_effort").String(); got != "high" {
+				t.Fatalf("reasoning_effort = %q, want %q", got, "high")
+			}
+		})
+	}
+}
+
+// TestApplyPreservesFields tests that existing body fields are preserved.
+func TestApplyPreservesFields(t *testing.T) {
+	applier := NewApplier()
+	modelInfo := buildOpenAIModelInfo("gpt-5.2")
+
+	body := []byte(`{"model":"gpt-5.2","messages":[]}`)
+	result, err := applier.Apply(body, thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, modelInfo)
+	if err != nil {
+		t.Fatalf("Apply() error = %v", err)
+	}
+	if got := gjson.GetBytes(result, "model").String(); got != "gpt-5.2" {
+		t.Fatalf("model = %q, want %q", got, "gpt-5.2")
+	}
+	if !gjson.GetBytes(result, "messages").Exists() {
+		t.Fatalf("messages missing from result: %s", string(result))
+	}
+	if got := gjson.GetBytes(result, "reasoning_effort").String(); got != "low" {
+		t.Fatalf("reasoning_effort = %q, want %q", got, "low")
+	}
+}
+
+// TestHasLevel tests the hasLevel helper function.
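+// hasLevel trims each candidate entry and compares case-insensitively, so a
+// list entry " medium " matches the target "MEDIUM".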
+func TestHasLevel(t *testing.T) { + tests := []struct { + name string + levels []string + target string + want bool + }{ + {"exact match", []string{"low", "medium", "high"}, "medium", true}, + {"case insensitive", []string{"low", "medium", "high"}, "MEDIUM", true}, + {"with spaces", []string{"low", " medium ", "high"}, "medium", true}, + {"not found", []string{"low", "medium", "high"}, "xhigh", false}, + {"empty levels", []string{}, "medium", false}, + {"none level", []string{"none", "low", "medium"}, "none", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := hasLevel(tt.levels, tt.target); got != tt.want { + t.Fatalf("hasLevel(%v, %q) = %v, want %v", tt.levels, tt.target, got, tt.want) + } + }) + } +} + +// --- End-to-End Tests (ValidateConfig → Apply) --- + +// TestE2EApply tests the full flow: ValidateConfig → Apply. +func TestE2EApply(t *testing.T) { + tests := []struct { + name string + model string + config thinking.ThinkingConfig + want string + }{ + {"level high", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "high"}, + {"level medium", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, "medium"}, + {"level low", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, "low"}, + {"level xhigh", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelXHigh}, "xhigh"}, + {"mode none", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "none"}, + {"budget to level", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, "medium"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + modelInfo := buildOpenAIModelInfo(tt.model) + normalized, err := thinking.ValidateConfig(tt.config, modelInfo.Thinking) + if err != nil { + t.Fatalf("ValidateConfig() error = %v", err) + } + + applier := NewApplier() + result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + if got := gjson.GetBytes(result, "reasoning_effort").String(); got != tt.want { + t.Fatalf("reasoning_effort = %q, want %q", got, tt.want) + } + }) + } +} + +// TestE2EApplyOutputFormat tests the full flow with exact JSON output verification. +func TestE2EApplyOutputFormat(t *testing.T) { + tests := []struct { + name string + model string + config thinking.ThinkingConfig + wantJSON string + }{ + {"level high", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, `{"reasoning_effort":"high"}`}, + {"level none", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, `{"reasoning_effort":"none"}`}, + {"budget converted", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, `{"reasoning_effort":"medium"}`}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + modelInfo := buildOpenAIModelInfo(tt.model) + normalized, err := thinking.ValidateConfig(tt.config, modelInfo.Thinking) + if err != nil { + t.Fatalf("ValidateConfig() error = %v", err) + } + + applier := NewApplier() + result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + if string(result) != tt.wantJSON { + t.Fatalf("Apply() result = %s, want %s", string(result), tt.wantJSON) + } + }) + } +} + +// TestE2EApplyWithExistingBody tests the full flow with existing body fields. 
+func TestE2EApplyWithExistingBody(t *testing.T) { + tests := []struct { + name string + body string + config thinking.ThinkingConfig + wantEffort string + wantModel string + }{ + {"empty body", `{}`, thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "high", ""}, + {"preserve fields", `{"model":"gpt-5.2","messages":[]}`, thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, "medium", "gpt-5.2"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + modelInfo := buildOpenAIModelInfo("gpt-5.2") + normalized, err := thinking.ValidateConfig(tt.config, modelInfo.Thinking) + if err != nil { + t.Fatalf("ValidateConfig() error = %v", err) + } + + applier := NewApplier() + result, err := applier.Apply([]byte(tt.body), *normalized, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + if got := gjson.GetBytes(result, "reasoning_effort").String(); got != tt.wantEffort { + t.Fatalf("reasoning_effort = %q, want %q", got, tt.wantEffort) + } + if tt.wantModel != "" { + if got := gjson.GetBytes(result, "model").String(); got != tt.wantModel { + t.Fatalf("model = %q, want %q", got, tt.wantModel) + } + } + }) + } +} diff --git a/internal/thinking/provider_map_test.go b/internal/thinking/provider_map_test.go new file mode 100644 index 00000000..0944c246 --- /dev/null +++ b/internal/thinking/provider_map_test.go @@ -0,0 +1,51 @@ +// Package thinking_test provides external tests for the thinking package. +// +// This file uses package thinking_test (external) to allow importing provider +// subpackages, which triggers their init() functions to register appliers. +// This avoids import cycles that would occur if thinking package imported providers directly. +package thinking_test + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + + // Blank imports to trigger provider init() registration + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/claude" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai" +) + +func TestProviderAppliersBasic(t *testing.T) { + tests := []struct { + name string + provider string + wantNil bool + }{ + {"gemini provider", "gemini", false}, + {"gemini-cli provider", "gemini-cli", false}, + {"claude provider", "claude", false}, + {"openai provider", "openai", false}, + {"iflow provider", "iflow", false}, + {"antigravity provider", "antigravity", false}, + {"unknown provider", "unknown", true}, + {"empty provider", "", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := thinking.GetProviderApplier(tt.provider) + if tt.wantNil { + if got != nil { + t.Fatalf("GetProviderApplier(%q) = %T, want nil", tt.provider, got) + } + return + } + if got == nil { + t.Fatalf("GetProviderApplier(%q) = nil, want non-nil", tt.provider) + } + }) + } +} diff --git a/internal/thinking/strip.go b/internal/thinking/strip.go new file mode 100644 index 00000000..4904d4d5 --- /dev/null +++ b/internal/thinking/strip.go @@ -0,0 +1,54 @@ +// Package thinking provides unified thinking configuration processing. 
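+//
+// A minimal usage sketch for StripThinkingConfig below (the request body is
+// hypothetical):
+//
+//	body := []byte(`{"thinking":{"budget_tokens":1024},"model":"m"}`)
+//	body = StripThinkingConfig(body, "claude") // "thinking" removed, "model" kept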
+package thinking + +import ( + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// StripThinkingConfig removes thinking configuration fields from request body. +// +// This function is used when a model doesn't support thinking but the request +// contains thinking configuration. The configuration is silently removed to +// prevent upstream API errors. +// +// Parameters: +// - body: Original request body JSON +// - provider: Provider name (determines which fields to strip) +// +// Returns: +// - Modified request body JSON with thinking configuration removed +// - Original body is returned unchanged if: +// - body is empty or invalid JSON +// - provider is unknown +// - no thinking configuration found +func StripThinkingConfig(body []byte, provider string) []byte { + if len(body) == 0 || !gjson.ValidBytes(body) { + return body + } + + switch provider { + case "claude": + result, _ := sjson.DeleteBytes(body, "thinking") + return result + case "gemini": + result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig") + return result + case "gemini-cli", "antigravity": + result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig") + return result + case "openai": + result, _ := sjson.DeleteBytes(body, "reasoning_effort") + return result + case "codex": + result, _ := sjson.DeleteBytes(body, "reasoning.effort") + return result + case "iflow": + result, _ := sjson.DeleteBytes(body, "chat_template_kwargs.enable_thinking") + result, _ = sjson.DeleteBytes(result, "chat_template_kwargs.clear_thinking") + result, _ = sjson.DeleteBytes(result, "reasoning_split") + return result + default: + return body + } +} diff --git a/internal/thinking/strip_test.go b/internal/thinking/strip_test.go new file mode 100644 index 00000000..edd6bd1a --- /dev/null +++ b/internal/thinking/strip_test.go @@ -0,0 +1,66 @@ +// Package thinking_test provides tests for thinking config stripping. 
+package thinking_test + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" +) + +func TestStripThinkingConfig(t *testing.T) { + tests := []struct { + name string + body string + provider string + stripped []string + preserved []string + }{ + {"claude thinking", `{"thinking":{"budget_tokens":8192},"model":"claude-3"}`, "claude", []string{"thinking"}, []string{"model"}}, + {"gemini thinkingConfig", `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192},"temperature":0.7}}`, "gemini", []string{"generationConfig.thinkingConfig"}, []string{"generationConfig.temperature"}}, + {"gemini-cli thinkingConfig", `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192},"temperature":0.7}}}`, "gemini-cli", []string{"request.generationConfig.thinkingConfig"}, []string{"request.generationConfig.temperature"}}, + {"antigravity thinkingConfig", `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":4096},"maxTokens":1024}}}`, "antigravity", []string{"request.generationConfig.thinkingConfig"}, []string{"request.generationConfig.maxTokens"}}, + {"openai reasoning_effort", `{"reasoning_effort":"high","model":"gpt-5"}`, "openai", []string{"reasoning_effort"}, []string{"model"}}, + {"iflow glm", `{"chat_template_kwargs":{"enable_thinking":true,"clear_thinking":false,"other":"value"}}`, "iflow", []string{"chat_template_kwargs.enable_thinking", "chat_template_kwargs.clear_thinking"}, []string{"chat_template_kwargs.other"}}, + {"iflow minimax", `{"reasoning_split":true,"model":"minimax"}`, "iflow", []string{"reasoning_split"}, []string{"model"}}, + {"iflow both formats", `{"chat_template_kwargs":{"enable_thinking":true,"clear_thinking":false},"reasoning_split":true,"model":"mixed"}`, "iflow", []string{"chat_template_kwargs.enable_thinking", "chat_template_kwargs.clear_thinking", "reasoning_split"}, []string{"model"}}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := thinking.StripThinkingConfig([]byte(tt.body), tt.provider) + + for _, path := range tt.stripped { + if gjson.GetBytes(got, path).Exists() { + t.Fatalf("expected %s to be stripped, got %s", path, string(got)) + } + } + for _, path := range tt.preserved { + if !gjson.GetBytes(got, path).Exists() { + t.Fatalf("expected %s to be preserved, got %s", path, string(got)) + } + } + }) + } +} + +func TestStripThinkingConfigPassthrough(t *testing.T) { + tests := []struct { + name string + body string + provider string + }{ + {"empty body", ``, "claude"}, + {"invalid json", `{not valid`, "claude"}, + {"unknown provider", `{"thinking":{"budget_tokens":8192}}`, "unknown"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := thinking.StripThinkingConfig([]byte(tt.body), tt.provider) + if string(got) != tt.body { + t.Fatalf("StripThinkingConfig() = %s, want passthrough %s", string(got), tt.body) + } + }) + } +} diff --git a/internal/thinking/suffix.go b/internal/thinking/suffix.go new file mode 100644 index 00000000..e3b4087e --- /dev/null +++ b/internal/thinking/suffix.go @@ -0,0 +1,170 @@ +// Package thinking provides unified thinking configuration processing. +// +// This file implements suffix parsing functionality for extracting +// thinking configuration from model names in the format model(value). +package thinking + +import ( + "fmt" + "strconv" + "strings" +) + +// ParseSuffix extracts thinking suffix from a model name. 
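+// (Matching anchors on the last "(" and requires the name to end with ")";
+// see the nested-parentheses cases in the tests.)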
+// +// The suffix format is: model-name(value) +// Examples: +// - "claude-sonnet-4-5(16384)" -> ModelName="claude-sonnet-4-5", RawSuffix="16384" +// - "gpt-5.2(high)" -> ModelName="gpt-5.2", RawSuffix="high" +// - "gemini-2.5-pro" -> ModelName="gemini-2.5-pro", HasSuffix=false +// +// This function only extracts the suffix; it does not validate or interpret +// the suffix content. Use ParseNumericSuffix, ParseLevelSuffix, etc. for +// content interpretation. +func ParseSuffix(model string) SuffixResult { + // Find the last opening parenthesis + lastOpen := strings.LastIndex(model, "(") + if lastOpen == -1 { + return SuffixResult{ModelName: model, HasSuffix: false} + } + + // Check if the string ends with a closing parenthesis + if !strings.HasSuffix(model, ")") { + return SuffixResult{ModelName: model, HasSuffix: false} + } + + // Extract components + modelName := model[:lastOpen] + rawSuffix := model[lastOpen+1 : len(model)-1] + + return SuffixResult{ + ModelName: modelName, + HasSuffix: true, + RawSuffix: rawSuffix, + } +} + +// ParseSuffixWithError extracts thinking suffix and returns an error on invalid format. +// +// Invalid format cases: +// - Contains "(" but does not end with ")" +// - Contains ")" without any "(" +// +// The error message includes the original input for debugging context. +func ParseSuffixWithError(model string) (SuffixResult, error) { + lastOpen := strings.LastIndex(model, "(") + if lastOpen == -1 { + if strings.Contains(model, ")") { + return SuffixResult{ModelName: model, HasSuffix: false}, NewThinkingError(ErrInvalidSuffix, fmt.Sprintf("invalid suffix format: %s", model)) + } + return SuffixResult{ModelName: model, HasSuffix: false}, nil + } + + if !strings.HasSuffix(model, ")") { + return SuffixResult{ModelName: model, HasSuffix: false}, NewThinkingError(ErrInvalidSuffix, fmt.Sprintf("invalid suffix format: %s", model)) + } + + return ParseSuffix(model), nil +} + +// ParseNumericSuffix attempts to parse a raw suffix as a numeric budget value. +// +// This function parses the raw suffix content (from ParseSuffix.RawSuffix) as an integer. +// Only non-negative integers are considered valid numeric suffixes. +// +// Platform note: The budget value uses Go's int type, which is 32-bit on 32-bit +// systems and 64-bit on 64-bit systems. Values exceeding the platform's int range +// will return ok=false. +// +// Leading zeros are accepted: "08192" parses as 8192. +// +// Examples: +// - "8192" -> budget=8192, ok=true +// - "0" -> budget=0, ok=true (represents ModeNone) +// - "08192" -> budget=8192, ok=true (leading zeros accepted) +// - "-1" -> budget=0, ok=false (negative numbers are not valid numeric suffixes) +// - "high" -> budget=0, ok=false (not a number) +// - "9223372036854775808" -> budget=0, ok=false (overflow on 64-bit systems) +// +// For special handling of -1 as auto mode, use ParseSpecialSuffix instead. +func ParseNumericSuffix(rawSuffix string) (budget int, ok bool) { + if rawSuffix == "" { + return 0, false + } + + value, err := strconv.Atoi(rawSuffix) + if err != nil { + return 0, false + } + + // Negative numbers are not valid numeric suffixes + // -1 should be handled by special value parsing as "auto" + if value < 0 { + return 0, false + } + + return value, true +} + +// ParseSpecialSuffix attempts to parse a raw suffix as a special thinking mode value. 
+// +// This function handles special strings that represent a change in thinking mode: +// - "none" -> ModeNone (disables thinking) +// - "auto" -> ModeAuto (automatic/dynamic thinking) +// - "-1" -> ModeAuto (numeric representation of auto mode) +// +// String values are case-insensitive. +func ParseSpecialSuffix(rawSuffix string) (mode ThinkingMode, ok bool) { + if rawSuffix == "" { + return ModeBudget, false + } + + // Case-insensitive matching + switch strings.ToLower(rawSuffix) { + case "none": + return ModeNone, true + case "auto", "-1": + return ModeAuto, true + default: + return ModeBudget, false + } +} + +// ParseLevelSuffix attempts to parse a raw suffix as a discrete thinking level. +// +// This function parses the raw suffix content (from ParseSuffix.RawSuffix) as a level. +// Only discrete effort levels are valid: minimal, low, medium, high, xhigh. +// Level matching is case-insensitive. +// +// Special values (none, auto) are NOT handled by this function; use ParseSpecialSuffix +// instead. This separation allows callers to prioritize special value handling. +// +// Examples: +// - "high" -> level=LevelHigh, ok=true +// - "HIGH" -> level=LevelHigh, ok=true (case insensitive) +// - "medium" -> level=LevelMedium, ok=true +// - "none" -> level="", ok=false (special value, use ParseSpecialSuffix) +// - "auto" -> level="", ok=false (special value, use ParseSpecialSuffix) +// - "8192" -> level="", ok=false (numeric, use ParseNumericSuffix) +// - "ultra" -> level="", ok=false (unknown level) +func ParseLevelSuffix(rawSuffix string) (level ThinkingLevel, ok bool) { + if rawSuffix == "" { + return "", false + } + + // Case-insensitive matching + switch strings.ToLower(rawSuffix) { + case "minimal": + return LevelMinimal, true + case "low": + return LevelLow, true + case "medium": + return LevelMedium, true + case "high": + return LevelHigh, true + case "xhigh": + return LevelXHigh, true + default: + return "", false + } +} diff --git a/internal/thinking/suffix_test.go b/internal/thinking/suffix_test.go new file mode 100644 index 00000000..b3ea3ed3 --- /dev/null +++ b/internal/thinking/suffix_test.go @@ -0,0 +1,313 @@ +// Package thinking provides unified thinking configuration processing. +package thinking + +import ( + "strings" + "testing" +) + +// TestParseSuffix tests the ParseSuffix function. +// +// ParseSuffix extracts thinking suffix from model name. +// Format: model-name(value) where value is the raw suffix content. +// This function only extracts; interpretation is done by other functions. 
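+//
+// A typical caller (a sketch; the precedence order is an assumption based on
+// the doc comments in suffix.go) tries special values, then levels, then
+// numeric budgets:
+//
+//	res := ParseSuffix(model)
+//	if res.HasSuffix {
+//		if mode, ok := ParseSpecialSuffix(res.RawSuffix); ok {
+//			_ = mode // none / auto
+//		} else if level, ok := ParseLevelSuffix(res.RawSuffix); ok {
+//			_ = level // minimal..xhigh
+//		} else if budget, ok := ParseNumericSuffix(res.RawSuffix); ok {
+//			_ = budget // non-negative token budget
+//		}
+//	}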
+func TestParseSuffix(t *testing.T) { + tests := []struct { + name string + model string + wantModel string + wantSuffix bool + wantRaw string + }{ + {"no suffix", "claude-sonnet-4-5", "claude-sonnet-4-5", false, ""}, + {"numeric suffix", "model(1000)", "model", true, "1000"}, + {"level suffix", "gpt-5(high)", "gpt-5", true, "high"}, + {"auto suffix", "gemini-2.5-pro(auto)", "gemini-2.5-pro", true, "auto"}, + {"none suffix", "model(none)", "model", true, "none"}, + {"complex model name", "gemini-2.5-flash-lite(8192)", "gemini-2.5-flash-lite", true, "8192"}, + {"alias with suffix", "g25p(1000)", "g25p", true, "1000"}, + {"empty suffix", "model()", "model", true, ""}, + {"nested parens", "model(a(b))", "model(a", true, "b)"}, + {"no model name", "(1000)", "", true, "1000"}, + {"unmatched open", "model(", "model(", false, ""}, + {"unmatched close", "model)", "model)", false, ""}, + {"paren not at end", "model(1000)extra", "model(1000)extra", false, ""}, + {"empty string", "", "", false, ""}, + {"large budget", "claude-opus(128000)", "claude-opus", true, "128000"}, + {"xhigh level", "gpt-5.2(xhigh)", "gpt-5.2", true, "xhigh"}, + {"minimal level", "model(minimal)", "model", true, "minimal"}, + {"medium level", "model(medium)", "model", true, "medium"}, + {"low level", "model(low)", "model", true, "low"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := ParseSuffix(tt.model) + if got.ModelName != tt.wantModel { + t.Errorf("ModelName = %q, want %q", got.ModelName, tt.wantModel) + } + if got.HasSuffix != tt.wantSuffix { + t.Errorf("HasSuffix = %v, want %v", got.HasSuffix, tt.wantSuffix) + } + if got.RawSuffix != tt.wantRaw { + t.Errorf("RawSuffix = %q, want %q", got.RawSuffix, tt.wantRaw) + } + }) + } +} + +// TestParseSuffixWithError tests invalid suffix error reporting. +func TestParseSuffixWithError(t *testing.T) { + tests := []struct { + name string + model string + wantHasSuffix bool + }{ + {"missing close paren", "model(abc", false}, + {"unmatched close paren", "model)", false}, + {"paren not at end", "model(1000)extra", false}, + {"no suffix", "gpt-5", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ParseSuffixWithError(tt.model) + if tt.name == "no suffix" { + if err != nil { + t.Fatalf("ParseSuffixWithError(%q) error = %v, want nil", tt.model, err) + } + if got.HasSuffix != tt.wantHasSuffix { + t.Errorf("HasSuffix = %v, want %v", got.HasSuffix, tt.wantHasSuffix) + } + return + } + + if err == nil { + t.Fatalf("ParseSuffixWithError(%q) error = nil, want error", tt.model) + } + thinkingErr, ok := err.(*ThinkingError) + if !ok { + t.Fatalf("ParseSuffixWithError(%q) error type = %T, want *ThinkingError", tt.model, err) + } + if thinkingErr.Code != ErrInvalidSuffix { + t.Errorf("error code = %v, want %v", thinkingErr.Code, ErrInvalidSuffix) + } + if !strings.Contains(thinkingErr.Message, tt.model) { + t.Errorf("message %q does not include input %q", thinkingErr.Message, tt.model) + } + if got.HasSuffix != tt.wantHasSuffix { + t.Errorf("HasSuffix = %v, want %v", got.HasSuffix, tt.wantHasSuffix) + } + }) + } +} + +// TestParseSuffixNumeric tests numeric suffix parsing. +// +// ParseNumericSuffix parses raw suffix content as integer budget. +// Only non-negative integers are valid. Negative numbers return ok=false. 
+func TestParseSuffixNumeric(t *testing.T) { + tests := []struct { + name string + rawSuffix string + wantBudget int + wantOK bool + }{ + {"small budget", "512", 512, true}, + {"standard budget", "8192", 8192, true}, + {"large budget", "100000", 100000, true}, + {"max int32", "2147483647", 2147483647, true}, + {"max int64", "9223372036854775807", 9223372036854775807, true}, + {"zero", "0", 0, true}, + {"negative one", "-1", 0, false}, + {"negative", "-100", 0, false}, + {"int64 overflow", "9223372036854775808", 0, false}, + {"large overflow", "99999999999999999999", 0, false}, + {"not a number", "abc", 0, false}, + {"level string", "high", 0, false}, + {"float", "1.5", 0, false}, + {"empty", "", 0, false}, + {"leading zero", "08192", 8192, true}, + {"whitespace", " 8192 ", 0, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + budget, ok := ParseNumericSuffix(tt.rawSuffix) + if budget != tt.wantBudget { + t.Errorf("budget = %d, want %d", budget, tt.wantBudget) + } + if ok != tt.wantOK { + t.Errorf("ok = %v, want %v", ok, tt.wantOK) + } + }) + } +} + +// TestParseSuffixLevel tests level suffix parsing. +// +// ParseLevelSuffix parses raw suffix content as discrete thinking level. +// Only effort levels (minimal, low, medium, high, xhigh) are valid. +// Special values (none, auto) return ok=false - use ParseSpecialSuffix instead. +func TestParseSuffixLevel(t *testing.T) { + tests := []struct { + name string + rawSuffix string + wantLevel ThinkingLevel + wantOK bool + }{ + {"minimal", "minimal", LevelMinimal, true}, + {"low", "low", LevelLow, true}, + {"medium", "medium", LevelMedium, true}, + {"high", "high", LevelHigh, true}, + {"xhigh", "xhigh", LevelXHigh, true}, + {"case HIGH", "HIGH", LevelHigh, true}, + {"case High", "High", LevelHigh, true}, + {"case hIgH", "hIgH", LevelHigh, true}, + {"case MINIMAL", "MINIMAL", LevelMinimal, true}, + {"case XHigh", "XHigh", LevelXHigh, true}, + {"none special", "none", "", false}, + {"auto special", "auto", "", false}, + {"unknown ultra", "ultra", "", false}, + {"unknown maximum", "maximum", "", false}, + {"unknown invalid", "invalid", "", false}, + {"numeric", "8192", "", false}, + {"numeric zero", "0", "", false}, + {"empty", "", "", false}, + {"whitespace", " high ", "", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + level, ok := ParseLevelSuffix(tt.rawSuffix) + if level != tt.wantLevel { + t.Errorf("level = %q, want %q", level, tt.wantLevel) + } + if ok != tt.wantOK { + t.Errorf("ok = %v, want %v", ok, tt.wantOK) + } + }) + } +} + +// TestParseSuffixSpecialValues tests special value suffix parsing. 
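+//
+// Illustrative calls (drawn from the cases below):
+//
+//	mode, ok := ParseSpecialSuffix("none") // mode == ModeNone, ok == true
+//	mode, ok = ParseSpecialSuffix("-1")    // mode == ModeAuto, ok == true
+//	_, ok = ParseSpecialSuffix("high")     // ok == false; levels go through ParseLevelSuffix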
+//
+// Depends on: Epic 3 Story 3-4 (special value suffix parsing)
+func TestParseSuffixSpecialValues(t *testing.T) {
+	tests := []struct {
+		name      string
+		rawSuffix string
+		wantMode  ThinkingMode
+		wantOK    bool
+	}{
+		{"none", "none", ModeNone, true},
+		{"auto", "auto", ModeAuto, true},
+		{"negative one", "-1", ModeAuto, true},
+		{"case NONE", "NONE", ModeNone, true},
+		{"case Auto", "Auto", ModeAuto, true},
+		{"case aUtO", "aUtO", ModeAuto, true},
+		{"case NoNe", "NoNe", ModeNone, true},
+		{"empty", "", ModeBudget, false},
+		{"level high", "high", ModeBudget, false},
+		{"numeric", "8192", ModeBudget, false},
+		{"negative other", "-2", ModeBudget, false},
+		{"whitespace", " none ", ModeBudget, false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mode, ok := ParseSpecialSuffix(tt.rawSuffix)
+			if mode != tt.wantMode {
+				t.Errorf("mode = %v, want %v", mode, tt.wantMode)
+			}
+			if ok != tt.wantOK {
+				t.Errorf("ok = %v, want %v", ok, tt.wantOK)
+			}
+		})
+	}
+}
+
+// TestParseSuffixAliasFormats tests alias model suffix parsing.
+//
+// This test validates that short model aliases (e.g., g25p, cs45) work correctly
+// with all suffix types. Alias-to-canonical-model mapping is the caller's responsibility.
+func TestParseSuffixAliasFormats(t *testing.T) {
+	tests := []struct {
+		name        string        // test case description
+		model       string        // input model string with optional suffix
+		wantName    string        // expected ModelName after parsing
+		wantSuffix  bool          // expected HasSuffix value
+		wantRaw     string        // expected RawSuffix value
+		checkBudget bool          // if true, verify ParseNumericSuffix result
+		wantBudget  int           // expected budget (only when checkBudget=true)
+		checkLevel  bool          // if true, verify ParseLevelSuffix result
+		wantLevel   ThinkingLevel // expected level (only when checkLevel=true)
+		checkMode   bool          // if true, verify ParseSpecialSuffix result
+		wantMode    ThinkingMode  // expected mode (only when checkMode=true)
+	}{
+		// Alias + numeric suffix
+		{"alias numeric g25p", "g25p(1000)", "g25p", true, "1000", true, 1000, false, "", false, 0},
+		{"alias numeric cs45", "cs45(16384)", "cs45", true, "16384", true, 16384, false, "", false, 0},
+		{"alias numeric g3f", "g3f(8192)", "g3f", true, "8192", true, 8192, false, "", false, 0},
+		// Alias + level suffix
+		{"alias level gpt52", "gpt52(high)", "gpt52", true, "high", false, 0, true, LevelHigh, false, 0},
+		{"alias level g25f", "g25f(medium)", "g25f", true, "medium", false, 0, true, LevelMedium, false, 0},
+		{"alias level cs4", "cs4(low)", "cs4", true, "low", false, 0, true, LevelLow, false, 0},
+		// Alias + special suffix
+		{"alias auto g3f", "g3f(auto)", "g3f", true, "auto", false, 0, false, "", true, ModeAuto},
+		{"alias none claude", "claude(none)", "claude", true, "none", false, 0, false, "", true, ModeNone},
+		{"alias -1 g25p", "g25p(-1)", "g25p", true, "-1", false, 0, false, "", true, ModeAuto},
+		// Single char alias
+		{"single char c", "c(1024)", "c", true, "1024", true, 1024, false, "", false, 0},
+		{"single char g", "g(high)", "g", true, "high", false, 0, true, LevelHigh, false, 0},
+		// Alias containing numbers
+		{"alias with num gpt5", "gpt5(medium)", "gpt5", true, "medium", false, 0, true, LevelMedium, false, 0},
+		{"alias with num g25", "g25(1000)", "g25", true, "1000", true, 1000, false, "", false, 0},
+		// Edge cases
+		{"no suffix", "g25p", "g25p", false, "", false, 0, false, "", false, 0},
+		{"empty alias", "(1000)", "", true, "1000", true, 1000, false, "", false, 0},
+		{"hyphen alias", "g-25-p(1000)", "g-25-p", true, "1000",
true, 1000, false, "", false, 0}, + {"underscore alias", "g_25_p(high)", "g_25_p", true, "high", false, 0, true, LevelHigh, false, 0}, + {"nested parens", "g25p(test)(1000)", "g25p(test)", true, "1000", true, 1000, false, "", false, 0}, + } + + // ParseSuffix only extracts alias and suffix; mapping to canonical model is caller responsibility. + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := ParseSuffix(tt.model) + + if result.ModelName != tt.wantName { + t.Errorf("ParseSuffix(%q).ModelName = %q, want %q", tt.model, result.ModelName, tt.wantName) + } + if result.HasSuffix != tt.wantSuffix { + t.Errorf("ParseSuffix(%q).HasSuffix = %v, want %v", tt.model, result.HasSuffix, tt.wantSuffix) + } + if result.RawSuffix != tt.wantRaw { + t.Errorf("ParseSuffix(%q).RawSuffix = %q, want %q", tt.model, result.RawSuffix, tt.wantRaw) + } + + if result.HasSuffix { + if tt.checkBudget { + budget, ok := ParseNumericSuffix(result.RawSuffix) + if !ok || budget != tt.wantBudget { + t.Errorf("ParseNumericSuffix(%q) = (%d, %v), want (%d, true)", + result.RawSuffix, budget, ok, tt.wantBudget) + } + } + if tt.checkLevel { + level, ok := ParseLevelSuffix(result.RawSuffix) + if !ok || level != tt.wantLevel { + t.Errorf("ParseLevelSuffix(%q) = (%q, %v), want (%q, true)", + result.RawSuffix, level, ok, tt.wantLevel) + } + } + if tt.checkMode { + mode, ok := ParseSpecialSuffix(result.RawSuffix) + if !ok || mode != tt.wantMode { + t.Errorf("ParseSpecialSuffix(%q) = (%v, %v), want (%v, true)", + result.RawSuffix, mode, ok, tt.wantMode) + } + } + } + }) + } +} diff --git a/internal/thinking/types.go b/internal/thinking/types.go new file mode 100644 index 00000000..7197fa6e --- /dev/null +++ b/internal/thinking/types.go @@ -0,0 +1,100 @@ +// Package thinking provides unified thinking configuration processing. +// +// This package offers a unified interface for parsing, validating, and applying +// thinking configurations across various AI providers (Claude, Gemini, OpenAI, iFlow). +package thinking + +import "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + +// ThinkingMode represents the type of thinking configuration mode. +type ThinkingMode int + +const ( + // ModeBudget indicates using a numeric budget (corresponds to suffix "(1000)" etc.) + ModeBudget ThinkingMode = iota + // ModeLevel indicates using a discrete level (corresponds to suffix "(high)" etc.) + ModeLevel + // ModeNone indicates thinking is disabled (corresponds to suffix "(none)" or budget=0) + ModeNone + // ModeAuto indicates automatic/dynamic thinking (corresponds to suffix "(auto)" or budget=-1) + ModeAuto +) + +// ThinkingLevel represents a discrete thinking level. +type ThinkingLevel string + +const ( + // LevelNone disables thinking + LevelNone ThinkingLevel = "none" + // LevelAuto enables automatic/dynamic thinking + LevelAuto ThinkingLevel = "auto" + // LevelMinimal sets minimal thinking effort + LevelMinimal ThinkingLevel = "minimal" + // LevelLow sets low thinking effort + LevelLow ThinkingLevel = "low" + // LevelMedium sets medium thinking effort + LevelMedium ThinkingLevel = "medium" + // LevelHigh sets high thinking effort + LevelHigh ThinkingLevel = "high" + // LevelXHigh sets extra-high thinking effort + LevelXHigh ThinkingLevel = "xhigh" +) + +// ThinkingConfig represents a unified thinking configuration. +// +// This struct is used to pass thinking configuration information between components. 
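+//
+// Two representative values, shown here as a sketch (the per-mode field
+// rules are listed below):
+//
+//	byBudget := ThinkingConfig{Mode: ModeBudget, Budget: 8192}
+//	byLevel := ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}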
+// Depending on Mode, either Budget or Level field is effective: +// - ModeNone: Budget=0, Level is ignored +// - ModeAuto: Budget=-1, Level is ignored +// - ModeBudget: Budget is a positive integer, Level is ignored +// - ModeLevel: Budget is ignored, Level is a valid level +type ThinkingConfig struct { + // Mode specifies the configuration mode + Mode ThinkingMode + // Budget is the thinking budget (token count), only effective when Mode is ModeBudget. + // Special values: 0 means disabled, -1 means automatic + Budget int + // Level is the thinking level, only effective when Mode is ModeLevel + Level ThinkingLevel +} + +// SuffixResult represents the result of parsing a model name for thinking suffix. +// +// A thinking suffix is specified in the format model-name(value), where value +// can be a numeric budget (e.g., "16384") or a level name (e.g., "high"). +type SuffixResult struct { + // ModelName is the model name with the suffix removed. + // If no suffix was found, this equals the original input. + ModelName string + + // HasSuffix indicates whether a valid suffix was found. + HasSuffix bool + + // RawSuffix is the content inside the parentheses, without the parentheses. + // Empty string if HasSuffix is false. + RawSuffix string +} + +// ProviderApplier defines the interface for provider-specific thinking configuration application. +// +// Types implementing this interface are responsible for converting a unified ThinkingConfig +// into provider-specific format and applying it to the request body. +// +// Implementation requirements: +// - Apply method must be idempotent +// - Must not modify the input config or modelInfo +// - Returns a modified copy of the request body +// - Returns appropriate ThinkingError for unsupported configurations +type ProviderApplier interface { + // Apply applies the thinking configuration to the request body. + // + // Parameters: + // - body: Original request body JSON + // - config: Unified thinking configuration + // - modelInfo: Model registry information containing ThinkingSupport properties + // + // Returns: + // - Modified request body JSON + // - ThinkingError if the configuration is invalid or unsupported + Apply(body []byte, config ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) +} diff --git a/internal/thinking/validate.go b/internal/thinking/validate.go new file mode 100644 index 00000000..66f8160c --- /dev/null +++ b/internal/thinking/validate.go @@ -0,0 +1,260 @@ +// Package thinking provides unified thinking configuration processing logic. +package thinking + +import ( + "fmt" + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + log "github.com/sirupsen/logrus" +) + +// ClampBudget clamps a budget value to the specified range [min, max]. +// +// This function ensures budget values stay within model-supported bounds. +// When clamping occurs, a Debug-level log is recorded. 
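+//
+// A worked example of the rules listed below:
+//
+//	ClampBudget(50, 128, 32768)    // returns 128 (raised to min)
+//	ClampBudget(50000, 128, 32768) // returns 32768 (lowered to max)
+//	ClampBudget(-1, 128, 32768)    // returns -1 (auto passes through)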
+// +// Special handling: +// - Auto value (-1) passes through without clamping +// - Values below min are clamped to min +// - Values above max are clamped to max +// +// Parameters: +// - value: The budget value to clamp +// - min: Minimum allowed budget (inclusive) +// - max: Maximum allowed budget (inclusive) +// +// Returns: +// - The clamped budget value (min ≤ result ≤ max, or -1 for auto) +// +// Logging: +// - Debug level when value is clamped (either to min or max) +// - Fields: original_value, clamped_to, min, max +func ClampBudget(value, min, max int) int { + // Auto value (-1) passes through without clamping + if value == -1 { + return value + } + + // Clamp to min if below + if value < min { + logClamp(value, min, min, max) + return min + } + + // Clamp to max if above + if value > max { + logClamp(value, max, min, max) + return max + } + + // Within range, return original + return value +} + +// ClampBudgetWithZeroCheck clamps a budget value to the specified range [min, max] +// while honoring the ZeroAllowed constraint. +// +// This function extends ClampBudget with ZeroAllowed boundary handling. +// When zeroAllowed is false and value is 0, the value is clamped to min and logged. +// +// Parameters: +// - value: The budget value to clamp +// - min: Minimum allowed budget (inclusive) +// - max: Maximum allowed budget (inclusive) +// - zeroAllowed: Whether 0 (thinking disabled) is allowed +// +// Returns: +// - The clamped budget value (min ≤ result ≤ max, or -1 for auto) +// +// Logging: +// - Warn level when zeroAllowed=false and value=0 (zero not allowed for model) +// - Fields: original_value, clamped_to, reason +func ClampBudgetWithZeroCheck(value, min, max int, zeroAllowed bool) int { + if value == 0 { + if zeroAllowed { + return 0 + } + log.WithFields(log.Fields{ + "original_value": value, + "clamped_to": min, + "min": min, + "max": max, + "reason": "zero_not_allowed", + }).Warn("budget clamped: zero not allowed") + return min + } + + return ClampBudget(value, min, max) +} + +// ValidateConfig validates a thinking configuration against model capabilities. +// +// This function performs comprehensive validation: +// - Checks if the model supports thinking +// - Auto-converts between Budget and Level formats based on model capability +// - Validates that requested level is in the model's supported levels list +// - Clamps budget values to model's allowed range +// +// Parameters: +// - config: The thinking configuration to validate +// - support: Model's ThinkingSupport properties (nil means no thinking support) +// +// Returns: +// - Normalized ThinkingConfig with clamped values +// - ThinkingError if validation fails (ErrThinkingNotSupported, ErrLevelNotSupported, etc.) 
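+//
+// A sketch of a typical call (values mirror the validation tests):
+//
+//	cfg, err := ValidateConfig(
+//		ThinkingConfig{Mode: ModeBudget, Budget: 200000},
+//		&registry.ThinkingSupport{Min: 1024, Max: 100000},
+//	)
+//	// err == nil, cfg.Mode == ModeBudget, cfg.Budget == 100000 (clamped to max)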
+//
+// Auto-conversion behavior:
+// - Budget-only model + Level config → Level converted to Budget
+// - Level-only model + Budget config → Budget converted to Level
+// - Hybrid model → preserve original format
+func ValidateConfig(config ThinkingConfig, support *registry.ThinkingSupport) (*ThinkingConfig, error) {
+	normalized := config
+	if support == nil {
+		if config.Mode != ModeNone {
+			return nil, NewThinkingErrorWithModel(ErrThinkingNotSupported, "thinking not supported for this model", "unknown")
+		}
+		return &normalized, nil
+	}
+
+	capability := detectModelCapability(&registry.ModelInfo{Thinking: support})
+	switch capability {
+	case CapabilityBudgetOnly:
+		if normalized.Mode == ModeLevel {
+			if normalized.Level == LevelAuto {
+				break
+			}
+			budget, ok := ConvertLevelToBudget(string(normalized.Level))
+			if !ok {
+				return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("unknown level: %s", normalized.Level))
+			}
+			normalized.Mode = ModeBudget
+			normalized.Budget = budget
+			normalized.Level = ""
+		}
+	case CapabilityLevelOnly:
+		if normalized.Mode == ModeBudget {
+			level, ok := ConvertBudgetToLevel(normalized.Budget)
+			if !ok {
+				return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("budget %d cannot be converted to a valid level", normalized.Budget))
+			}
+			normalized.Mode = ModeLevel
+			normalized.Level = ThinkingLevel(level)
+			normalized.Budget = 0
+		}
+	case CapabilityHybrid:
+	}
+
+	if normalized.Mode == ModeLevel && normalized.Level == LevelNone {
+		normalized.Mode = ModeNone
+		normalized.Budget = 0
+		normalized.Level = ""
+	}
+	if normalized.Mode == ModeLevel && normalized.Level == LevelAuto {
+		normalized.Mode = ModeAuto
+		normalized.Budget = -1
+		normalized.Level = ""
+	}
+	if normalized.Mode == ModeBudget && normalized.Budget == 0 {
+		normalized.Mode = ModeNone
+		normalized.Level = ""
+	}
+
+	if len(support.Levels) > 0 && normalized.Mode == ModeLevel {
+		if !isLevelSupported(string(normalized.Level), support.Levels) {
+			validLevels := normalizeLevels(support.Levels)
+			message := fmt.Sprintf("level %q not supported, valid levels: %s", strings.ToLower(string(normalized.Level)), strings.Join(validLevels, ", "))
+			return nil, NewThinkingError(ErrLevelNotSupported, message)
+		}
+	}
+
+	// Convert ModeAuto to mid-range if dynamic not allowed
+	if normalized.Mode == ModeAuto && !support.DynamicAllowed {
+		normalized = convertAutoToMidRange(normalized, support)
+	}
+
+	switch normalized.Mode {
+	case ModeBudget, ModeAuto, ModeNone:
+		clamped := ClampBudgetWithZeroCheck(normalized.Budget, support.Min, support.Max, support.ZeroAllowed)
+		normalized.Budget = clamped
+	}
+
+	// ModeNone with clamped Budget > 0: set Level to lowest for Level-only/Hybrid models
+	// This ensures Apply layer doesn't need to access support.Levels
+	if normalized.Mode == ModeNone && normalized.Budget > 0 && len(support.Levels) > 0 {
+		normalized.Level = ThinkingLevel(support.Levels[0])
+	}
+
+	return &normalized, nil
+}
+
+func isLevelSupported(level string, supported []string) bool {
+	for _, candidate := range supported {
+		if strings.EqualFold(level, strings.TrimSpace(candidate)) {
+			return true
+		}
+	}
+	return false
+}
+
+func normalizeLevels(levels []string) []string {
+	normalized := make([]string, 0, len(levels))
+	for _, level := range levels {
+		normalized = append(normalized, strings.ToLower(strings.TrimSpace(level)))
+	}
+	return normalized
+}
+
+// convertAutoToMidRange converts ModeAuto to a mid-range value when dynamic is not allowed.
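+//
+// For a budget-range model with Min=128 and Max=32768 the midpoint is
+// (128+32768)/2 = 16448, which is the value the validation tests expect
+// when auto mode is requested but dynamic thinking is not allowed.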
+// +// This function handles the case where a model does not support dynamic/auto thinking. +// The auto mode is silently converted to a fixed value based on model capability: +// - Level-only models: convert to ModeLevel with LevelMedium +// - Budget models: convert to ModeBudget with mid = (Min + Max) / 2 +// +// Logging: +// - Debug level when conversion occurs +// - Fields: original_mode, clamped_to, reason +func convertAutoToMidRange(config ThinkingConfig, support *registry.ThinkingSupport) ThinkingConfig { + // For level-only models (has Levels but no Min/Max range), use ModeLevel with medium + if len(support.Levels) > 0 && support.Min == 0 && support.Max == 0 { + config.Mode = ModeLevel + config.Level = LevelMedium + config.Budget = 0 + log.WithFields(log.Fields{ + "original_mode": "auto", + "clamped_to": string(LevelMedium), + "reason": "dynamic_not_allowed_level_only", + }).Debug("thinking mode converted: dynamic not allowed, using medium level") + return config + } + + // For budget models, use mid-range budget + mid := (support.Min + support.Max) / 2 + if mid <= 0 && support.ZeroAllowed { + config.Mode = ModeNone + config.Budget = 0 + } else if mid <= 0 { + config.Mode = ModeBudget + config.Budget = support.Min + } else { + config.Mode = ModeBudget + config.Budget = mid + } + log.WithFields(log.Fields{ + "original_mode": "auto", + "clamped_to": config.Budget, + "reason": "dynamic_not_allowed", + }).Debug("thinking mode converted: dynamic not allowed") + return config +} + +// logClamp logs a debug message when budget clamping occurs. +func logClamp(original, clampedTo, min, max int) { + log.WithFields(log.Fields{ + "original_value": original, + "clamped_to": clampedTo, + "min": min, + "max": max, + }).Debug("budget clamped: value outside model range") +} diff --git a/internal/thinking/validate_test.go b/internal/thinking/validate_test.go new file mode 100644 index 00000000..e17a1586 --- /dev/null +++ b/internal/thinking/validate_test.go @@ -0,0 +1,349 @@ +// Package thinking provides unified thinking configuration processing logic. +package thinking + +import ( + "strings" + "testing" + "unicode" + "unicode/utf8" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + log "github.com/sirupsen/logrus" + logtest "github.com/sirupsen/logrus/hooks/test" +) + +// TestClampBudget tests the ClampBudget function. +// +// ClampBudget applies range constraints to a budget value: +// - budget < Min → clamp to Min (with Debug log) +// - budget > Max → clamp to Max (with Debug log) +// - Auto value (-1) passes through unchanged +func TestClampBudget(t *testing.T) { + tests := []struct { + name string + value int + min int + max int + want int + }{ + // Within range - no clamping + {"within range", 8192, 128, 32768, 8192}, + {"at min", 128, 128, 32768, 128}, + {"at max", 32768, 128, 32768, 32768}, + + // Below min - clamp to min + {"below min", 100, 128, 32768, 128}, + + // Above max - clamp to max + {"above max", 50000, 128, 32768, 32768}, + + // Edge cases + {"min equals max", 5000, 5000, 5000, 5000}, + {"zero min zero value", 0, 0, 100, 0}, + + // Auto value (-1) - passes through + {"auto value", -1, 128, 32768, -1}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := ClampBudget(tt.value, tt.min, tt.max) + if got != tt.want { + t.Errorf("ClampBudget(%d, %d, %d) = %d, want %d", + tt.value, tt.min, tt.max, got, tt.want) + } + }) + } +} + +// TestZeroAllowedBoundaryHandling tests ZeroAllowed=false edge cases. 
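+//
+// Concretely (mirroring the table below):
+//
+//	ClampBudgetWithZeroCheck(0, 128, 32768, true)  // returns 0 (zero allowed)
+//	ClampBudgetWithZeroCheck(0, 128, 32768, false) // returns 128 and logs a warning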
+//
+// When ZeroAllowed=false and the user requests 0, clamp to Min and log a Warn entry.
+func TestZeroAllowedBoundaryHandling(t *testing.T) {
+	tests := []struct {
+		name        string
+		value       int
+		min         int
+		max         int
+		zeroAllowed bool
+		want        int
+	}{
+		// ZeroAllowed=true: 0 stays 0
+		{"zero allowed - keep zero", 0, 128, 32768, true, 0},
+
+		// ZeroAllowed=false: 0 clamps to min
+		{"zero not allowed - clamp to min", 0, 128, 32768, false, 128},
+
+		// ZeroAllowed=false but non-zero value: normal clamping
+		{"zero not allowed - positive value", 8192, 1024, 100000, false, 8192},
+
+		// Auto value (-1) always passes through
+		{"auto value", -1, 128, 32768, false, -1},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := ClampBudgetWithZeroCheck(tt.value, tt.min, tt.max, tt.zeroAllowed)
+			if got != tt.want {
+				t.Errorf("ClampBudgetWithZeroCheck(%d, %d, %d, %v) = %d, want %d",
+					tt.value, tt.min, tt.max, tt.zeroAllowed, got, tt.want)
+			}
+		})
+	}
+}
+
+// The former TestValidateConfigFramework, TestValidateConfigNotSupported,
+// TestValidateConfigConversion, TestValidateConfigLevelSupport, and
+// TestValidateConfigClamping tests were merged into TestValidateConfig for
+// consolidation.
+
+// TestValidateConfig is the comprehensive test for the ValidateConfig function.
+//
+// ValidateConfig checks if a ThinkingConfig is valid for a given model.
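+//
+// One conversion exercised below, sketched for orientation: on a level-only
+// model a numeric budget is translated through ConvertBudgetToLevel, e.g.
+//
+//	got, _ := ValidateConfig(
+//		ThinkingConfig{Mode: ModeBudget, Budget: 5000},
+//		&registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}},
+//	)
+//	// got.Mode == ModeLevel, got.Level == LevelMedium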
+// This test covers all validation scenarios including:
+// - Framework basics (nil support with ModeNone)
+// - Error cases (thinking not supported, level not supported, dynamic not allowed)
+// - Mode conversion (budget-only, level-only, hybrid)
+// - Budget clamping (to max, to min)
+// - ZeroAllowed boundary handling (ModeNone with ZeroAllowed=false)
+// - DynamicAllowed validation
+//
+// Depends on: Epic 5 Story 5-3 (config validity validation)
+func TestValidateConfig(t *testing.T) {
+	tests := []struct {
+		name       string
+		config     ThinkingConfig
+		support    *registry.ThinkingSupport
+		wantMode   ThinkingMode
+		wantBudget int
+		wantLevel  ThinkingLevel
+		wantErr    bool
+		wantCode   ErrorCode
+	}{
+		// Framework basics
+		{"nil support mode none", ThinkingConfig{Mode: ModeNone, Budget: 0}, nil, ModeNone, 0, "", false, ""},
+
+		// Valid configs - no conversion needed
+		{"budget-only keeps budget", ThinkingConfig{Mode: ModeBudget, Budget: 8192}, &registry.ThinkingSupport{Min: 1024, Max: 100000}, ModeBudget, 8192, "", false, ""},
+
+		// Auto-conversion: Level → Budget
+		{"budget-only converts level", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}, &registry.ThinkingSupport{Min: 1024, Max: 100000}, ModeBudget, 24576, "", false, ""},
+
+		// Auto-conversion: Budget → Level
+		{"level-only converts budget", ThinkingConfig{Mode: ModeBudget, Budget: 5000}, &registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}}, ModeLevel, 0, LevelMedium, false, ""},
+
+		// Hybrid preserves original format
+		{"hybrid preserves level", ThinkingConfig{Mode: ModeLevel, Level: LevelLow}, &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}}, ModeLevel, 0, LevelLow, false, ""},
+
+		// Budget clamping
+		{"budget clamped to max", ThinkingConfig{Mode: ModeBudget, Budget: 200000}, &registry.ThinkingSupport{Min: 1024, Max: 100000}, ModeBudget, 100000, "", false, ""},
+		{"budget clamped to min", ThinkingConfig{Mode: ModeBudget, Budget: 100}, &registry.ThinkingSupport{Min: 1024, Max: 100000}, ModeBudget, 1024, "", false, ""},
+
+		// Error: thinking not supported
+		{"thinking not supported", ThinkingConfig{Mode: ModeBudget, Budget: 8192}, nil, 0, 0, "", true, ErrThinkingNotSupported},
+
+		// Error: level not in list
+		{"level not supported", ThinkingConfig{Mode: ModeLevel, Level: LevelXHigh}, &registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}}, 0, 0, "", true, ErrLevelNotSupported},
+
+		// Level case-insensitive
+		{"level supported case-insensitive", ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel("HIGH")}, &registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}}, ModeLevel, 0, ThinkingLevel("HIGH"), false, ""},
+
+		// ModeAuto with DynamicAllowed
+		{"auto with dynamic allowed", ThinkingConfig{Mode: ModeAuto, Budget: -1}, &registry.ThinkingSupport{Min: 128, Max: 32768, DynamicAllowed: true}, ModeAuto, -1, "", false, ""},
+
+		// ModeAuto with DynamicAllowed=false - converts to mid-range (M3)
+		{"auto with dynamic not allowed", ThinkingConfig{Mode: ModeAuto, Budget: -1}, &registry.ThinkingSupport{Min: 128, Max: 32768, DynamicAllowed: false}, ModeBudget, 16448, "", false, ""},
+
+		// ModeNone with ZeroAllowed=true - stays as ModeNone
+		{"mode none with zero allowed", ThinkingConfig{Mode: ModeNone, Budget: 0}, &registry.ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: true}, ModeNone, 0, "", false, ""},
+
+		// Budget=0 converts to ModeNone before clamping (M1)
+		{"budget zero converts to none", ThinkingConfig{Mode: ModeBudget, Budget: 0}, &registry.ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false}, ModeNone, 128, "", false, ""},
+
+		// Level=none converts to ModeNone before clamping, then Level set to lowest
+		{"level none converts to none", ThinkingConfig{Mode: ModeLevel, Level: LevelNone}, &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}, ZeroAllowed: false}, ModeNone, 128, ThinkingLevel("low"), false, ""},
+		{"level auto converts to auto", ThinkingConfig{Mode: ModeLevel, Level: LevelAuto}, &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}, DynamicAllowed: true}, ModeAuto, -1, "", false, ""},
+		// M1: Level=auto with DynamicAllowed=false - converts to mid-range budget
+		{"level auto with dynamic not allowed", ThinkingConfig{Mode: ModeLevel, Level: LevelAuto}, &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}, DynamicAllowed: false}, ModeBudget, 16448, "", false, ""},
+		// M2: Level=auto on Budget-only model (no Levels)
+		{"level auto on budget-only model", ThinkingConfig{Mode: ModeLevel, Level: LevelAuto}, &registry.ThinkingSupport{Min: 128, Max: 32768, DynamicAllowed: true}, ModeAuto, -1, "", false, ""},
+
+		// ModeNone with ZeroAllowed=false - clamps to min but preserves ModeNone (M1)
+		{"mode none with zero not allowed - preserve mode", ThinkingConfig{Mode: ModeNone, Budget: 0}, &registry.ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false}, ModeNone, 1024, "", false, ""},
+
+		// ModeNone with clamped Budget > 0 and Levels: sets Level to lowest
+		{"mode none clamped with levels", ThinkingConfig{Mode: ModeNone, Budget: 0}, &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}, ZeroAllowed: false}, ModeNone, 128, ThinkingLevel("low"), false, ""},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := ValidateConfig(tt.config, tt.support)
+			if tt.wantErr {
+				if err == nil {
+					t.Fatalf("ValidateConfig(%+v, support) error = nil, want %v", tt.config, tt.wantCode)
+				}
+				thinkingErr, ok := err.(*ThinkingError)
+				if !ok {
+					t.Fatalf("ValidateConfig(%+v, support) error type = %T, want *ThinkingError", tt.config, err)
+				}
+				if thinkingErr.Code != tt.wantCode {
+					t.Errorf("ValidateConfig(%+v, support) code = %v, want %v", tt.config, thinkingErr.Code, tt.wantCode)
+				}
+				return
+			}
+			if err != nil {
+				t.Fatalf("ValidateConfig(%+v, support) returned error: %v", tt.config, err)
+			}
+			if got == nil {
+				t.Fatalf("ValidateConfig(%+v, support) returned nil config", tt.config)
+			}
+			if got.Mode != tt.wantMode {
+				t.Errorf("ValidateConfig(%+v, support) Mode = %v, want %v", tt.config, got.Mode, tt.wantMode)
+			}
+			if got.Budget != tt.wantBudget {
+				t.Errorf("ValidateConfig(%+v, support) Budget = %d, want %d", tt.config, got.Budget, tt.wantBudget)
+			}
+			if got.Level != tt.wantLevel {
+				t.Errorf("ValidateConfig(%+v, support) Level = %q, want %q", tt.config, got.Level, tt.wantLevel)
+			}
+		})
+	}
+}
+
+// TestValidationErrorMessages tests error message formatting.
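+//
+// For instance, the unsupported-level case below expects a message shaped like:
+//
+//	level "xhigh" not supported, valid levels: low, medium, high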
+//
+// Error messages should:
+// - Be lowercase
+// - Have no trailing period
+// - Include context with %s/%d
+//
+// Depends on: Epic 5 Story 5-4 (validation error messages)
+func TestValidationErrorMessages(t *testing.T) {
+	tests := []struct {
+		name         string
+		getErr       func() error
+		wantCode     ErrorCode
+		wantContains string
+	}{
+		{"invalid suffix", func() error {
+			_, err := ParseSuffixWithError("model(abc")
+			return err
+		}, ErrInvalidSuffix, "model(abc"},
+		{"level not supported", func() error {
+			_, err := ValidateConfig(ThinkingConfig{Mode: ModeLevel, Level: LevelXHigh}, &registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}})
+			return err
+		}, ErrLevelNotSupported, "valid levels: low, medium, high"},
+		{"thinking not supported", func() error {
+			_, err := ValidateConfig(ThinkingConfig{Mode: ModeBudget, Budget: 1024}, nil)
+			return err
+		}, ErrThinkingNotSupported, "thinking not supported for this model"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			err := tt.getErr()
+			if err == nil {
+				t.Fatalf("error = nil, want ThinkingError")
+			}
+			thinkingErr, ok := err.(*ThinkingError)
+			if !ok {
+				t.Fatalf("error type = %T, want *ThinkingError", err)
+			}
+			if thinkingErr.Code != tt.wantCode {
+				t.Errorf("code = %v, want %v", thinkingErr.Code, tt.wantCode)
+			}
+			if thinkingErr.Message == "" {
+				t.Fatalf("message is empty")
+			}
+			first, _ := utf8.DecodeRuneInString(thinkingErr.Message)
+			if unicode.IsLetter(first) && !unicode.IsLower(first) {
+				t.Errorf("message does not start with lowercase: %q", thinkingErr.Message)
+			}
+			if strings.HasSuffix(thinkingErr.Message, ".") {
+				t.Errorf("message has trailing period: %q", thinkingErr.Message)
+			}
+			if !strings.Contains(thinkingErr.Message, tt.wantContains) {
+				t.Errorf("message %q does not contain %q", thinkingErr.Message, tt.wantContains)
+			}
+		})
+	}
+}
+
+// TestClampingLogging tests that clamping produces correct log entries.
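+//
+// As a rough sketch, a clamp above the maximum should yield a Debug entry
+// with message "budget clamped: value outside model range" and the fields
+// original_value=50000, clamped_to=32768, min=128, max=32768 (exact
+// rendering depends on the logrus formatter).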
+// +// Clamping behavior: +// - Normal clamp (budget outside range) → Debug log +// - ZeroAllowed=false + zero request → Warn log +// +// Depends on: Epic 5 Story 5-1, 5-2 +func TestClampingLogging(t *testing.T) { + tests := []struct { + name string + useZeroCheck bool + budget int + min int + max int + zeroAllowed bool + wantLevel log.Level + wantReason string + wantClamped int + }{ + {"above max - debug", false, 50000, 128, 32768, false, log.DebugLevel, "", 32768}, + {"below min - debug", false, 50, 128, 32768, false, log.DebugLevel, "", 128}, + {"zero not allowed - warn", true, 0, 128, 32768, false, log.WarnLevel, "zero_not_allowed", 128}, + } + + logger := log.StandardLogger() + originalLevel := logger.GetLevel() + logger.SetLevel(log.DebugLevel) + hook := logtest.NewLocal(logger) + t.Cleanup(func() { + logger.SetLevel(originalLevel) + hook.Reset() + }) + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + hook.Reset() + var got int + if tt.useZeroCheck { + got = ClampBudgetWithZeroCheck(tt.budget, tt.min, tt.max, tt.zeroAllowed) + } else { + got = ClampBudget(tt.budget, tt.min, tt.max) + } + if got != tt.wantClamped { + t.Fatalf("clamped budget = %d, want %d", got, tt.wantClamped) + } + + entry := hook.LastEntry() + if entry == nil { + t.Fatalf("no log entry captured") + } + if entry.Level != tt.wantLevel { + t.Errorf("log level = %v, want %v", entry.Level, tt.wantLevel) + } + + fields := []string{"original_value", "clamped_to", "min", "max"} + for _, key := range fields { + if _, ok := entry.Data[key]; !ok { + t.Errorf("missing field %q", key) + } + } + if tt.wantReason != "" { + if value, ok := entry.Data["reason"]; !ok || value != tt.wantReason { + t.Errorf("reason = %v, want %v", value, tt.wantReason) + } + } + }) + } +} diff --git a/internal/translator/antigravity/claude/antigravity_claude_request.go b/internal/translator/antigravity/claude/antigravity_claude_request.go index d5064c3c..c3e4c63f 100644 --- a/internal/translator/antigravity/claude/antigravity_claude_request.go +++ b/internal/translator/antigravity/claude/antigravity_claude_request.go @@ -12,6 +12,7 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/cache" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" @@ -385,12 +386,15 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _ } // Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled - if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) { - if t.Get("type").String() == "enabled" { - if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { - budget := int(b.Int()) - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true) + if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if modelInfo != nil && modelInfo.Thinking != nil { + if t.Get("type").String() == "enabled" { + if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { + budget := int(b.Int()) + out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) + out, _ = sjson.Set(out, 
"request.generationConfig.thinkingConfig.include_thoughts", true) + } } } } diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go index 7ca01b07..87782a5a 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" log "github.com/sirupsen/logrus" @@ -39,7 +40,8 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ // Note: OpenAI official fields take precedence over extra_body.google.thinking_config re := gjson.GetBytes(rawJSON, "reasoning_effort") hasOfficialThinking := re.Exists() - if hasOfficialThinking && util.ModelSupportsThinking(modelName) { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil { effort := strings.ToLower(strings.TrimSpace(re.String())) if util.IsGemini3Model(modelName) { switch effort { @@ -53,14 +55,14 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ out = util.ApplyGeminiCLIThinkingLevel(out, level, nil) } } - } else if !util.ModelUsesThinkingLevels(modelName) { + } else if len(modelInfo.Thinking.Levels) == 0 { out = util.ApplyReasoningEffortToGeminiCLI(out, effort) } } // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) // Only apply for models that use numeric budgets, not discrete levels. 
-	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+	if !hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 {
 		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
 			var budget int
@@ -71,7 +73,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 				setBudget = true
 			} else if v := tc.Get("thinking_budget"); v.Exists() {
 				budget = int(v.Int())
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			}
 
@@ -87,7 +89,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 
 	// Claude/Anthropic API format: thinking.type == "enabled" with budget_tokens
 	// This allows Claude Code and other Claude API clients to pass thinking configuration
-	if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && util.ModelSupportsThinking(modelName) {
+	if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelInfo != nil && modelInfo.Thinking != nil {
 		if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
 			if t.Get("type").String() == "enabled" {
 				if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
diff --git a/internal/translator/claude/gemini/claude_gemini_request.go b/internal/translator/claude/gemini/claude_gemini_request.go
index faf1f9d1..c987c4b0 100644
--- a/internal/translator/claude/gemini/claude_gemini_request.go
+++ b/internal/translator/claude/gemini/claude_gemini_request.go
@@ -15,6 +15,8 @@ import (
 	"strings"
 
 	"github.com/google/uuid"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -115,15 +117,18 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream
 	}
 	// Include thoughts configuration for reasoning process visibility
 	// Only apply for models that support thinking and use numeric budgets, not discrete levels.
- if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { - // Check for thinkingBudget first - if present, enable thinking with budget - if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() && thinkingBudget.Int() > 0 { - out, _ = sjson.Set(out, "thinking.type", "enabled") - normalizedBudget := util.NormalizeThinkingBudget(modelName, int(thinkingBudget.Int())) - out, _ = sjson.Set(out, "thinking.budget_tokens", normalizedBudget) - } else if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True { - // Fallback to include_thoughts if no budget specified - out, _ = sjson.Set(out, "thinking.type", "enabled") + if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { + // Check for thinkingBudget first - if present, enable thinking with budget + if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() && thinkingBudget.Int() > 0 { + out, _ = sjson.Set(out, "thinking.type", "enabled") + normalizedBudget := thinking.ClampBudget(int(thinkingBudget.Int()), modelInfo.Thinking.Min, modelInfo.Thinking.Max) + out, _ = sjson.Set(out, "thinking.budget_tokens", normalizedBudget) + } else if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True { + // Fallback to include_thoughts if no budget specified + out, _ = sjson.Set(out, "thinking.type", "enabled") + } } } } diff --git a/internal/translator/claude/openai/chat-completions/claude_openai_request.go b/internal/translator/claude/openai/chat-completions/claude_openai_request.go index ea04a97a..1ae1f274 100644 --- a/internal/translator/claude/openai/chat-completions/claude_openai_request.go +++ b/internal/translator/claude/openai/chat-completions/claude_openai_request.go @@ -15,7 +15,8 @@ import ( "strings" "github.com/google/uuid" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -65,20 +66,23 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream root := gjson.ParseBytes(rawJSON) - if v := root.Get("reasoning_effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { - effort := strings.ToLower(strings.TrimSpace(v.String())) - if effort != "" { - budget, ok := util.ThinkingEffortToBudget(modelName, effort) - if ok { - switch budget { - case 0: - out, _ = sjson.Set(out, "thinking.type", "disabled") - case -1: - out, _ = sjson.Set(out, "thinking.type", "enabled") - default: - if budget > 0 { + if v := root.Get("reasoning_effort"); v.Exists() { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { + effort := strings.ToLower(strings.TrimSpace(v.String())) + if effort != "" { + budget, ok := thinking.ConvertLevelToBudget(effort) + if ok { + switch budget { + case 0: + out, _ = sjson.Set(out, "thinking.type", "disabled") + case -1: out, _ = sjson.Set(out, 
"thinking.type", "enabled") - out, _ = sjson.Set(out, "thinking.budget_tokens", budget) + default: + if budget > 0 { + out, _ = sjson.Set(out, "thinking.type", "enabled") + out, _ = sjson.Set(out, "thinking.budget_tokens", budget) + } } } } diff --git a/internal/translator/claude/openai/responses/claude_openai-responses_request.go b/internal/translator/claude/openai/responses/claude_openai-responses_request.go index d4b7e05f..3717afa4 100644 --- a/internal/translator/claude/openai/responses/claude_openai-responses_request.go +++ b/internal/translator/claude/openai/responses/claude_openai-responses_request.go @@ -10,7 +10,8 @@ import ( "strings" "github.com/google/uuid" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -53,20 +54,23 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte root := gjson.ParseBytes(rawJSON) - if v := root.Get("reasoning.effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { - effort := strings.ToLower(strings.TrimSpace(v.String())) - if effort != "" { - budget, ok := util.ThinkingEffortToBudget(modelName, effort) - if ok { - switch budget { - case 0: - out, _ = sjson.Set(out, "thinking.type", "disabled") - case -1: - out, _ = sjson.Set(out, "thinking.type", "enabled") - default: - if budget > 0 { + if v := root.Get("reasoning.effort"); v.Exists() { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { + effort := strings.ToLower(strings.TrimSpace(v.String())) + if effort != "" { + budget, ok := thinking.ConvertLevelToBudget(effort) + if ok { + switch budget { + case 0: + out, _ = sjson.Set(out, "thinking.type", "disabled") + case -1: out, _ = sjson.Set(out, "thinking.type", "enabled") - out, _ = sjson.Set(out, "thinking.budget_tokens", budget) + default: + if budget > 0 { + out, _ = sjson.Set(out, "thinking.type", "enabled") + out, _ = sjson.Set(out, "thinking.budget_tokens", budget) + } } } } diff --git a/internal/translator/codex/claude/codex_claude_request.go b/internal/translator/codex/claude/codex_claude_request.go index 59cd5ecf..e31671b3 100644 --- a/internal/translator/codex/claude/codex_claude_request.go +++ b/internal/translator/codex/claude/codex_claude_request.go @@ -12,7 +12,8 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -219,19 +220,20 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool) // Convert thinking.budget_tokens to reasoning.effort for level-based models reasoningEffort := "medium" // default - if thinking := rootResult.Get("thinking"); thinking.Exists() && thinking.IsObject() { - switch thinking.Get("type").String() { + if thinkingConfig := rootResult.Get("thinking"); thinkingConfig.Exists() && thinkingConfig.IsObject() { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + switch thinkingConfig.Get("type").String() { case "enabled": - if util.ModelUsesThinkingLevels(modelName) { - if budgetTokens := thinking.Get("budget_tokens"); 
budgetTokens.Exists() { + if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) > 0 { + if budgetTokens := thinkingConfig.Get("budget_tokens"); budgetTokens.Exists() { budget := int(budgetTokens.Int()) - if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" { + if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" { reasoningEffort = effort } } } case "disabled": - if effort, ok := util.ThinkingBudgetToEffort(modelName, 0); ok && effort != "" { + if effort, ok := thinking.ConvertBudgetToLevel(0); ok && effort != "" { reasoningEffort = effort } } diff --git a/internal/translator/codex/gemini/codex_gemini_request.go b/internal/translator/codex/gemini/codex_gemini_request.go index 944b95f6..f6b258ef 100644 --- a/internal/translator/codex/gemini/codex_gemini_request.go +++ b/internal/translator/codex/gemini/codex_gemini_request.go @@ -14,6 +14,8 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" @@ -251,10 +253,11 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool) reasoningEffort := "medium" // default if genConfig := root.Get("generationConfig"); genConfig.Exists() { if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() { - if util.ModelUsesThinkingLevels(modelName) { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) > 0 { if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() { budget := int(thinkingBudget.Int()) - if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" { + if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" { reasoningEffort = effort } } diff --git a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go index 66e0385f..f522df81 100644 --- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go +++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go @@ -9,8 +9,8 @@ import ( "bytes" "strings" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -160,12 +160,15 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) [] } // Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled - if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) { - if t.Get("type").String() == "enabled" { - if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { - budget := int(b.Int()) - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true) + if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if modelInfo != nil && modelInfo.Thinking != nil { + if 
t.Get("type").String() == "enabled" { + if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { + budget := int(b.Int()) + out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) + out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true) + } } } } diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go index 98188835..1a6505d0 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" log "github.com/sirupsen/logrus" @@ -39,13 +40,14 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo // Note: OpenAI official fields take precedence over extra_body.google.thinking_config re := gjson.GetBytes(rawJSON, "reasoning_effort") hasOfficialThinking := re.Exists() - if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { out = util.ApplyReasoningEffortToGeminiCLI(out, re.String()) } // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) // Only apply for models that use numeric budgets, not discrete levels. - if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { + if !hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { var setBudget bool var budget int diff --git a/internal/translator/gemini/claude/gemini_claude_request.go b/internal/translator/gemini/claude/gemini_claude_request.go index c410aad8..a10d00e9 100644 --- a/internal/translator/gemini/claude/gemini_claude_request.go +++ b/internal/translator/gemini/claude/gemini_claude_request.go @@ -9,8 +9,8 @@ import ( "bytes" "strings" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -154,12 +154,15 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool) // Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when enabled // Only apply for models that use numeric budgets, not discrete levels. 
- if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { - if t.Get("type").String() == "enabled" { - if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { - budget := int(b.Int()) - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget) - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) + if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { + if t.Get("type").String() == "enabled" { + if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { + budget := int(b.Int()) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) + } } } } diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 57e150c1..2328ad36 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" log "github.com/sirupsen/logrus" @@ -42,7 +43,8 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) // use thinkingLevel/includeThoughts instead. re := gjson.GetBytes(rawJSON, "reasoning_effort") hasOfficialThinking := re.Exists() - if hasOfficialThinking && util.ModelSupportsThinking(modelName) { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil { effort := strings.ToLower(strings.TrimSpace(re.String())) if util.IsGemini3Model(modelName) { switch effort { @@ -56,14 +58,14 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) out = util.ApplyGeminiThinkingLevel(out, level, nil) } } - } else if !util.ModelUsesThinkingLevels(modelName) { + } else if len(modelInfo.Thinking.Levels) == 0 { out = util.ApplyReasoningEffortToGemini(out, effort) } } // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) // Only apply for models that use numeric budgets, not discrete levels. 
- if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { + if !hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { var setBudget bool var budget int diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go index 1bf67e7f..62e85eef 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go @@ -4,6 +4,7 @@ import ( "bytes" "strings" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" @@ -391,14 +392,15 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte // OpenAI official reasoning fields take precedence // Only convert for models that use numeric budgets (not discrete levels). hasOfficialThinking := root.Get("reasoning.effort").Exists() - if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { reasoningEffort := root.Get("reasoning.effort") out = string(util.ApplyReasoningEffortToGemini([]byte(out), reasoningEffort.String())) } // Cherry Studio extension (applies only when official fields are missing) // Only apply for models that use numeric budgets, not discrete levels. 
- if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { + if !hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { var setBudget bool var budget int diff --git a/internal/translator/openai/claude/openai_claude_request.go b/internal/translator/openai/claude/openai_claude_request.go index cc7fd01e..44cb237e 100644 --- a/internal/translator/openai/claude/openai_claude_request.go +++ b/internal/translator/openai/claude/openai_claude_request.go @@ -9,6 +9,7 @@ import ( "bytes" "strings" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" @@ -61,23 +62,23 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream out, _ = sjson.Set(out, "stream", stream) // Thinking: Convert Claude thinking.budget_tokens to OpenAI reasoning_effort - if thinking := root.Get("thinking"); thinking.Exists() && thinking.IsObject() { - if thinkingType := thinking.Get("type"); thinkingType.Exists() { + if thinkingConfig := root.Get("thinking"); thinkingConfig.Exists() && thinkingConfig.IsObject() { + if thinkingType := thinkingConfig.Get("type"); thinkingType.Exists() { switch thinkingType.String() { case "enabled": - if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() { + if budgetTokens := thinkingConfig.Get("budget_tokens"); budgetTokens.Exists() { budget := int(budgetTokens.Int()) - if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" { + if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" { out, _ = sjson.Set(out, "reasoning_effort", effort) } } else { // No budget_tokens specified, default to "auto" for enabled thinking - if effort, ok := util.ThinkingBudgetToEffort(modelName, -1); ok && effort != "" { + if effort, ok := thinking.ConvertBudgetToLevel(-1); ok && effort != "" { out, _ = sjson.Set(out, "reasoning_effort", effort) } } case "disabled": - if effort, ok := util.ThinkingBudgetToEffort(modelName, 0); ok && effort != "" { + if effort, ok := thinking.ConvertBudgetToLevel(0); ok && effort != "" { out, _ = sjson.Set(out, "reasoning_effort", effort) } } diff --git a/internal/translator/openai/gemini/openai_gemini_request.go b/internal/translator/openai/gemini/openai_gemini_request.go index f51d914b..7cdcb0f8 100644 --- a/internal/translator/openai/gemini/openai_gemini_request.go +++ b/internal/translator/openai/gemini/openai_gemini_request.go @@ -12,7 +12,7 @@ import ( "math/big" "strings" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -82,7 +82,7 @@ func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() { if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() { budget := int(thinkingBudget.Int()) - if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" { + if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" { out, _ = sjson.Set(out, "reasoning_effort", effort) } } diff --git a/internal/util/thinking.go b/internal/util/thinking.go index 3ce1bb0d..3ed4ee42 100644 
--- a/internal/util/thinking.go +++ b/internal/util/thinking.go @@ -8,6 +8,8 @@ import ( // ModelSupportsThinking reports whether the given model has Thinking capability // according to the model registry metadata (provider-agnostic). +// +// Deprecated: Use thinking.ApplyThinking with modelInfo.Thinking check. func ModelSupportsThinking(model string) bool { if model == "" { return false @@ -32,6 +34,8 @@ func ModelSupportsThinking(model string) bool { // If the model is unknown or has no Thinking metadata, returns the original budget. // For dynamic (-1), returns -1 if DynamicAllowed; otherwise approximates mid-range // or min (0 if zero is allowed and mid <= 0). +// +// Deprecated: Use thinking.ValidateConfig for budget normalization. func NormalizeThinkingBudget(model string, budget int) int { if budget == -1 { // dynamic if found, minBudget, maxBudget, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found { @@ -89,6 +93,8 @@ func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zero // GetModelThinkingLevels returns the discrete reasoning effort levels for the model. // Returns nil if the model has no thinking support or no levels defined. +// +// Deprecated: Access modelInfo.Thinking.Levels directly. func GetModelThinkingLevels(model string) []string { if model == "" { return nil @@ -102,6 +108,8 @@ func GetModelThinkingLevels(model string) []string { // ModelUsesThinkingLevels reports whether the model uses discrete reasoning // effort levels instead of numeric budgets. +// +// Deprecated: Check len(modelInfo.Thinking.Levels) > 0. func ModelUsesThinkingLevels(model string) bool { levels := GetModelThinkingLevels(model) return len(levels) > 0 @@ -109,6 +117,8 @@ func ModelUsesThinkingLevels(model string) bool { // NormalizeReasoningEffortLevel validates and normalizes a reasoning effort // level for the given model. Returns false when the level is not supported. +// +// Deprecated: Use thinking.ValidateConfig for level validation. func NormalizeReasoningEffortLevel(model, effort string) (string, bool) { levels := GetModelThinkingLevels(model) if len(levels) == 0 { @@ -125,6 +135,8 @@ func NormalizeReasoningEffortLevel(model, effort string) (string, bool) { // IsOpenAICompatibilityModel reports whether the model is registered as an OpenAI-compatibility model. // These models may not advertise Thinking metadata in the registry. +// +// Deprecated: Check modelInfo.Type == "openai-compatibility". func IsOpenAICompatibilityModel(model string) bool { if model == "" { return false @@ -149,6 +161,8 @@ func IsOpenAICompatibilityModel(model string) bool { // - "xhigh" -> 32768 // // Returns false when the effort level is empty or unsupported. +// +// Deprecated: Use thinking.ConvertLevelToBudget instead. func ThinkingEffortToBudget(model, effort string) (int, bool) { if effort == "" { return 0, false @@ -186,6 +200,8 @@ func ThinkingEffortToBudget(model, effort string) (int, bool) { // - "high" -> 32768 // // Returns false when the level is empty or unsupported. +// +// Deprecated: Use thinking.ConvertLevelToBudget instead. func ThinkingLevelToBudget(level string) (int, bool) { if level == "" { return 0, false @@ -217,6 +233,8 @@ func ThinkingLevelToBudget(level string) (int, bool) { // - 24577.. -> highest supported level for the model (defaults to "xhigh") // // Returns false when the budget is unsupported (negative values other than -1). +// +// Deprecated: Use thinking.ConvertBudgetToLevel instead. 
func ThinkingBudgetToEffort(model string, budget int) (string, bool) { switch { case budget == -1: diff --git a/internal/util/thinking_deprecation_test.go b/internal/util/thinking_deprecation_test.go new file mode 100644 index 00000000..6e513874 --- /dev/null +++ b/internal/util/thinking_deprecation_test.go @@ -0,0 +1,130 @@ +package util + +import ( + "go/ast" + "go/parser" + "go/token" + "os" + "path/filepath" + "runtime" + "strings" + "testing" +) + +func TestThinkingUtilDeprecationComments(t *testing.T) { + dir, err := thinkingSourceDir() + if err != nil { + t.Fatalf("resolve thinking source dir: %v", err) + } + + // Test thinking.go deprecation comments + t.Run("thinking.go", func(t *testing.T) { + docs := parseFuncDocs(t, filepath.Join(dir, "thinking.go")) + tests := []struct { + funcName string + want string + }{ + {"ModelSupportsThinking", "Deprecated: Use thinking.ApplyThinking with modelInfo.Thinking check."}, + {"NormalizeThinkingBudget", "Deprecated: Use thinking.ValidateConfig for budget normalization."}, + {"ThinkingEffortToBudget", "Deprecated: Use thinking.ConvertLevelToBudget instead."}, + {"ThinkingBudgetToEffort", "Deprecated: Use thinking.ConvertBudgetToLevel instead."}, + {"GetModelThinkingLevels", "Deprecated: Access modelInfo.Thinking.Levels directly."}, + {"ModelUsesThinkingLevels", "Deprecated: Check len(modelInfo.Thinking.Levels) > 0."}, + {"NormalizeReasoningEffortLevel", "Deprecated: Use thinking.ValidateConfig for level validation."}, + {"IsOpenAICompatibilityModel", "Deprecated: Check modelInfo.Type == \"openai-compatibility\"."}, + {"ThinkingLevelToBudget", "Deprecated: Use thinking.ConvertLevelToBudget instead."}, + } + for _, tt := range tests { + t.Run(tt.funcName, func(t *testing.T) { + doc, ok := docs[tt.funcName] + if !ok { + t.Fatalf("missing function %q in thinking.go", tt.funcName) + } + if !strings.Contains(doc, tt.want) { + t.Fatalf("missing deprecation note for %s: want %q, got %q", tt.funcName, tt.want, doc) + } + }) + } + }) + + // Test thinking_suffix.go deprecation comments + t.Run("thinking_suffix.go", func(t *testing.T) { + docs := parseFuncDocs(t, filepath.Join(dir, "thinking_suffix.go")) + tests := []struct { + funcName string + want string + }{ + {"NormalizeThinkingModel", "Deprecated: Use thinking.ParseSuffix instead."}, + {"ThinkingFromMetadata", "Deprecated: Access ThinkingConfig fields directly."}, + {"ResolveThinkingConfigFromMetadata", "Deprecated: Use thinking.ApplyThinking instead."}, + {"ReasoningEffortFromMetadata", "Deprecated: Use thinking.ConvertBudgetToLevel instead."}, + {"ResolveOriginalModel", "Deprecated: Parse model suffix with thinking.ParseSuffix."}, + } + for _, tt := range tests { + t.Run(tt.funcName, func(t *testing.T) { + doc, ok := docs[tt.funcName] + if !ok { + t.Fatalf("missing function %q in thinking_suffix.go", tt.funcName) + } + if !strings.Contains(doc, tt.want) { + t.Fatalf("missing deprecation note for %s: want %q, got %q", tt.funcName, tt.want, doc) + } + }) + } + }) + + // Test thinking_text.go deprecation comments + t.Run("thinking_text.go", func(t *testing.T) { + docs := parseFuncDocs(t, filepath.Join(dir, "thinking_text.go")) + tests := []struct { + funcName string + want string + }{ + {"GetThinkingText", "Deprecated: Use thinking package for thinking text extraction."}, + {"GetThinkingTextFromJSON", "Deprecated: Use thinking package for thinking text extraction."}, + {"SanitizeThinkingPart", "Deprecated: Use thinking package for thinking part sanitization."}, + {"StripCacheControl", "Deprecated: Use 
thinking package for cache control stripping."},
+		}
+		for _, tt := range tests {
+			t.Run(tt.funcName, func(t *testing.T) {
+				doc, ok := docs[tt.funcName]
+				if !ok {
+					t.Fatalf("missing function %q in thinking_text.go", tt.funcName)
+				}
+				if !strings.Contains(doc, tt.want) {
+					t.Fatalf("missing deprecation note for %s: want %q, got %q", tt.funcName, tt.want, doc)
+				}
+			})
+		}
+	})
+}
+
+func parseFuncDocs(t *testing.T, path string) map[string]string {
+	t.Helper()
+	fset := token.NewFileSet()
+	file, err := parser.ParseFile(fset, path, nil, parser.ParseComments)
+	if err != nil {
+		t.Fatalf("parse %s: %v", path, err)
+	}
+	docs := map[string]string{}
+	for _, decl := range file.Decls {
+		fn, ok := decl.(*ast.FuncDecl)
+		if !ok || fn.Recv != nil {
+			continue
+		}
+		if fn.Doc == nil {
+			docs[fn.Name.Name] = ""
+			continue
+		}
+		docs[fn.Name.Name] = fn.Doc.Text()
+	}
+	return docs
+}
+
+func thinkingSourceDir() (string, error) {
+	_, thisFile, _, ok := runtime.Caller(0)
+	if !ok {
+		return "", os.ErrNotExist
+	}
+	return filepath.Dir(thisFile), nil
+}
diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go
index 0a72b4c5..c02cadaa 100644
--- a/internal/util/thinking_suffix.go
+++ b/internal/util/thinking_suffix.go
@@ -7,15 +7,30 @@
 )
 
 const (
-	ThinkingBudgetMetadataKey          = "thinking_budget"
-	ThinkingIncludeThoughtsMetadataKey = "thinking_include_thoughts"
-	ReasoningEffortMetadataKey         = "reasoning_effort"
-	ThinkingOriginalModelMetadataKey   = "thinking_original_model"
+	// Deprecated: No longer used. Thinking configuration is now passed via
+	// model name suffix and processed by thinking.ApplyThinking().
+	ThinkingBudgetMetadataKey = "thinking_budget"
+
+	// Deprecated: No longer used. See ThinkingBudgetMetadataKey.
+	ThinkingIncludeThoughtsMetadataKey = "thinking_include_thoughts"
+
+	// Deprecated: No longer used. See ThinkingBudgetMetadataKey.
+	ReasoningEffortMetadataKey = "reasoning_effort"
+
+	// Deprecated: No longer used. The original model name (with suffix) is now
+	// preserved directly in the model field. Use thinking.ParseSuffix() to
+	// extract the base model name if needed.
+	ThinkingOriginalModelMetadataKey = "thinking_original_model"
+
+	// ModelMappingOriginalModelMetadataKey stores the client-requested model alias
+	// for OAuth model name mappings. This is NOT deprecated.
 	ModelMappingOriginalModelMetadataKey = "model_mapping_original_model"
 )
 
 // NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns
 // the normalized base model with extracted metadata. Supported pattern:
+//
+// Deprecated: Use thinking.ParseSuffix instead.
 // - "<model>(<value>)" where value can be:
 //   - A numeric budget (e.g., "(8192)", "(16384)")
 //   - A reasoning effort level (e.g., "(high)", "(medium)", "(low)")
@@ -89,6 +104,8 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) {
 
 // ThinkingFromMetadata extracts thinking overrides from metadata produced by NormalizeThinkingModel.
 // It accepts both the new generic keys and legacy Gemini-specific keys.
+//
+// Deprecated: Access ThinkingConfig fields directly.
 func ThinkingFromMetadata(metadata map[string]any) (*int, *bool, *string, bool) {
 	if len(metadata) == 0 {
 		return nil, nil, nil, false
@@ -159,6 +176,8 @@ func ThinkingFromMetadata(metadata map[string]any) (*int, *bool, *string, bool)
 
 // ResolveThinkingConfigFromMetadata derives thinking budget/include overrides,
 // converting reasoning effort strings into budgets when possible.
+// +// Deprecated: Use thinking.ApplyThinking instead. func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (*int, *bool, bool) { budget, include, effort, matched := ThinkingFromMetadata(metadata) if !matched { @@ -180,6 +199,8 @@ func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (* // ReasoningEffortFromMetadata resolves a reasoning effort string from metadata, // inferring "auto" and "none" when budgets request dynamic or disabled thinking. +// +// Deprecated: Use thinking.ConvertBudgetToLevel instead. func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) { budget, include, effort, matched := ThinkingFromMetadata(metadata) if !matched { @@ -204,6 +225,8 @@ func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) { // ResolveOriginalModel returns the original model name stored in metadata (if present), // otherwise falls back to the provided model. +// +// Deprecated: Parse model suffix with thinking.ParseSuffix. func ResolveOriginalModel(model string, metadata map[string]any) string { normalize := func(name string) string { if name == "" { diff --git a/internal/util/thinking_text.go b/internal/util/thinking_text.go index c36d202d..7ebb76fc 100644 --- a/internal/util/thinking_text.go +++ b/internal/util/thinking_text.go @@ -11,6 +11,8 @@ import ( // - Wrapped object: { "thinking": { "text": "text", "cache_control": {...} } } // - Gemini-style: { "thought": true, "text": "text" } // Returns the extracted text string. +// +// Deprecated: Use thinking package for thinking text extraction. func GetThinkingText(part gjson.Result) string { // Try direct text field first (Gemini-style) if text := part.Get("text"); text.Exists() && text.Type == gjson.String { @@ -42,6 +44,8 @@ func GetThinkingText(part gjson.Result) string { } // GetThinkingTextFromJSON extracts thinking text from a raw JSON string. +// +// Deprecated: Use thinking package for thinking text extraction. func GetThinkingTextFromJSON(jsonStr string) string { return GetThinkingText(gjson.Parse(jsonStr)) } @@ -49,6 +53,8 @@ func GetThinkingTextFromJSON(jsonStr string) string { // SanitizeThinkingPart normalizes a thinking part to a canonical form. // Strips cache_control and other non-essential fields. // Returns the sanitized part as JSON string. +// +// Deprecated: Use thinking package for thinking part sanitization. func SanitizeThinkingPart(part gjson.Result) string { // Gemini-style: { thought: true, text, thoughtSignature } if part.Get("thought").Bool() { @@ -79,6 +85,8 @@ func SanitizeThinkingPart(part gjson.Result) string { } // StripCacheControl removes cache_control and providerOptions from a JSON object. +// +// Deprecated: Use thinking package for cache control stripping. 
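+//
+// For example, an input of {"text":"x","cache_control":{"type":"ephemeral"}}
+// becomes {"text":"x"}; a top-level "providerOptions" key is removed the same way.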
func StripCacheControl(jsonStr string) string { result := jsonStr result, _ = sjson.Delete(result, "cache_control") diff --git a/sdk/api/handlers/handlers.go b/sdk/api/handlers/handlers.go index 6160b9bd..232f0b95 100644 --- a/sdk/api/handlers/handlers.go +++ b/sdk/api/handlers/handlers.go @@ -16,6 +16,7 @@ import ( "github.com/google/uuid" "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces" "github.com/router-for-me/CLIProxyAPI/v6/internal/logging" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" coreexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -379,7 +380,7 @@ func appendAPIResponse(c *gin.Context, data []byte) { // ExecuteWithAuthManager executes a non-streaming request via the core auth manager. // This path is the only supported execution route. func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) { - providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName) + providers, normalizedModel, errMsg := h.getRequestDetails(modelName) if errMsg != nil { return nil, errMsg } @@ -388,16 +389,13 @@ func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType Model: normalizedModel, Payload: cloneBytes(rawJSON), } - if cloned := cloneMetadata(metadata); cloned != nil { - req.Metadata = cloned - } opts := coreexecutor.Options{ Stream: false, Alt: alt, OriginalRequest: cloneBytes(rawJSON), SourceFormat: sdktranslator.FromString(handlerType), } - opts.Metadata = mergeMetadata(cloneMetadata(metadata), reqMeta) + opts.Metadata = reqMeta resp, err := h.AuthManager.Execute(ctx, providers, req, opts) if err != nil { status := http.StatusInternalServerError @@ -420,7 +418,7 @@ func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType // ExecuteCountWithAuthManager executes a non-streaming request via the core auth manager. // This path is the only supported execution route. func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) { - providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName) + providers, normalizedModel, errMsg := h.getRequestDetails(modelName) if errMsg != nil { return nil, errMsg } @@ -429,16 +427,13 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle Model: normalizedModel, Payload: cloneBytes(rawJSON), } - if cloned := cloneMetadata(metadata); cloned != nil { - req.Metadata = cloned - } opts := coreexecutor.Options{ Stream: false, Alt: alt, OriginalRequest: cloneBytes(rawJSON), SourceFormat: sdktranslator.FromString(handlerType), } - opts.Metadata = mergeMetadata(cloneMetadata(metadata), reqMeta) + opts.Metadata = reqMeta resp, err := h.AuthManager.ExecuteCount(ctx, providers, req, opts) if err != nil { status := http.StatusInternalServerError @@ -461,7 +456,7 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle // ExecuteStreamWithAuthManager executes a streaming request via the core auth manager. // This path is the only supported execution route. 
func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) (<-chan []byte, <-chan *interfaces.ErrorMessage) { - providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName) + providers, normalizedModel, errMsg := h.getRequestDetails(modelName) if errMsg != nil { errChan := make(chan *interfaces.ErrorMessage, 1) errChan <- errMsg @@ -473,16 +468,13 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl Model: normalizedModel, Payload: cloneBytes(rawJSON), } - if cloned := cloneMetadata(metadata); cloned != nil { - req.Metadata = cloned - } opts := coreexecutor.Options{ Stream: true, Alt: alt, OriginalRequest: cloneBytes(rawJSON), SourceFormat: sdktranslator.FromString(handlerType), } - opts.Metadata = mergeMetadata(cloneMetadata(metadata), reqMeta) + opts.Metadata = reqMeta chunks, err := h.AuthManager.ExecuteStream(ctx, providers, req, opts) if err != nil { errChan := make(chan *interfaces.ErrorMessage, 1) @@ -595,38 +587,40 @@ func statusFromError(err error) int { return 0 } -func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string, normalizedModel string, metadata map[string]any, err *interfaces.ErrorMessage) { - // Resolve "auto" model to an actual available model first - resolvedModelName := util.ResolveAutoModel(modelName) - - // Normalize the model name to handle dynamic thinking suffixes before determining the provider. - normalizedModel, metadata = normalizeModelMetadata(resolvedModelName) - - // Use the normalizedModel to get the provider name. - providers = util.GetProviderName(normalizedModel) - if len(providers) == 0 && metadata != nil { - if originalRaw, ok := metadata[util.ThinkingOriginalModelMetadataKey]; ok { - if originalModel, okStr := originalRaw.(string); okStr { - originalModel = strings.TrimSpace(originalModel) - if originalModel != "" && !strings.EqualFold(originalModel, normalizedModel) { - if altProviders := util.GetProviderName(originalModel); len(altProviders) > 0 { - providers = altProviders - normalizedModel = originalModel - } - } - } +func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string, normalizedModel string, err *interfaces.ErrorMessage) { + resolvedModelName := modelName + initialSuffix := thinking.ParseSuffix(modelName) + if initialSuffix.ModelName == "auto" { + resolvedBase := util.ResolveAutoModel(initialSuffix.ModelName) + if initialSuffix.HasSuffix { + resolvedModelName = fmt.Sprintf("%s(%s)", resolvedBase, initialSuffix.RawSuffix) + } else { + resolvedModelName = resolvedBase } + } else { + resolvedModelName = util.ResolveAutoModel(modelName) + } + + parsed := thinking.ParseSuffix(resolvedModelName) + baseModel := strings.TrimSpace(parsed.ModelName) + + providers = util.GetProviderName(baseModel) + // Fallback: if baseModel has no provider but differs from resolvedModelName, + // try using the full model name. This handles edge cases where custom models + // may be registered with their full suffixed name (e.g., "my-model(8192)"). + // Evaluated in Story 11.8: This fallback is intentionally preserved to support + // custom model registrations that include thinking suffixes. 
+ if len(providers) == 0 && baseModel != resolvedModelName { + providers = util.GetProviderName(resolvedModelName) } if len(providers) == 0 { - return nil, "", nil, &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)} + return nil, "", &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)} } - // If it's a dynamic model, the normalizedModel was already set to extractedModelName. - // If it's a non-dynamic model, normalizedModel was set by normalizeModelMetadata. - // So, normalizedModel is already correctly set at this point. - - return providers, normalizedModel, metadata, nil + // The thinking suffix is preserved in the model name itself, so no + // metadata-based configuration passing is needed. + return providers, resolvedModelName, nil } func cloneBytes(src []byte) []byte { @@ -638,10 +632,6 @@ func cloneBytes(src []byte) []byte { return dst } -func normalizeModelMetadata(modelName string) (string, map[string]any) { - return util.NormalizeThinkingModel(modelName) -} - func cloneMetadata(src map[string]any) map[string]any { if len(src) == 0 { return nil diff --git a/sdk/api/handlers/handlers_request_details_test.go b/sdk/api/handlers/handlers_request_details_test.go new file mode 100644 index 00000000..b0f6b132 --- /dev/null +++ b/sdk/api/handlers/handlers_request_details_test.go @@ -0,0 +1,118 @@ +package handlers + +import ( + "reflect" + "testing" + "time" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" + sdkconfig "github.com/router-for-me/CLIProxyAPI/v6/sdk/config" +) + +func TestGetRequestDetails_PreservesSuffix(t *testing.T) { + modelRegistry := registry.GetGlobalRegistry() + now := time.Now().Unix() + + modelRegistry.RegisterClient("test-request-details-gemini", "gemini", []*registry.ModelInfo{ + {ID: "gemini-2.5-pro", Created: now + 30}, + {ID: "gemini-2.5-flash", Created: now + 25}, + }) + modelRegistry.RegisterClient("test-request-details-openai", "openai", []*registry.ModelInfo{ + {ID: "gpt-5.2", Created: now + 20}, + }) + modelRegistry.RegisterClient("test-request-details-claude", "claude", []*registry.ModelInfo{ + {ID: "claude-sonnet-4-5", Created: now + 5}, + }) + + // Ensure cleanup of all test registrations. 
+ clientIDs := []string{ + "test-request-details-gemini", + "test-request-details-openai", + "test-request-details-claude", + } + for _, clientID := range clientIDs { + id := clientID + t.Cleanup(func() { + modelRegistry.UnregisterClient(id) + }) + } + + handler := NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, coreauth.NewManager(nil, nil, nil)) + + tests := []struct { + name string + inputModel string + wantProviders []string + wantModel string + wantErr bool + }{ + { + name: "numeric suffix preserved", + inputModel: "gemini-2.5-pro(8192)", + wantProviders: []string{"gemini"}, + wantModel: "gemini-2.5-pro(8192)", + wantErr: false, + }, + { + name: "level suffix preserved", + inputModel: "gpt-5.2(high)", + wantProviders: []string{"openai"}, + wantModel: "gpt-5.2(high)", + wantErr: false, + }, + { + name: "no suffix unchanged", + inputModel: "claude-sonnet-4-5", + wantProviders: []string{"claude"}, + wantModel: "claude-sonnet-4-5", + wantErr: false, + }, + { + name: "unknown model with suffix", + inputModel: "unknown-model(8192)", + wantProviders: nil, + wantModel: "", + wantErr: true, + }, + { + name: "auto suffix resolved", + inputModel: "auto(high)", + wantProviders: []string{"gemini"}, + wantModel: "gemini-2.5-pro(high)", + wantErr: false, + }, + { + name: "special suffix none preserved", + inputModel: "gemini-2.5-flash(none)", + wantProviders: []string{"gemini"}, + wantModel: "gemini-2.5-flash(none)", + wantErr: false, + }, + { + name: "special suffix auto preserved", + inputModel: "claude-sonnet-4-5(auto)", + wantProviders: []string{"claude"}, + wantModel: "claude-sonnet-4-5(auto)", + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + providers, model, errMsg := handler.getRequestDetails(tt.inputModel) + if (errMsg != nil) != tt.wantErr { + t.Fatalf("getRequestDetails() error = %v, wantErr %v", errMsg, tt.wantErr) + } + if errMsg != nil { + return + } + if !reflect.DeepEqual(providers, tt.wantProviders) { + t.Fatalf("getRequestDetails() providers = %v, want %v", providers, tt.wantProviders) + } + if model != tt.wantModel { + t.Fatalf("getRequestDetails() model = %v, want %v", model, tt.wantModel) + } + }) + } +} diff --git a/sdk/cliproxy/auth/api_key_model_mappings_test.go b/sdk/cliproxy/auth/api_key_model_mappings_test.go new file mode 100644 index 00000000..fb4dbe86 --- /dev/null +++ b/sdk/cliproxy/auth/api_key_model_mappings_test.go @@ -0,0 +1,201 @@ +package auth + +import ( + "context" + "testing" + + internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config" +) + +func TestLookupAPIKeyUpstreamModel(t *testing.T) { + cfg := &internalconfig.Config{ + GeminiKey: []internalconfig.GeminiKey{ + { + APIKey: "k", + BaseURL: "https://example.com", + Models: []internalconfig.GeminiModel{ + {Name: "gemini-2.5-pro-exp-03-25", Alias: "g25p"}, + {Name: "gemini-2.5-flash(low)", Alias: "g25f"}, + }, + }, + }, + } + + mgr := NewManager(nil, nil, nil) + mgr.SetConfig(cfg) + + ctx := context.Background() + _, _ = mgr.Register(ctx, &Auth{ID: "a1", Provider: "gemini", Attributes: map[string]string{"api_key": "k", "base_url": "https://example.com"}}) + + tests := []struct { + name string + authID string + input string + want string + }{ + // Fast path + suffix preservation + {"alias with suffix", "a1", "g25p(8192)", "gemini-2.5-pro-exp-03-25(8192)"}, + {"alias without suffix", "a1", "g25p", "gemini-2.5-pro-exp-03-25"}, + + // Config suffix takes priority + {"config suffix priority", "a1", "g25f(high)", "gemini-2.5-flash(low)"}, + {"config suffix 
no user suffix", "a1", "g25f", "gemini-2.5-flash(low)"}, + + // Case insensitive + {"uppercase alias", "a1", "G25P", "gemini-2.5-pro-exp-03-25"}, + {"mixed case with suffix", "a1", "G25p(4096)", "gemini-2.5-pro-exp-03-25(4096)"}, + + // Direct name lookup + {"upstream name direct", "a1", "gemini-2.5-pro-exp-03-25", "gemini-2.5-pro-exp-03-25"}, + {"upstream name with suffix", "a1", "gemini-2.5-pro-exp-03-25(8192)", "gemini-2.5-pro-exp-03-25(8192)"}, + + // Cache miss scenarios + {"non-existent auth", "non-existent", "g25p", ""}, + {"unknown alias", "a1", "unknown-alias", ""}, + {"empty auth ID", "", "g25p", ""}, + {"empty model", "a1", "", ""}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + resolved := mgr.lookupAPIKeyUpstreamModel(tt.authID, tt.input) + if resolved != tt.want { + t.Errorf("lookupAPIKeyUpstreamModel(%q, %q) = %q, want %q", tt.authID, tt.input, resolved, tt.want) + } + }) + } +} + +func TestAPIKeyModelMappings_ConfigHotReload(t *testing.T) { + cfg := &internalconfig.Config{ + GeminiKey: []internalconfig.GeminiKey{ + { + APIKey: "k", + Models: []internalconfig.GeminiModel{{Name: "gemini-2.5-pro-exp-03-25", Alias: "g25p"}}, + }, + }, + } + + mgr := NewManager(nil, nil, nil) + mgr.SetConfig(cfg) + + ctx := context.Background() + _, _ = mgr.Register(ctx, &Auth{ID: "a1", Provider: "gemini", Attributes: map[string]string{"api_key": "k"}}) + + // Initial mapping + if resolved := mgr.lookupAPIKeyUpstreamModel("a1", "g25p"); resolved != "gemini-2.5-pro-exp-03-25" { + t.Fatalf("before reload: got %q, want %q", resolved, "gemini-2.5-pro-exp-03-25") + } + + // Hot reload with new mapping + mgr.SetConfig(&internalconfig.Config{ + GeminiKey: []internalconfig.GeminiKey{ + { + APIKey: "k", + Models: []internalconfig.GeminiModel{{Name: "gemini-2.5-flash", Alias: "g25p"}}, + }, + }, + }) + + // New mapping should take effect + if resolved := mgr.lookupAPIKeyUpstreamModel("a1", "g25p"); resolved != "gemini-2.5-flash" { + t.Fatalf("after reload: got %q, want %q", resolved, "gemini-2.5-flash") + } +} + +func TestAPIKeyModelMappings_MultipleProviders(t *testing.T) { + cfg := &internalconfig.Config{ + GeminiKey: []internalconfig.GeminiKey{{APIKey: "gemini-key", Models: []internalconfig.GeminiModel{{Name: "gemini-2.5-pro", Alias: "gp"}}}}, + ClaudeKey: []internalconfig.ClaudeKey{{APIKey: "claude-key", Models: []internalconfig.ClaudeModel{{Name: "claude-sonnet-4", Alias: "cs4"}}}}, + CodexKey: []internalconfig.CodexKey{{APIKey: "codex-key", Models: []internalconfig.CodexModel{{Name: "o3", Alias: "o"}}}}, + } + + mgr := NewManager(nil, nil, nil) + mgr.SetConfig(cfg) + + ctx := context.Background() + _, _ = mgr.Register(ctx, &Auth{ID: "gemini-auth", Provider: "gemini", Attributes: map[string]string{"api_key": "gemini-key"}}) + _, _ = mgr.Register(ctx, &Auth{ID: "claude-auth", Provider: "claude", Attributes: map[string]string{"api_key": "claude-key"}}) + _, _ = mgr.Register(ctx, &Auth{ID: "codex-auth", Provider: "codex", Attributes: map[string]string{"api_key": "codex-key"}}) + + tests := []struct { + authID, input, want string + }{ + {"gemini-auth", "gp", "gemini-2.5-pro"}, + {"claude-auth", "cs4", "claude-sonnet-4"}, + {"codex-auth", "o", "o3"}, + } + + for _, tt := range tests { + if resolved := mgr.lookupAPIKeyUpstreamModel(tt.authID, tt.input); resolved != tt.want { + t.Errorf("lookupAPIKeyUpstreamModel(%q, %q) = %q, want %q", tt.authID, tt.input, resolved, tt.want) + } + } +} + +func TestApplyAPIKeyModelMapping(t *testing.T) { + cfg := &internalconfig.Config{ + 
GeminiKey: []internalconfig.GeminiKey{ + {APIKey: "k", Models: []internalconfig.GeminiModel{{Name: "gemini-2.5-pro-exp-03-25", Alias: "g25p"}}}, + }, + } + + mgr := NewManager(nil, nil, nil) + mgr.SetConfig(cfg) + + ctx := context.Background() + apiKeyAuth := &Auth{ID: "a1", Provider: "gemini", Attributes: map[string]string{"api_key": "k"}} + oauthAuth := &Auth{ID: "oauth-auth", Provider: "gemini", Attributes: map[string]string{"auth_kind": "oauth"}} + _, _ = mgr.Register(ctx, apiKeyAuth) + + tests := []struct { + name string + auth *Auth + inputModel string + wantModel string + wantOriginal string + expectMapping bool + }{ + { + name: "api_key auth with alias", + auth: apiKeyAuth, + inputModel: "g25p(8192)", + wantModel: "gemini-2.5-pro-exp-03-25(8192)", + wantOriginal: "g25p(8192)", + expectMapping: true, + }, + { + name: "oauth auth passthrough", + auth: oauthAuth, + inputModel: "some-model", + wantModel: "some-model", + expectMapping: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metadata := map[string]any{"existing": "value"} + resolvedModel, resultMeta := mgr.applyAPIKeyModelMapping(tt.auth, tt.inputModel, metadata) + + if resolvedModel != tt.wantModel { + t.Errorf("model = %q, want %q", resolvedModel, tt.wantModel) + } + + if resultMeta["existing"] != "value" { + t.Error("existing metadata not preserved") + } + + original, hasOriginal := resultMeta["model_mapping_original_model"].(string) + if tt.expectMapping { + if !hasOriginal || original != tt.wantOriginal { + t.Errorf("original model = %q, want %q", original, tt.wantOriginal) + } + } else { + if hasOriginal { + t.Error("should not set model_mapping_original_model for non-api_key auth") + } + } + }) + } +} diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index dc7bc10b..5b1339be 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -15,8 +15,10 @@ import ( "time" "github.com/google/uuid" + internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/logging" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" log "github.com/sirupsen/logrus" @@ -120,6 +122,14 @@ type Manager struct { // modelNameMappings stores global model name alias mappings (alias -> upstream name) keyed by channel. modelNameMappings atomic.Value + // runtimeConfig stores the latest application config for request-time decisions. + // It is initialized in NewManager; never Load() before first Store(). + runtimeConfig atomic.Value + + // apiKeyModelMappings caches resolved model alias mappings for API-key auths. + // Keyed by auth.ID, value is alias(lower) -> upstream model (including suffix). + apiKeyModelMappings atomic.Value + // Optional HTTP RoundTripper provider injected by host. rtProvider RoundTripperProvider @@ -135,7 +145,7 @@ func NewManager(store Store, selector Selector, hook Hook) *Manager { if hook == nil { hook = NoopHook{} } - return &Manager{ + manager := &Manager{ store: store, executors: make(map[string]ProviderExecutor), selector: selector, @@ -143,6 +153,10 @@ func NewManager(store Store, selector Selector, hook Hook) *Manager { auths: make(map[string]*Auth), providerOffsets: make(map[string]int), } + // atomic.Value requires non-nil initial value. 
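+	// Seeding empty snapshots here upholds the "never Load() before first
+	// Store()" contract documented on runtimeConfig above.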
+ manager.runtimeConfig.Store(&internalconfig.Config{}) + manager.apiKeyModelMappings.Store(apiKeyModelMappingTable(nil)) + return manager } func (m *Manager) SetSelector(selector Selector) { @@ -171,6 +185,181 @@ func (m *Manager) SetRoundTripperProvider(p RoundTripperProvider) { m.mu.Unlock() } +// SetConfig updates the runtime config snapshot used by request-time helpers. +// Callers should provide the latest config on reload so per-credential alias mapping stays in sync. +func (m *Manager) SetConfig(cfg *internalconfig.Config) { + if m == nil { + return + } + if cfg == nil { + cfg = &internalconfig.Config{} + } + m.runtimeConfig.Store(cfg) + m.rebuildAPIKeyModelMappingsFromRuntimeConfig() +} + +func (m *Manager) lookupAPIKeyUpstreamModel(authID, requestedModel string) string { + if m == nil { + return "" + } + authID = strings.TrimSpace(authID) + if authID == "" { + return "" + } + requestedModel = strings.TrimSpace(requestedModel) + if requestedModel == "" { + return "" + } + table, _ := m.apiKeyModelMappings.Load().(apiKeyModelMappingTable) + if table == nil { + return "" + } + byAlias := table[authID] + if len(byAlias) == 0 { + return "" + } + key := strings.ToLower(thinking.ParseSuffix(requestedModel).ModelName) + if key == "" { + key = strings.ToLower(requestedModel) + } + resolved := strings.TrimSpace(byAlias[key]) + if resolved == "" { + return "" + } + // Preserve thinking suffix from the client's requested model unless config already has one. + requestResult := thinking.ParseSuffix(requestedModel) + if thinking.ParseSuffix(resolved).HasSuffix { + return resolved + } + if requestResult.HasSuffix && requestResult.RawSuffix != "" { + return resolved + "(" + requestResult.RawSuffix + ")" + } + return resolved + +} + +func (m *Manager) rebuildAPIKeyModelMappingsFromRuntimeConfig() { + if m == nil { + return + } + cfg, _ := m.runtimeConfig.Load().(*internalconfig.Config) + if cfg == nil { + cfg = &internalconfig.Config{} + } + m.mu.Lock() + defer m.mu.Unlock() + m.rebuildAPIKeyModelMappingsLocked(cfg) +} + +func (m *Manager) rebuildAPIKeyModelMappingsLocked(cfg *internalconfig.Config) { + if m == nil { + return + } + if cfg == nil { + cfg = &internalconfig.Config{} + } + + out := make(apiKeyModelMappingTable) + for _, auth := range m.auths { + if auth == nil { + continue + } + if strings.TrimSpace(auth.ID) == "" { + continue + } + kind, _ := auth.AccountInfo() + if !strings.EqualFold(strings.TrimSpace(kind), "api_key") { + continue + } + + byAlias := make(map[string]string) + provider := strings.ToLower(strings.TrimSpace(auth.Provider)) + switch provider { + case "gemini": + if entry := resolveGeminiAPIKeyConfig(cfg, auth); entry != nil { + compileAPIKeyModelMappingsForModels(byAlias, entry.Models) + } + case "claude": + if entry := resolveClaudeAPIKeyConfig(cfg, auth); entry != nil { + compileAPIKeyModelMappingsForModels(byAlias, entry.Models) + } + case "codex": + if entry := resolveCodexAPIKeyConfig(cfg, auth); entry != nil { + compileAPIKeyModelMappingsForModels(byAlias, entry.Models) + } + case "vertex": + if entry := resolveVertexAPIKeyConfig(cfg, auth); entry != nil { + compileAPIKeyModelMappingsForModels(byAlias, entry.Models) + } + default: + // OpenAI-compat uses config selection from auth.Attributes. 
+ providerKey := "" + compatName := "" + if auth.Attributes != nil { + providerKey = strings.TrimSpace(auth.Attributes["provider_key"]) + compatName = strings.TrimSpace(auth.Attributes["compat_name"]) + } + if compatName != "" || strings.EqualFold(strings.TrimSpace(auth.Provider), "openai-compatibility") { + if entry := resolveOpenAICompatConfig(cfg, providerKey, compatName, auth.Provider); entry != nil { + compileAPIKeyModelMappingsForModels(byAlias, entry.Models) + } + } + } + + if len(byAlias) > 0 { + out[auth.ID] = byAlias + } + } + + m.apiKeyModelMappings.Store(out) +} + +func compileAPIKeyModelMappingsForModels[T interface { + GetName() string + GetAlias() string +}](out map[string]string, models []T) { + if out == nil { + return + } + for i := range models { + alias := strings.TrimSpace(models[i].GetAlias()) + name := strings.TrimSpace(models[i].GetName()) + if alias == "" || name == "" { + continue + } + aliasKey := strings.ToLower(thinking.ParseSuffix(alias).ModelName) + if aliasKey == "" { + aliasKey = strings.ToLower(alias) + } + // Config priority: first alias wins. + if _, exists := out[aliasKey]; exists { + continue + } + out[aliasKey] = name + // Also allow direct lookup by upstream name (case-insensitive), so lookups on already-upstream + // models remain a cheap no-op. + nameKey := strings.ToLower(thinking.ParseSuffix(name).ModelName) + if nameKey == "" { + nameKey = strings.ToLower(name) + } + if nameKey != "" { + if _, exists := out[nameKey]; !exists { + out[nameKey] = name + } + } + // Preserve config suffix priority by seeding a base-name lookup when name already has suffix. + nameResult := thinking.ParseSuffix(name) + if nameResult.HasSuffix { + baseKey := strings.ToLower(strings.TrimSpace(nameResult.ModelName)) + if baseKey != "" { + if _, exists := out[baseKey]; !exists { + out[baseKey] = name + } + } + } + } +} + // SetRetryConfig updates retry attempts and cooldown wait interval. 
func (m *Manager) SetRetryConfig(retry int, maxRetryInterval time.Duration) { if m == nil { @@ -219,6 +408,7 @@ func (m *Manager) Register(ctx context.Context, auth *Auth) (*Auth, error) { m.mu.Lock() m.auths[auth.ID] = auth.Clone() m.mu.Unlock() + m.rebuildAPIKeyModelMappingsFromRuntimeConfig() _ = m.persist(ctx, auth) m.hook.OnAuthRegistered(ctx, auth.Clone()) return auth.Clone(), nil @@ -237,6 +427,7 @@ func (m *Manager) Update(ctx context.Context, auth *Auth) (*Auth, error) { auth.EnsureIndex() m.auths[auth.ID] = auth.Clone() m.mu.Unlock() + m.rebuildAPIKeyModelMappingsFromRuntimeConfig() _ = m.persist(ctx, auth) m.hook.OnAuthUpdated(ctx, auth.Clone()) return auth.Clone(), nil @@ -261,6 +452,11 @@ func (m *Manager) Load(ctx context.Context) error { auth.EnsureIndex() m.auths[auth.ID] = auth.Clone() } + cfg, _ := m.runtimeConfig.Load().(*internalconfig.Config) + if cfg == nil { + cfg = &internalconfig.Config{} + } + m.rebuildAPIKeyModelMappingsLocked(cfg) return nil } @@ -558,6 +754,7 @@ func (m *Manager) executeWithProvider(ctx context.Context, provider string, req execReq := req execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth) execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata) + execReq.Model, execReq.Metadata = m.applyAPIKeyModelMapping(auth, execReq.Model, execReq.Metadata) resp, errExec := executor.Execute(execCtx, auth, execReq, opts) result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil} if errExec != nil { @@ -606,6 +803,7 @@ func (m *Manager) executeCountWithProvider(ctx context.Context, provider string, execReq := req execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth) execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata) + execReq.Model, execReq.Metadata = m.applyAPIKeyModelMapping(auth, execReq.Model, execReq.Metadata) resp, errExec := executor.CountTokens(execCtx, auth, execReq, opts) result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil} if errExec != nil { @@ -654,6 +852,7 @@ func (m *Manager) executeStreamWithProvider(ctx context.Context, provider string execReq := req execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth) execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata) + execReq.Model, execReq.Metadata = m.applyAPIKeyModelMapping(auth, execReq.Model, execReq.Metadata) chunks, errStream := executor.ExecuteStream(execCtx, auth, execReq, opts) if errStream != nil { rerr := &Error{Message: errStream.Error()} @@ -712,7 +911,6 @@ func stripPrefixFromMetadata(metadata map[string]any, needle string) map[string] return metadata } keys := []string{ - util.ThinkingOriginalModelMetadataKey, util.GeminiOriginalModelMetadataKey, util.ModelMappingOriginalModelMetadataKey, } @@ -740,6 +938,215 @@ func stripPrefixFromMetadata(metadata map[string]any, needle string) map[string] return out } +func (m *Manager) applyAPIKeyModelMapping(auth *Auth, requestedModel string, metadata map[string]any) (string, map[string]any) { + if m == nil || auth == nil { + return requestedModel, metadata + } + + kind, _ := auth.AccountInfo() + if !strings.EqualFold(strings.TrimSpace(kind), "api_key") { + return requestedModel, metadata + } + + requestedModel = strings.TrimSpace(requestedModel) + if requestedModel == "" { + return requestedModel, metadata + } + + // Fast path: lookup 
per-auth mapping table (keyed by auth.ID). + if resolved := m.lookupAPIKeyUpstreamModel(auth.ID, requestedModel); resolved != "" { + return applyUpstreamModelOverride(requestedModel, resolved, metadata) + } + + // Slow path: scan config for the matching credential entry and resolve alias. + // This acts as a safety net if mappings are stale or auth.ID is missing. + cfg, _ := m.runtimeConfig.Load().(*internalconfig.Config) + if cfg == nil { + cfg = &internalconfig.Config{} + } + + provider := strings.ToLower(strings.TrimSpace(auth.Provider)) + upstreamModel := "" + switch provider { + case "gemini": + upstreamModel = resolveUpstreamModelForGeminiAPIKey(cfg, auth, requestedModel) + case "claude": + upstreamModel = resolveUpstreamModelForClaudeAPIKey(cfg, auth, requestedModel) + case "codex": + upstreamModel = resolveUpstreamModelForCodexAPIKey(cfg, auth, requestedModel) + case "vertex": + upstreamModel = resolveUpstreamModelForVertexAPIKey(cfg, auth, requestedModel) + default: + upstreamModel = resolveUpstreamModelForOpenAICompatAPIKey(cfg, auth, requestedModel) + } + + // applyUpstreamModelOverride lives in model_name_mappings.go. + return applyUpstreamModelOverride(requestedModel, upstreamModel, metadata) +} + +// APIKeyConfigEntry is a generic interface for API key configurations. +type APIKeyConfigEntry interface { + GetAPIKey() string + GetBaseURL() string +} + +func resolveAPIKeyConfig[T APIKeyConfigEntry](entries []T, auth *Auth) *T { + if auth == nil || len(entries) == 0 { + return nil + } + attrKey, attrBase := "", "" + if auth.Attributes != nil { + attrKey = strings.TrimSpace(auth.Attributes["api_key"]) + attrBase = strings.TrimSpace(auth.Attributes["base_url"]) + } + for i := range entries { + entry := &entries[i] + cfgKey := strings.TrimSpace((*entry).GetAPIKey()) + cfgBase := strings.TrimSpace((*entry).GetBaseURL()) + if attrKey != "" && attrBase != "" { + if strings.EqualFold(cfgKey, attrKey) && strings.EqualFold(cfgBase, attrBase) { + return entry + } + continue + } + if attrKey != "" && strings.EqualFold(cfgKey, attrKey) { + if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) { + return entry + } + } + if attrKey == "" && attrBase != "" && strings.EqualFold(cfgBase, attrBase) { + return entry + } + } + if attrKey != "" { + for i := range entries { + entry := &entries[i] + if strings.EqualFold(strings.TrimSpace((*entry).GetAPIKey()), attrKey) { + return entry + } + } + } + return nil +} + +func resolveGeminiAPIKeyConfig(cfg *internalconfig.Config, auth *Auth) *internalconfig.GeminiKey { + if cfg == nil { + return nil + } + return resolveAPIKeyConfig(cfg.GeminiKey, auth) +} + +func resolveClaudeAPIKeyConfig(cfg *internalconfig.Config, auth *Auth) *internalconfig.ClaudeKey { + if cfg == nil { + return nil + } + return resolveAPIKeyConfig(cfg.ClaudeKey, auth) +} + +func resolveCodexAPIKeyConfig(cfg *internalconfig.Config, auth *Auth) *internalconfig.CodexKey { + if cfg == nil { + return nil + } + return resolveAPIKeyConfig(cfg.CodexKey, auth) +} + +func resolveVertexAPIKeyConfig(cfg *internalconfig.Config, auth *Auth) *internalconfig.VertexCompatKey { + if cfg == nil { + return nil + } + return resolveAPIKeyConfig(cfg.VertexCompatAPIKey, auth) +} + +func resolveUpstreamModelForGeminiAPIKey(cfg *internalconfig.Config, auth *Auth, requestedModel string) string { + entry := resolveGeminiAPIKeyConfig(cfg, auth) + if entry == nil { + return "" + } + return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models)) +} + +func 
resolveUpstreamModelForClaudeAPIKey(cfg *internalconfig.Config, auth *Auth, requestedModel string) string { + entry := resolveClaudeAPIKeyConfig(cfg, auth) + if entry == nil { + return "" + } + return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models)) +} + +func resolveUpstreamModelForCodexAPIKey(cfg *internalconfig.Config, auth *Auth, requestedModel string) string { + entry := resolveCodexAPIKeyConfig(cfg, auth) + if entry == nil { + return "" + } + return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models)) +} + +func resolveUpstreamModelForVertexAPIKey(cfg *internalconfig.Config, auth *Auth, requestedModel string) string { + entry := resolveVertexAPIKeyConfig(cfg, auth) + if entry == nil { + return "" + } + return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models)) +} + +func resolveUpstreamModelForOpenAICompatAPIKey(cfg *internalconfig.Config, auth *Auth, requestedModel string) string { + providerKey := "" + compatName := "" + if auth != nil && len(auth.Attributes) > 0 { + providerKey = strings.TrimSpace(auth.Attributes["provider_key"]) + compatName = strings.TrimSpace(auth.Attributes["compat_name"]) + } + if compatName == "" && !strings.EqualFold(strings.TrimSpace(auth.Provider), "openai-compatibility") { + return "" + } + entry := resolveOpenAICompatConfig(cfg, providerKey, compatName, auth.Provider) + if entry == nil { + return "" + } + return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models)) +} + +type apiKeyModelMappingTable map[string]map[string]string + +func resolveOpenAICompatConfig(cfg *internalconfig.Config, providerKey, compatName, authProvider string) *internalconfig.OpenAICompatibility { + if cfg == nil { + return nil + } + candidates := make([]string, 0, 3) + if v := strings.TrimSpace(compatName); v != "" { + candidates = append(candidates, v) + } + if v := strings.TrimSpace(providerKey); v != "" { + candidates = append(candidates, v) + } + if v := strings.TrimSpace(authProvider); v != "" { + candidates = append(candidates, v) + } + for i := range cfg.OpenAICompatibility { + compat := &cfg.OpenAICompatibility[i] + for _, candidate := range candidates { + if candidate != "" && strings.EqualFold(strings.TrimSpace(candidate), compat.Name) { + return compat + } + } + } + return nil +} + +func asModelAliasEntries[T interface { + GetName() string + GetAlias() string +}](models []T) []modelMappingEntry { + if len(models) == 0 { + return nil + } + out := make([]modelMappingEntry, 0, len(models)) + for i := range models { + out = append(out, models[i]) + } + return out +} + func (m *Manager) normalizeProviders(providers []string) []string { if len(providers) == 0 { return nil diff --git a/sdk/cliproxy/auth/model_name_mappings.go b/sdk/cliproxy/auth/model_name_mappings.go index 03380c09..7fac0b5b 100644 --- a/sdk/cliproxy/auth/model_name_mappings.go +++ b/sdk/cliproxy/auth/model_name_mappings.go @@ -4,9 +4,15 @@ import ( "strings" internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" ) +type modelMappingEntry interface { + GetName() string + GetAlias() string +} + type modelNameMappingTable struct { // reverse maps channel -> alias (lower) -> original upstream model name. 
reverse map[string]map[string]string @@ -71,9 +77,14 @@ func (m *Manager) SetOAuthModelMappings(mappings map[string][]internalconfig.Mod // requested model for response translation. func (m *Manager) applyOAuthModelMapping(auth *Auth, requestedModel string, metadata map[string]any) (string, map[string]any) { upstreamModel := m.resolveOAuthUpstreamModel(auth, requestedModel) + return applyUpstreamModelOverride(requestedModel, upstreamModel, metadata) +} + +func applyUpstreamModelOverride(requestedModel, upstreamModel string, metadata map[string]any) (string, map[string]any) { if upstreamModel == "" { return requestedModel, metadata } + out := make(map[string]any, 1) if len(metadata) > 0 { out = make(map[string]any, len(metadata)+1) @@ -81,24 +92,92 @@ func (m *Manager) applyOAuthModelMapping(auth *Auth, requestedModel string, meta out[k] = v } } - // Store the requested alias (e.g., "gp") so downstream can use it to look up - // model metadata from the global registry where it was registered under this alias. + + // Preserve the original client model string (including any suffix) for downstream. out[util.ModelMappingOriginalModelMetadataKey] = requestedModel return upstreamModel, out } +func resolveModelAliasFromConfigModels(requestedModel string, models []modelMappingEntry) string { + requestedModel = strings.TrimSpace(requestedModel) + if requestedModel == "" { + return "" + } + if len(models) == 0 { + return "" + } + + requestResult := thinking.ParseSuffix(requestedModel) + base := requestResult.ModelName + candidates := []string{base} + if base != requestedModel { + candidates = append(candidates, requestedModel) + } + + preserveSuffix := func(resolved string) string { + resolved = strings.TrimSpace(resolved) + if resolved == "" { + return "" + } + if thinking.ParseSuffix(resolved).HasSuffix { + return resolved + } + if requestResult.HasSuffix && requestResult.RawSuffix != "" { + return resolved + "(" + requestResult.RawSuffix + ")" + } + return resolved + } + + for i := range models { + name := strings.TrimSpace(models[i].GetName()) + alias := strings.TrimSpace(models[i].GetAlias()) + for _, candidate := range candidates { + if candidate == "" { + continue + } + if alias != "" && strings.EqualFold(alias, candidate) { + if name != "" { + return preserveSuffix(name) + } + return preserveSuffix(candidate) + } + if name != "" && strings.EqualFold(name, candidate) { + return preserveSuffix(name) + } + } + } + return "" +} + +// resolveOAuthUpstreamModel resolves the upstream model name from OAuth model mappings. +// If a mapping exists, returns the original (upstream) model name that corresponds +// to the requested alias. +// +// If the requested model contains a thinking suffix (e.g., "gemini-2.5-pro(8192)"), +// the suffix is preserved in the returned model name. However, if the mapping's +// original name already contains a suffix, the config suffix takes priority. 
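+//
+// For example, given the mapping {Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"},
+// a request for "gemini-2.5-pro(8192)" resolves to "gemini-2.5-pro-exp-03-25(8192)".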
func (m *Manager) resolveOAuthUpstreamModel(auth *Auth, requestedModel string) string { + return resolveUpstreamModelFromMappingTable(m, auth, requestedModel, modelMappingChannel(auth)) +} + +func resolveUpstreamModelFromMappingTable(m *Manager, auth *Auth, requestedModel, channel string) string { if m == nil || auth == nil { return "" } - channel := modelMappingChannel(auth) if channel == "" { return "" } - key := strings.ToLower(strings.TrimSpace(requestedModel)) - if key == "" { - return "" + + // Extract thinking suffix from requested model using ParseSuffix + requestResult := thinking.ParseSuffix(requestedModel) + baseModel := requestResult.ModelName + + // Candidate keys to match: base model and raw input (handles suffix-parsing edge cases). + candidates := []string{baseModel} + if baseModel != requestedModel { + candidates = append(candidates, requestedModel) } + raw := m.modelNameMappings.Load() table, _ := raw.(*modelNameMappingTable) if table == nil || table.reverse == nil { @@ -108,11 +187,32 @@ func (m *Manager) resolveOAuthUpstreamModel(auth *Auth, requestedModel string) s if rev == nil { return "" } - original := strings.TrimSpace(rev[key]) - if original == "" || strings.EqualFold(original, requestedModel) { - return "" + + for _, candidate := range candidates { + key := strings.ToLower(strings.TrimSpace(candidate)) + if key == "" { + continue + } + original := strings.TrimSpace(rev[key]) + if original == "" { + continue + } + if strings.EqualFold(original, baseModel) { + return "" + } + + // If config already has suffix, it takes priority. + if thinking.ParseSuffix(original).HasSuffix { + return original + } + // Preserve user's thinking suffix on the resolved model. + if requestResult.HasSuffix && requestResult.RawSuffix != "" { + return original + "(" + requestResult.RawSuffix + ")" + } + return original } - return original + + return "" } // modelMappingChannel extracts the OAuth model mapping channel from an Auth object. 
diff --git a/sdk/cliproxy/auth/model_name_mappings_test.go b/sdk/cliproxy/auth/model_name_mappings_test.go new file mode 100644 index 00000000..121450cc --- /dev/null +++ b/sdk/cliproxy/auth/model_name_mappings_test.go @@ -0,0 +1,187 @@ +package auth + +import ( + "testing" + + internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config" +) + +func TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + mappings map[string][]internalconfig.ModelNameMapping + channel string + input string + want string + }{ + { + name: "numeric suffix preserved", + mappings: map[string][]internalconfig.ModelNameMapping{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro(8192)", + want: "gemini-2.5-pro-exp-03-25(8192)", + }, + { + name: "level suffix preserved", + mappings: map[string][]internalconfig.ModelNameMapping{ + "claude": {{Name: "claude-sonnet-4-5-20250514", Alias: "claude-sonnet-4-5"}}, + }, + channel: "claude", + input: "claude-sonnet-4-5(high)", + want: "claude-sonnet-4-5-20250514(high)", + }, + { + name: "no suffix unchanged", + mappings: map[string][]internalconfig.ModelNameMapping{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro", + want: "gemini-2.5-pro-exp-03-25", + }, + { + name: "config suffix takes priority", + mappings: map[string][]internalconfig.ModelNameMapping{ + "claude": {{Name: "claude-sonnet-4-5-20250514(low)", Alias: "claude-sonnet-4-5"}}, + }, + channel: "claude", + input: "claude-sonnet-4-5(high)", + want: "claude-sonnet-4-5-20250514(low)", + }, + { + name: "auto suffix preserved", + mappings: map[string][]internalconfig.ModelNameMapping{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro(auto)", + want: "gemini-2.5-pro-exp-03-25(auto)", + }, + { + name: "none suffix preserved", + mappings: map[string][]internalconfig.ModelNameMapping{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro(none)", + want: "gemini-2.5-pro-exp-03-25(none)", + }, + { + name: "case insensitive alias lookup with suffix", + mappings: map[string][]internalconfig.ModelNameMapping{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "Gemini-2.5-Pro"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro(high)", + want: "gemini-2.5-pro-exp-03-25(high)", + }, + { + name: "no mapping returns empty", + mappings: map[string][]internalconfig.ModelNameMapping{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "gemini-cli", + input: "unknown-model(high)", + want: "", + }, + { + name: "wrong channel returns empty", + mappings: map[string][]internalconfig.ModelNameMapping{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "claude", + input: "gemini-2.5-pro(high)", + want: "", + }, + { + name: "empty suffix filtered out", + mappings: map[string][]internalconfig.ModelNameMapping{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro()", + want: "gemini-2.5-pro-exp-03-25", + }, + { + name: "incomplete suffix treated as no suffix", + mappings: map[string][]internalconfig.ModelNameMapping{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", 
Alias: "gemini-2.5-pro(high"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro(high", + want: "gemini-2.5-pro-exp-03-25", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + mgr := NewManager(nil, nil, nil) + mgr.SetConfig(&internalconfig.Config{}) + mgr.SetOAuthModelMappings(tt.mappings) + + auth := createAuthForChannel(tt.channel) + got := mgr.resolveOAuthUpstreamModel(auth, tt.input) + if got != tt.want { + t.Errorf("resolveOAuthUpstreamModel(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} + +func createAuthForChannel(channel string) *Auth { + switch channel { + case "gemini-cli": + return &Auth{Provider: "gemini-cli"} + case "claude": + return &Auth{Provider: "claude", Attributes: map[string]string{"auth_kind": "oauth"}} + case "vertex": + return &Auth{Provider: "vertex", Attributes: map[string]string{"auth_kind": "oauth"}} + case "codex": + return &Auth{Provider: "codex", Attributes: map[string]string{"auth_kind": "oauth"}} + case "aistudio": + return &Auth{Provider: "aistudio"} + case "antigravity": + return &Auth{Provider: "antigravity"} + case "qwen": + return &Auth{Provider: "qwen"} + case "iflow": + return &Auth{Provider: "iflow"} + default: + return &Auth{Provider: channel} + } +} + +func TestApplyOAuthModelMapping_SuffixPreservation(t *testing.T) { + t.Parallel() + + mappings := map[string][]internalconfig.ModelNameMapping{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + } + + mgr := NewManager(nil, nil, nil) + mgr.SetConfig(&internalconfig.Config{}) + mgr.SetOAuthModelMappings(mappings) + + auth := &Auth{ID: "test-auth-id", Provider: "gemini-cli"} + metadata := map[string]any{"existing": "value"} + + resolvedModel, resultMeta := mgr.applyOAuthModelMapping(auth, "gemini-2.5-pro(8192)", metadata) + if resolvedModel != "gemini-2.5-pro-exp-03-25(8192)" { + t.Errorf("applyOAuthModelMapping() model = %q, want %q", resolvedModel, "gemini-2.5-pro-exp-03-25(8192)") + } + + originalModel, ok := resultMeta["model_mapping_original_model"].(string) + if !ok || originalModel != "gemini-2.5-pro(8192)" { + t.Errorf("applyOAuthModelMapping() metadata[model_mapping_original_model] = %v, want %q", resultMeta["model_mapping_original_model"], "gemini-2.5-pro(8192)") + } + + if resultMeta["existing"] != "value" { + t.Errorf("applyOAuthModelMapping() metadata[existing] = %v, want %q", resultMeta["existing"], "value") + } +} diff --git a/sdk/cliproxy/builder.go b/sdk/cliproxy/builder.go index 51d5dbac..2e2427f9 100644 --- a/sdk/cliproxy/builder.go +++ b/sdk/cliproxy/builder.go @@ -215,6 +215,7 @@ func (b *Builder) Build() (*Service, error) { } // Attach a default RoundTripper provider so providers can opt-in per-auth transports. 
coreManager.SetRoundTripperProvider(newDefaultRoundTripperProvider()) + coreManager.SetConfig(b.cfg) coreManager.SetOAuthModelMappings(b.cfg.OAuthModelMappings) service := &Service{ diff --git a/sdk/cliproxy/service.go b/sdk/cliproxy/service.go index 695a77c8..71603479 100644 --- a/sdk/cliproxy/service.go +++ b/sdk/cliproxy/service.go @@ -553,6 +553,7 @@ func (s *Service) Run(ctx context.Context) error { s.cfg = newCfg s.cfgMu.Unlock() if s.coreManager != nil { + s.coreManager.SetConfig(newCfg) s.coreManager.SetOAuthModelMappings(newCfg.OAuthModelMappings) } s.rebindExecutors() @@ -825,6 +826,7 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) { OwnedBy: compat.Name, Type: "openai-compatibility", DisplayName: modelID, + UserDefined: true, }) } // Register and return @@ -1157,6 +1159,7 @@ func buildConfigModels[T modelEntry](models []T, ownedBy, modelType string) []*M OwnedBy: ownedBy, Type: modelType, DisplayName: display, + UserDefined: true, } if name != "" { if upstream := registry.LookupStaticModelInfo(name); upstream != nil && upstream.Thinking != nil { diff --git a/test/model_alias_thinking_suffix_test.go b/test/model_alias_thinking_suffix_test.go index 270e0cc7..236fca4a 100644 --- a/test/model_alias_thinking_suffix_test.go +++ b/test/model_alias_thinking_suffix_test.go @@ -3,9 +3,10 @@ package test import ( "testing" - "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" + "github.com/tidwall/sjson" ) // TestModelAliasThinkingSuffix tests the 32 test cases defined in docs/thinking_suffix_test_cases.md @@ -178,7 +179,7 @@ func TestModelAliasThinkingSuffix(t *testing.T) { } } - // Step 5: Test Gemini 2.5 thinkingBudget application using real ApplyThinkingMetadataCLI flow + // Step 5: Test Gemini 2.5 thinkingBudget application using thinking.ApplyThinking if tt.expectedField == "thinkingBudget" && util.IsGemini25Model(tt.upstreamModel) { body := []byte(`{"request":{"contents":[]}}`) @@ -195,8 +196,13 @@ func TestModelAliasThinkingSuffix(t *testing.T) { testMetadata[k] = v } - // Use the exported ApplyThinkingMetadataCLI which includes the fallback logic - result := executor.ApplyThinkingMetadataCLI(body, testMetadata, tt.upstreamModel) + // Merge thinking config from metadata into body + body = applyThinkingFromMetadata(body, testMetadata) + + // Use thinking.ApplyThinking for unified thinking config handling + // Note: ApplyThinking now takes model string, not *ModelInfo + result, _ := thinking.ApplyThinking(body, tt.upstreamModel, "gemini-cli") + budgetVal := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget") expectedBudget := tt.expectedValue.(int) @@ -209,3 +215,48 @@ func TestModelAliasThinkingSuffix(t *testing.T) { }) } } + +// applyThinkingFromMetadata merges thinking configuration from metadata into the payload. 
+func applyThinkingFromMetadata(payload []byte, metadata map[string]any) []byte { + if len(metadata) == 0 { + return payload + } + + // Merge thinking_budget from metadata if present + if budget, ok := metadata["thinking_budget"]; ok { + if budgetVal, okNum := parseNumberToInt(budget); okNum { + payload, _ = sjson.SetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget", budgetVal) + } + } + + // Merge reasoning_effort from metadata if present + if effort, ok := metadata["reasoning_effort"]; ok { + if effortStr, okStr := effort.(string); okStr && effortStr != "" { + payload, _ = sjson.SetBytes(payload, "request.generationConfig.thinkingConfig.thinkingLevel", effortStr) + } + } + + // Merge thinking_include_thoughts from metadata if present + if include, ok := metadata["thinking_include_thoughts"]; ok { + if includeBool, okBool := include.(bool); okBool { + payload, _ = sjson.SetBytes(payload, "request.generationConfig.thinkingConfig.includeThoughts", includeBool) + } + } + + return payload +} + +// parseNumberToInt safely converts various numeric types to int +func parseNumberToInt(raw any) (int, bool) { + switch v := raw.(type) { + case int: + return v, true + case int32: + return int(v), true + case int64: + return int(v), true + case float64: + return int(v), true + } + return 0, false +} diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go index 74a1bd8a..d0e88c78 100644 --- a/test/thinking_conversion_test.go +++ b/test/thinking_conversion_test.go @@ -2,796 +2,882 @@ package test import ( "fmt" - "strings" "testing" "time" _ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator" + // Import provider packages to trigger init() registration of ProviderAppliers + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/claude" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/codex" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" - "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" "github.com/tidwall/gjson" - "github.com/tidwall/sjson" ) -// isOpenAICompatModel returns true if the model is configured as an OpenAI-compatible -// model that should have reasoning effort passed through even if not in registry. -// This simulates the allowCompat behavior from OpenAICompatExecutor. -func isOpenAICompatModel(model string) bool { - return model == "openai-compat" -} - -// registerCoreModels loads representative models across providers into the registry -// so NormalizeThinkingBudget and level validation use real ranges. -func registerCoreModels(t *testing.T) func() { - t.Helper() +// TestThinkingE2EMatrix tests the thinking configuration transformation using the real data flow path. +// Data flow: Input JSON → TranslateRequest → ApplyThinking → Validate Output +// No helper functions are used; all test data is inline. 
+func TestThinkingE2EMatrix(t *testing.T) { + // Register test models directly reg := registry.GetGlobalRegistry() - uid := fmt.Sprintf("thinking-core-%d", time.Now().UnixNano()) - reg.RegisterClient(uid+"-gemini", "gemini", registry.GetGeminiModels()) - reg.RegisterClient(uid+"-claude", "claude", registry.GetClaudeModels()) - reg.RegisterClient(uid+"-openai", "codex", registry.GetOpenAIModels()) - reg.RegisterClient(uid+"-qwen", "qwen", registry.GetQwenModels()) - // Custom openai-compatible model with forced thinking suffix passthrough. - // No Thinking field - simulates an external model added via openai-compat - // where the registry has no knowledge of its thinking capabilities. - // The allowCompat flag should preserve reasoning effort for such models. - customOpenAIModels := []*registry.ModelInfo{ + uid := fmt.Sprintf("thinking-e2e-%d", time.Now().UnixNano()) + + testModels := []*registry.ModelInfo{ { - ID: "openai-compat", + ID: "level-model", Object: "model", Created: 1700000000, - OwnedBy: "custom-provider", + OwnedBy: "test", Type: "openai", - DisplayName: "OpenAI Compatible Model", - Description: "OpenAI-compatible model with forced thinking suffix support", + DisplayName: "Level Model", + Thinking: ®istry.ThinkingSupport{ + Levels: []string{"minimal", "low", "medium", "high"}, + ZeroAllowed: false, + DynamicAllowed: false, + }, + }, + { + ID: "gemini-budget-model", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "gemini", + DisplayName: "Gemini Budget Model", + Thinking: ®istry.ThinkingSupport{ + Min: 128, + Max: 20000, + ZeroAllowed: false, + DynamicAllowed: true, + }, + }, + { + ID: "gemini-mixed-model", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "gemini", + DisplayName: "Gemini Mixed Model", + Thinking: ®istry.ThinkingSupport{ + Min: 128, + Max: 32768, + Levels: []string{"low", "high"}, + ZeroAllowed: false, + DynamicAllowed: true, + }, + }, + { + ID: "claude-budget-model", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "claude", + DisplayName: "Claude Budget Model", + Thinking: ®istry.ThinkingSupport{ + Min: 1024, + Max: 128000, + ZeroAllowed: true, + DynamicAllowed: false, + }, + }, + { + ID: "no-thinking-model", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "openai", + DisplayName: "No Thinking Model", + Thinking: nil, + }, + { + ID: "user-defined-model", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "openai", + DisplayName: "User Defined Model", + UserDefined: true, + Thinking: nil, }, } - reg.RegisterClient(uid+"-custom-openai", "codex", customOpenAIModels) - return func() { - reg.UnregisterClient(uid + "-gemini") - reg.UnregisterClient(uid + "-claude") - reg.UnregisterClient(uid + "-openai") - reg.UnregisterClient(uid + "-qwen") - reg.UnregisterClient(uid + "-custom-openai") - } -} -var ( - thinkingTestModels = []string{ - "gpt-5", // level-based thinking model - "gemini-2.5-pro", // numeric-budget thinking model - "qwen3-code-plus", // no thinking support - "openai-compat", // allowCompat=true (OpenAI-compatible channel) - } - thinkingTestFromProtocols = []string{"openai", "claude", "gemini", "openai-response"} - thinkingTestToProtocols = []string{"gemini", "claude", "openai", "codex"} + reg.RegisterClient(uid, "test", testModels) + defer reg.UnregisterClient(uid) - // Numeric budgets and their level equivalents: - // -1 -> auto - // 0 -> none - // 1..1024 -> low - // 1025..8192 -> medium - // 8193..24576 -> high - // >24576 -> model highest level 
(right-most in Levels) - thinkingNumericSamples = []int{-1, 0, 1023, 1025, 8193, 64000} - - // Levels and their numeric equivalents: - // auto -> -1 - // none -> 0 - // minimal -> 512 - // low -> 1024 - // medium -> 8192 - // high -> 24576 - // xhigh -> 32768 - // invalid -> invalid (no mapping) - thinkingLevelSamples = []string{"auto", "none", "minimal", "low", "medium", "high", "xhigh", "invalid"} -) - -func buildRawPayload(fromProtocol, modelWithSuffix string) []byte { - switch fromProtocol { - case "gemini": - return []byte(fmt.Sprintf(`{"model":"%s","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, modelWithSuffix)) - case "openai-response": - return []byte(fmt.Sprintf(`{"model":"%s","input":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`, modelWithSuffix)) - default: // openai / claude and other chat-style payloads - return []byte(fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, modelWithSuffix)) - } -} - -// normalizeCodexPayload mirrors codex_executor's reasoning + streaming tweaks. -func normalizeCodexPayload(body []byte, upstreamModel string, allowCompat bool) ([]byte, error) { - body = executor.NormalizeThinkingConfig(body, upstreamModel, allowCompat) - if err := executor.ValidateThinkingConfig(body, upstreamModel); err != nil { - return body, err - } - body, _ = sjson.SetBytes(body, "model", upstreamModel) - body, _ = sjson.SetBytes(body, "stream", true) - body, _ = sjson.DeleteBytes(body, "previous_response_id") - return body, nil -} - -// buildBodyForProtocol runs a minimal request through the same translation and -// thinking pipeline used in executors for the given target protocol. -func buildBodyForProtocol(t *testing.T, fromProtocol, toProtocol, modelWithSuffix string) ([]byte, error) { - t.Helper() - normalizedModel, metadata := util.NormalizeThinkingModel(modelWithSuffix) - upstreamModel := util.ResolveOriginalModel(normalizedModel, metadata) - raw := buildRawPayload(fromProtocol, modelWithSuffix) - stream := fromProtocol != toProtocol - - body := sdktranslator.TranslateRequest( - sdktranslator.FromString(fromProtocol), - sdktranslator.FromString(toProtocol), - normalizedModel, - raw, - stream, - ) - - var err error - allowCompat := isOpenAICompatModel(normalizedModel) - switch toProtocol { - case "gemini": - body = executor.ApplyThinkingMetadata(body, metadata, normalizedModel) - body = util.ApplyDefaultThinkingIfNeeded(normalizedModel, body) - body = util.NormalizeGeminiThinkingBudget(normalizedModel, body) - body = util.StripThinkingConfigIfUnsupported(normalizedModel, body) - case "claude": - if budget, ok := util.ResolveClaudeThinkingConfig(normalizedModel, metadata); ok { - body = util.ApplyClaudeThinkingConfig(body, budget) - } - case "openai": - body = executor.ApplyReasoningEffortMetadata(body, metadata, normalizedModel, "reasoning_effort", allowCompat) - body = executor.NormalizeThinkingConfig(body, upstreamModel, allowCompat) - err = executor.ValidateThinkingConfig(body, upstreamModel) - case "codex": // OpenAI responses / codex - // Codex does not support allowCompat; always use false. - body = executor.ApplyReasoningEffortMetadata(body, metadata, normalizedModel, "reasoning.effort", false) - // Mirror CodexExecutor final normalization and model override so tests log the final body. 
- body, err = normalizeCodexPayload(body, upstreamModel, false) - default: + type testCase struct { + name string + from string + to string + modelSuffix string + inputJSON string + expectField string + expectValue string + includeThoughts string + expectErr bool } - // Mirror executor behavior: final payload uses the upstream (base) model name. - if upstreamModel != "" { - body, _ = sjson.SetBytes(body, "model", upstreamModel) + cases := []testCase{ + // level-model (Levels=minimal/low/medium/high, ZeroAllowed=false, DynamicAllowed=false) + // Case 1: No suffix, translator adds default medium for codex + { + name: "1", + from: "openai", + to: "codex", + modelSuffix: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 2: Explicit medium level + { + name: "2", + from: "openai", + to: "codex", + modelSuffix: "level-model(medium)", + inputJSON: `{"model":"level-model(medium)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 3: xhigh not in Levels=[minimal,low,medium,high] → ValidateConfig returns error + { + name: "3", + from: "openai", + to: "codex", + modelSuffix: "level-model(xhigh)", + inputJSON: `{"model":"level-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: true, + }, + // Case 4: none → ModeNone, ZeroAllowed=false → clamp to min level (minimal) + { + name: "4", + from: "openai", + to: "codex", + modelSuffix: "level-model(none)", + inputJSON: `{"model":"level-model(none)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "minimal", + expectErr: false, + }, + // Case 5: auto → ModeAuto, DynamicAllowed=false → convert to mid-range (medium) + { + name: "5", + from: "openai", + to: "codex", + modelSuffix: "level-model(auto)", + inputJSON: `{"model":"level-model(auto)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 6: No suffix from gemini → translator injects default reasoning.effort: medium + { + name: "6", + from: "gemini", + to: "codex", + modelSuffix: "level-model", + inputJSON: `{"model":"level-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 7: 8192 → medium (1025-8192) + { + name: "7", + from: "gemini", + to: "codex", + modelSuffix: "level-model(8192)", + inputJSON: `{"model":"level-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 8: 64000 → xhigh → not supported → error + { + name: "8", + from: "gemini", + to: "codex", + modelSuffix: "level-model(64000)", + inputJSON: `{"model":"level-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: true, + }, + // Case 9: 0 → ModeNone, ZeroAllowed=false → clamp to min level (minimal) + { + name: "9", + from: "gemini", + to: "codex", + modelSuffix: "level-model(0)", + inputJSON: `{"model":"level-model(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning.effort", + expectValue: "minimal", + expectErr: false, + }, + // Case 10: -1 → ModeAuto, DynamicAllowed=false → convert to mid-range (medium) + { + name: "10", + from: "gemini", + 
to: "codex", + modelSuffix: "level-model(-1)", + inputJSON: `{"model":"level-model(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 11: No suffix from claude → no thinking config + { + name: "11", + from: "claude", + to: "openai", + modelSuffix: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 12: 8192 → medium + { + name: "12", + from: "claude", + to: "openai", + modelSuffix: "level-model(8192)", + inputJSON: `{"model":"level-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_effort", + expectValue: "medium", + expectErr: false, + }, + // Case 13: 64000 → xhigh → not supported → error + { + name: "13", + from: "claude", + to: "openai", + modelSuffix: "level-model(64000)", + inputJSON: `{"model":"level-model(64000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: true, + }, + // Case 14: 0 → ModeNone, ZeroAllowed=false → clamp to min level (minimal) + { + name: "14", + from: "claude", + to: "openai", + modelSuffix: "level-model(0)", + inputJSON: `{"model":"level-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_effort", + expectValue: "minimal", + expectErr: false, + }, + // Case 15: -1 → ModeAuto, DynamicAllowed=false → convert to mid-range (medium) + { + name: "15", + from: "claude", + to: "openai", + modelSuffix: "level-model(-1)", + inputJSON: `{"model":"level-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_effort", + expectValue: "medium", + expectErr: false, + }, + + // gemini-budget-model (Min=128, Max=20000, ZeroAllowed=false, DynamicAllowed=true) + { + name: "16", + from: "openai", + to: "gemini", + modelSuffix: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // medium → 8192 + { + name: "17", + from: "openai", + to: "gemini", + modelSuffix: "gemini-budget-model(medium)", + inputJSON: `{"model":"gemini-budget-model(medium)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // xhigh → 32768 → clamp to 20000 + { + name: "18", + from: "openai", + to: "gemini", + modelSuffix: "gemini-budget-model(xhigh)", + inputJSON: `{"model":"gemini-budget-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + // none → 0 → ZeroAllowed=false → clamp to 128, includeThoughts=false + { + name: "19", + from: "openai", + to: "gemini", + modelSuffix: "gemini-budget-model(none)", + inputJSON: `{"model":"gemini-budget-model(none)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "128", + includeThoughts: "false", + expectErr: false, + }, + // auto → -1 dynamic allowed + { + name: "20", + from: "openai", + to: "gemini", + modelSuffix: "gemini-budget-model(auto)", + inputJSON: `{"model":"gemini-budget-model(auto)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + { 
+ name: "21", + from: "claude", + to: "gemini", + modelSuffix: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + { + name: "22", + from: "claude", + to: "gemini", + modelSuffix: "gemini-budget-model(8192)", + inputJSON: `{"model":"gemini-budget-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + { + name: "23", + from: "claude", + to: "gemini", + modelSuffix: "gemini-budget-model(64000)", + inputJSON: `{"model":"gemini-budget-model(64000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + { + name: "24", + from: "claude", + to: "gemini", + modelSuffix: "gemini-budget-model(0)", + inputJSON: `{"model":"gemini-budget-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "128", + includeThoughts: "false", + expectErr: false, + }, + { + name: "25", + from: "claude", + to: "gemini", + modelSuffix: "gemini-budget-model(-1)", + inputJSON: `{"model":"gemini-budget-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + + // gemini-mixed-model (Min=128, Max=32768, Levels=low/high, ZeroAllowed=false, DynamicAllowed=true) + { + name: "26", + from: "openai", + to: "gemini", + modelSuffix: "gemini-mixed-model", + inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // high → use thinkingLevel + { + name: "27", + from: "openai", + to: "gemini", + modelSuffix: "gemini-mixed-model(high)", + inputJSON: `{"model":"gemini-mixed-model(high)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingLevel", + expectValue: "high", + includeThoughts: "true", + expectErr: false, + }, + // xhigh → not in Levels=[low,high] → error + { + name: "28", + from: "openai", + to: "gemini", + modelSuffix: "gemini-mixed-model(xhigh)", + inputJSON: `{"model":"gemini-mixed-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: true, + }, + // none → ModeNone, ZeroAllowed=false → set Level to lowest (low), includeThoughts=false + { + name: "29", + from: "openai", + to: "gemini", + modelSuffix: "gemini-mixed-model(none)", + inputJSON: `{"model":"gemini-mixed-model(none)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingLevel", + expectValue: "low", + includeThoughts: "false", + expectErr: false, + }, + // auto → dynamic allowed, use thinkingBudget=-1 + { + name: "30", + from: "openai", + to: "gemini", + modelSuffix: "gemini-mixed-model(auto)", + inputJSON: `{"model":"gemini-mixed-model(auto)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + { + name: "31", + from: "claude", + to: "gemini", + modelSuffix: "gemini-mixed-model", + inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // 
8192 → ModeBudget → clamp (in range) → thinkingBudget: 8192 + { + name: "32", + from: "claude", + to: "gemini", + modelSuffix: "gemini-mixed-model(8192)", + inputJSON: `{"model":"gemini-mixed-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // 64000 → ModeBudget → clamp to 32768 → thinkingBudget: 32768 + { + name: "33", + from: "claude", + to: "gemini", + modelSuffix: "gemini-mixed-model(64000)", + inputJSON: `{"model":"gemini-mixed-model(64000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "32768", + includeThoughts: "true", + expectErr: false, + }, + // 0 → ModeNone, ZeroAllowed=false → set Level to lowest (low), includeThoughts=false + { + name: "34", + from: "claude", + to: "gemini", + modelSuffix: "gemini-mixed-model(0)", + inputJSON: `{"model":"gemini-mixed-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingLevel", + expectValue: "low", + includeThoughts: "false", + expectErr: false, + }, + // -1 → auto, dynamic allowed + { + name: "35", + from: "claude", + to: "gemini", + modelSuffix: "gemini-mixed-model(-1)", + inputJSON: `{"model":"gemini-mixed-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + + // claude-budget-model (Min=1024, Max=128000, ZeroAllowed=true, DynamicAllowed=false) + { + name: "36", + from: "openai", + to: "claude", + modelSuffix: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // medium → 8192 + { + name: "37", + from: "openai", + to: "claude", + modelSuffix: "claude-budget-model(medium)", + inputJSON: `{"model":"claude-budget-model(medium)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, + // xhigh → 32768 + { + name: "38", + from: "openai", + to: "claude", + modelSuffix: "claude-budget-model(xhigh)", + inputJSON: `{"model":"claude-budget-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + expectValue: "32768", + expectErr: false, + }, + // none → ZeroAllowed=true → disabled + { + name: "39", + from: "openai", + to: "claude", + modelSuffix: "claude-budget-model(none)", + inputJSON: `{"model":"claude-budget-model(none)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.type", + expectValue: "disabled", + expectErr: false, + }, + // auto → ModeAuto, DynamicAllowed=false → convert to mid-range + { + name: "40", + from: "openai", + to: "claude", + modelSuffix: "claude-budget-model(auto)", + inputJSON: `{"model":"claude-budget-model(auto)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + expectValue: "64512", + expectErr: false, + }, + { + name: "41", + from: "gemini", + to: "claude", + modelSuffix: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + { + name: "42", + from: "gemini", + to: "claude", + modelSuffix: "claude-budget-model(8192)", + inputJSON: 
`{"model":"claude-budget-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, + { + name: "43", + from: "gemini", + to: "claude", + modelSuffix: "claude-budget-model(200000)", + inputJSON: `{"model":"claude-budget-model(200000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "thinking.budget_tokens", + expectValue: "128000", + expectErr: false, + }, + // 0 → ZeroAllowed=true → disabled + { + name: "44", + from: "gemini", + to: "claude", + modelSuffix: "claude-budget-model(0)", + inputJSON: `{"model":"claude-budget-model(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "thinking.type", + expectValue: "disabled", + expectErr: false, + }, + // -1 → auto → DynamicAllowed=false → mid-range + { + name: "45", + from: "gemini", + to: "claude", + modelSuffix: "claude-budget-model(-1)", + inputJSON: `{"model":"claude-budget-model(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "thinking.budget_tokens", + expectValue: "64512", + expectErr: false, + }, + + // no-thinking-model (Thinking=nil) + { + name: "46", + from: "gemini", + to: "openai", + modelSuffix: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + { + name: "47", + from: "gemini", + to: "openai", + modelSuffix: "no-thinking-model(8192)", + inputJSON: `{"model":"no-thinking-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + { + name: "48", + from: "gemini", + to: "openai", + modelSuffix: "no-thinking-model(0)", + inputJSON: `{"model":"no-thinking-model(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + { + name: "49", + from: "gemini", + to: "openai", + modelSuffix: "no-thinking-model(-1)", + inputJSON: `{"model":"no-thinking-model(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + { + name: "50", + from: "claude", + to: "openai", + modelSuffix: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + { + name: "51", + from: "claude", + to: "openai", + modelSuffix: "no-thinking-model(8192)", + inputJSON: `{"model":"no-thinking-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + { + name: "52", + from: "claude", + to: "openai", + modelSuffix: "no-thinking-model(0)", + inputJSON: `{"model":"no-thinking-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + { + name: "53", + from: "claude", + to: "openai", + modelSuffix: "no-thinking-model(-1)", + inputJSON: `{"model":"no-thinking-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + + // user-defined-model (UserDefined=true, Thinking=nil) + { + name: "54", + from: "gemini", + to: "openai", + modelSuffix: "user-defined-model", + inputJSON: `{"model":"user-defined-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // 8192 → medium (passthrough for UserDefined) + { + name: "55", + from: "gemini", + to: "openai", + modelSuffix: "user-defined-model(8192)", + inputJSON: 
`{"model":"user-defined-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_effort", + expectValue: "medium", + expectErr: false, + }, + // 64000 → xhigh + { + name: "56", + from: "gemini", + to: "openai", + modelSuffix: "user-defined-model(64000)", + inputJSON: `{"model":"user-defined-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_effort", + expectValue: "xhigh", + expectErr: false, + }, + // 0 → none + { + name: "57", + from: "gemini", + to: "openai", + modelSuffix: "user-defined-model(0)", + inputJSON: `{"model":"user-defined-model(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_effort", + expectValue: "none", + expectErr: false, + }, + // -1 → auto + { + name: "58", + from: "gemini", + to: "openai", + modelSuffix: "user-defined-model(-1)", + inputJSON: `{"model":"user-defined-model(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_effort", + expectValue: "auto", + expectErr: false, + }, + // Case 59: No suffix from claude → translator injects default reasoning.effort: medium + { + name: "59", + from: "claude", + to: "codex", + modelSuffix: "user-defined-model", + inputJSON: `{"model":"user-defined-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // 8192 → medium + { + name: "60", + from: "claude", + to: "codex", + modelSuffix: "user-defined-model(8192)", + inputJSON: `{"model":"user-defined-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // 64000 → xhigh + { + name: "61", + from: "claude", + to: "codex", + modelSuffix: "user-defined-model(64000)", + inputJSON: `{"model":"user-defined-model(64000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "xhigh", + expectErr: false, + }, + // 0 → none + { + name: "62", + from: "claude", + to: "codex", + modelSuffix: "user-defined-model(0)", + inputJSON: `{"model":"user-defined-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "none", + expectErr: false, + }, + // -1 → auto + { + name: "63", + from: "claude", + to: "codex", + modelSuffix: "user-defined-model(-1)", + inputJSON: `{"model":"user-defined-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "auto", + expectErr: false, + }, } - // For tests we only keep model + thinking-related fields to avoid noise. - body = filterThinkingBody(toProtocol, body, upstreamModel, normalizedModel) - return body, err -} + for _, tc := range cases { + tc := tc + testName := fmt.Sprintf("Case%s_%s->%s_%s", tc.name, tc.from, tc.to, tc.modelSuffix) + t.Run(testName, func(t *testing.T) { + // Real data flow path: + // 1. Parse suffix to get base model + suffixResult := thinking.ParseSuffix(tc.modelSuffix) + baseModel := suffixResult.ModelName -// filterThinkingBody projects the translated payload down to only model and -// thinking-related fields for the given target protocol. -func filterThinkingBody(toProtocol string, body []byte, upstreamModel, normalizedModel string) []byte { - if len(body) == 0 { - return body - } - out := []byte(`{}`) + // 2. 
Translate request from source format to target format + body := sdktranslator.TranslateRequest( + sdktranslator.FromString(tc.from), + sdktranslator.FromString(tc.to), + baseModel, + []byte(tc.inputJSON), + true, + ) - // Preserve model if present, otherwise fall back to upstream/normalized model. - if m := gjson.GetBytes(body, "model"); m.Exists() { - out, _ = sjson.SetBytes(out, "model", m.Value()) - } else if upstreamModel != "" { - out, _ = sjson.SetBytes(out, "model", upstreamModel) - } else if normalizedModel != "" { - out, _ = sjson.SetBytes(out, "model", normalizedModel) - } + // 3. Apply thinking configuration (main entry point) + body, err := thinking.ApplyThinking(body, tc.modelSuffix, tc.to) - switch toProtocol { - case "gemini": - if tc := gjson.GetBytes(body, "generationConfig.thinkingConfig"); tc.Exists() { - out, _ = sjson.SetRawBytes(out, "generationConfig.thinkingConfig", []byte(tc.Raw)) - } - case "claude": - if tcfg := gjson.GetBytes(body, "thinking"); tcfg.Exists() { - out, _ = sjson.SetRawBytes(out, "thinking", []byte(tcfg.Raw)) - } - case "openai": - if re := gjson.GetBytes(body, "reasoning_effort"); re.Exists() { - out, _ = sjson.SetBytes(out, "reasoning_effort", re.Value()) - } - case "codex": - if re := gjson.GetBytes(body, "reasoning.effort"); re.Exists() { - out, _ = sjson.SetBytes(out, "reasoning.effort", re.Value()) - } - } - return out -} - -func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) { - cleanup := registerCoreModels(t) - defer cleanup() - - type scenario struct { - name string - modelSuffix string - } - - numericName := func(budget int) string { - if budget < 0 { - return "numeric-neg1" - } - return fmt.Sprintf("numeric-%d", budget) - } - - for _, model := range thinkingTestModels { - _ = registry.GetGlobalRegistry().GetModelInfo(model) - - for _, from := range thinkingTestFromProtocols { - // Scenario selection follows protocol semantics: - // - OpenAI-style protocols (openai/openai-response) express thinking as levels. - // - Claude/Gemini-style protocols express thinking as numeric budgets. 
- cases := []scenario{ - {name: "no-suffix", modelSuffix: model}, + // Validate results + if tc.expectErr { + if err == nil { + t.Fatalf("expected error but got none, body=%s", string(body)) + } + return } - if from == "openai" || from == "openai-response" { - for _, lvl := range thinkingLevelSamples { - cases = append(cases, scenario{ - name: "level-" + lvl, - modelSuffix: fmt.Sprintf("%s(%s)", model, lvl), - }) - } - } else { // claude or gemini - for _, budget := range thinkingNumericSamples { - budget := budget - cases = append(cases, scenario{ - name: numericName(budget), - modelSuffix: fmt.Sprintf("%s(%d)", model, budget), - }) - } + if err != nil { + t.Fatalf("unexpected error: %v, body=%s", err, string(body)) } - for _, to := range thinkingTestToProtocols { - if from == to { - continue + // Check for expected field absence + if tc.expectField == "" { + var hasThinking bool + switch tc.to { + case "gemini": + hasThinking = gjson.GetBytes(body, "generationConfig.thinkingConfig").Exists() + case "claude": + hasThinking = gjson.GetBytes(body, "thinking").Exists() + case "openai": + hasThinking = gjson.GetBytes(body, "reasoning_effort").Exists() + case "codex": + hasThinking = gjson.GetBytes(body, "reasoning.effort").Exists() || gjson.GetBytes(body, "reasoning").Exists() } - t.Logf("─────────────────────────────────────────────────────────────────────────────────") - t.Logf(" %s -> %s | model: %s", from, to, model) - t.Logf("─────────────────────────────────────────────────────────────────────────────────") - for _, cs := range cases { - from := from - to := to - cs := cs - testName := fmt.Sprintf("%s->%s/%s/%s", from, to, model, cs.name) - t.Run(testName, func(t *testing.T) { - normalizedModel, metadata := util.NormalizeThinkingModel(cs.modelSuffix) - expectPresent, expectValue, expectErr := func() (bool, string, bool) { - switch to { - case "gemini": - budget, include, ok := util.ResolveThinkingConfigFromMetadata(normalizedModel, metadata) - if !ok || !util.ModelSupportsThinking(normalizedModel) { - return false, "", false - } - if include != nil && !*include { - return false, "", false - } - if budget == nil { - return false, "", false - } - norm := util.NormalizeThinkingBudget(normalizedModel, *budget) - return true, fmt.Sprintf("%d", norm), false - case "claude": - if !util.ModelSupportsThinking(normalizedModel) { - return false, "", false - } - budget, ok := util.ResolveClaudeThinkingConfig(normalizedModel, metadata) - if !ok || budget == nil { - return false, "", false - } - return true, fmt.Sprintf("%d", *budget), false - case "openai": - allowCompat := isOpenAICompatModel(normalizedModel) - if !util.ModelSupportsThinking(normalizedModel) && !allowCompat { - return false, "", false - } - // For allowCompat models, pass through effort directly without validation - if allowCompat { - effort, ok := util.ReasoningEffortFromMetadata(metadata) - if ok && strings.TrimSpace(effort) != "" { - return true, strings.ToLower(strings.TrimSpace(effort)), false - } - // Check numeric budget fallback for allowCompat - if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { - if mapped, okMap := util.ThinkingBudgetToEffort(normalizedModel, *budget); okMap && mapped != "" { - return true, mapped, false - } - } - return false, "", false - } - if !util.ModelUsesThinkingLevels(normalizedModel) { - // Non-levels models don't support effort strings in openai - return false, "", false - } - effort, ok := util.ReasoningEffortFromMetadata(metadata) - if !ok || 
strings.TrimSpace(effort) == "" { - if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { - if mapped, okMap := util.ThinkingBudgetToEffort(normalizedModel, *budget); okMap { - effort = mapped - ok = true - } - } - } - if !ok || strings.TrimSpace(effort) == "" { - return false, "", false - } - effort = strings.ToLower(strings.TrimSpace(effort)) - if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, effort); okLevel { - return true, normalized, false - } - return false, "", true // validation would fail - case "codex": - // Codex does not support allowCompat; require thinking-capable level models. - if !util.ModelSupportsThinking(normalizedModel) || !util.ModelUsesThinkingLevels(normalizedModel) { - return false, "", false - } - effort, ok := util.ReasoningEffortFromMetadata(metadata) - if ok && strings.TrimSpace(effort) != "" { - effort = strings.ToLower(strings.TrimSpace(effort)) - if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, effort); okLevel { - return true, normalized, false - } - return false, "", true - } - if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { - if mapped, okMap := util.ThinkingBudgetToEffort(normalizedModel, *budget); okMap && mapped != "" { - mapped = strings.ToLower(strings.TrimSpace(mapped)) - if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, mapped); okLevel { - return true, normalized, false - } - return false, "", true - } - } - if from != "openai-response" { - // Codex translators default reasoning.effort to "medium" when - // no explicit thinking suffix/metadata is provided. - return true, "medium", false - } - return false, "", false - default: - return false, "", false - } - }() - - body, err := buildBodyForProtocol(t, from, to, cs.modelSuffix) - actualPresent, actualValue := func() (bool, string) { - path := "" - switch to { - case "gemini": - path = "generationConfig.thinkingConfig.thinkingBudget" - case "claude": - path = "thinking.budget_tokens" - case "openai": - path = "reasoning_effort" - case "codex": - path = "reasoning.effort" - } - if path == "" { - return false, "" - } - val := gjson.GetBytes(body, path) - if to == "codex" && !val.Exists() { - reasoning := gjson.GetBytes(body, "reasoning") - if reasoning.Exists() { - val = reasoning.Get("effort") - } - } - if !val.Exists() { - return false, "" - } - if val.Type == gjson.Number { - return true, fmt.Sprintf("%d", val.Int()) - } - return true, val.String() - }() - - t.Logf("from=%s to=%s model=%s suffix=%s present(expect=%v got=%v) value(expect=%s got=%s) err(expect=%v got=%v) body=%s", - from, to, model, cs.modelSuffix, expectPresent, actualPresent, expectValue, actualValue, expectErr, err != nil, string(body)) - - if expectErr { - if err == nil { - t.Fatalf("expected validation error but got none, body=%s", string(body)) - } - return - } - if err != nil { - t.Fatalf("unexpected error: %v body=%s", err, string(body)) - } - - if expectPresent != actualPresent { - t.Fatalf("presence mismatch: expect %v got %v body=%s", expectPresent, actualPresent, string(body)) - } - if expectPresent && expectValue != actualValue { - t.Fatalf("value mismatch: expect %s got %s body=%s", expectValue, actualValue, string(body)) - } - }) - } - } - } - } -} - -// buildRawPayloadWithThinking creates a payload with thinking parameters already in the body. -// This tests the path where thinking comes from the raw payload, not model suffix. 
-func buildRawPayloadWithThinking(fromProtocol, model string, thinkingParam any) []byte { - switch fromProtocol { - case "gemini": - base := fmt.Sprintf(`{"model":"%s","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, model) - if budget, ok := thinkingParam.(int); ok { - base, _ = sjson.Set(base, "generationConfig.thinkingConfig.thinkingBudget", budget) - } - return []byte(base) - case "openai-response": - base := fmt.Sprintf(`{"model":"%s","input":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`, model) - if effort, ok := thinkingParam.(string); ok && effort != "" { - base, _ = sjson.Set(base, "reasoning.effort", effort) - } - return []byte(base) - case "openai": - base := fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model) - if effort, ok := thinkingParam.(string); ok && effort != "" { - base, _ = sjson.Set(base, "reasoning_effort", effort) - } - return []byte(base) - case "claude": - base := fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model) - if budget, ok := thinkingParam.(int); ok { - base, _ = sjson.Set(base, "thinking.type", "enabled") - base, _ = sjson.Set(base, "thinking.budget_tokens", budget) - } - return []byte(base) - default: - return []byte(fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model)) - } -} - -// buildBodyForProtocolWithRawThinking translates payload with raw thinking params. -func buildBodyForProtocolWithRawThinking(t *testing.T, fromProtocol, toProtocol, model string, thinkingParam any) ([]byte, error) { - t.Helper() - raw := buildRawPayloadWithThinking(fromProtocol, model, thinkingParam) - stream := fromProtocol != toProtocol - - body := sdktranslator.TranslateRequest( - sdktranslator.FromString(fromProtocol), - sdktranslator.FromString(toProtocol), - model, - raw, - stream, - ) - - var err error - allowCompat := isOpenAICompatModel(model) - switch toProtocol { - case "gemini": - body = util.ApplyDefaultThinkingIfNeeded(model, body) - body = util.NormalizeGeminiThinkingBudget(model, body) - body = util.StripThinkingConfigIfUnsupported(model, body) - case "claude": - // For raw payload, Claude thinking is passed through by translator - // No additional processing needed as thinking is already in body - case "openai": - body = executor.NormalizeThinkingConfig(body, model, allowCompat) - err = executor.ValidateThinkingConfig(body, model) - case "codex": - // Codex does not support allowCompat; always use false. 
- body, err = normalizeCodexPayload(body, model, false) - } - - body, _ = sjson.SetBytes(body, "model", model) - body = filterThinkingBody(toProtocol, body, model, model) - return body, err -} - -func TestRawPayloadThinkingConversions(t *testing.T) { - cleanup := registerCoreModels(t) - defer cleanup() - - type scenario struct { - name string - thinkingParam any // int for budget, string for effort level - } - - numericName := func(budget int) string { - if budget < 0 { - return "budget-neg1" - } - return fmt.Sprintf("budget-%d", budget) - } - - for _, model := range thinkingTestModels { - supportsThinking := util.ModelSupportsThinking(model) - usesLevels := util.ModelUsesThinkingLevels(model) - allowCompat := isOpenAICompatModel(model) - - for _, from := range thinkingTestFromProtocols { - var cases []scenario - switch from { - case "openai", "openai-response": - cases = []scenario{ - {name: "no-thinking", thinkingParam: nil}, - } - for _, lvl := range thinkingLevelSamples { - cases = append(cases, scenario{ - name: "effort-" + lvl, - thinkingParam: lvl, - }) - } - case "gemini", "claude": - cases = []scenario{ - {name: "no-thinking", thinkingParam: nil}, - } - for _, budget := range thinkingNumericSamples { - budget := budget - cases = append(cases, scenario{ - name: numericName(budget), - thinkingParam: budget, - }) + if hasThinking { + t.Fatalf("expected no thinking field but found one, body=%s", string(body)) } + return } - for _, to := range thinkingTestToProtocols { - if from == to { - continue - } - t.Logf("═══════════════════════════════════════════════════════════════════════════════") - t.Logf(" RAW PAYLOAD: %s -> %s | model: %s", from, to, model) - t.Logf("═══════════════════════════════════════════════════════════════════════════════") - - for _, cs := range cases { - from := from - to := to - cs := cs - testName := fmt.Sprintf("raw/%s->%s/%s/%s", from, to, model, cs.name) - t.Run(testName, func(t *testing.T) { - expectPresent, expectValue, expectErr := func() (bool, string, bool) { - if cs.thinkingParam == nil { - if to == "codex" && from != "openai-response" && supportsThinking && usesLevels { - // Codex translators default reasoning.effort to "medium" for thinking-capable level models - return true, "medium", false - } - return false, "", false - } - - switch to { - case "gemini": - if !supportsThinking || usesLevels { - return false, "", false - } - // Gemini expects numeric budget (only for non-level models) - if budget, ok := cs.thinkingParam.(int); ok { - norm := util.NormalizeThinkingBudget(model, budget) - return true, fmt.Sprintf("%d", norm), false - } - // Convert effort level to budget for non-level models only - if effort, ok := cs.thinkingParam.(string); ok && effort != "" { - // "none" disables thinking - no thinkingBudget in output - if strings.ToLower(effort) == "none" { - return false, "", false - } - if budget, okB := util.ThinkingEffortToBudget(model, effort); okB { - // ThinkingEffortToBudget already returns normalized budget - return true, fmt.Sprintf("%d", budget), false - } - // Invalid effort does not map to a budget - return false, "", false - } - return false, "", false - case "claude": - if !supportsThinking || usesLevels { - return false, "", false - } - // Claude expects numeric budget (only for non-level models) - if budget, ok := cs.thinkingParam.(int); ok && budget > 0 { - norm := util.NormalizeThinkingBudget(model, budget) - return true, fmt.Sprintf("%d", norm), false - } - // Convert effort level to budget for non-level models only - if 
effort, ok := cs.thinkingParam.(string); ok && effort != "" { - // "none" and "auto" don't produce budget_tokens - lower := strings.ToLower(effort) - if lower == "none" || lower == "auto" { - return false, "", false - } - if budget, okB := util.ThinkingEffortToBudget(model, effort); okB { - // ThinkingEffortToBudget already returns normalized budget - return true, fmt.Sprintf("%d", budget), false - } - // Invalid effort - claude sets thinking.type:enabled but no budget_tokens - return false, "", false - } - return false, "", false - case "openai": - if allowCompat { - if effort, ok := cs.thinkingParam.(string); ok && strings.TrimSpace(effort) != "" { - normalized := strings.ToLower(strings.TrimSpace(effort)) - return true, normalized, false - } - if budget, ok := cs.thinkingParam.(int); ok { - if mapped, okM := util.ThinkingBudgetToEffort(model, budget); okM && mapped != "" { - return true, mapped, false - } - } - return false, "", false - } - if !supportsThinking || !usesLevels { - return false, "", false - } - if effort, ok := cs.thinkingParam.(string); ok && effort != "" { - if normalized, okN := util.NormalizeReasoningEffortLevel(model, effort); okN { - return true, normalized, false - } - return false, "", true // invalid level - } - if budget, ok := cs.thinkingParam.(int); ok { - if mapped, okM := util.ThinkingBudgetToEffort(model, budget); okM && mapped != "" { - // Check if the mapped effort is valid for this model - if _, validLevel := util.NormalizeReasoningEffortLevel(model, mapped); !validLevel { - return true, mapped, true // expect validation error - } - return true, mapped, false - } - } - return false, "", false - case "codex": - // Codex does not support allowCompat; require thinking-capable level models. - if !supportsThinking || !usesLevels { - return false, "", false - } - if effort, ok := cs.thinkingParam.(string); ok && effort != "" { - if normalized, okN := util.NormalizeReasoningEffortLevel(model, effort); okN { - return true, normalized, false - } - return false, "", true - } - if budget, ok := cs.thinkingParam.(int); ok { - if mapped, okM := util.ThinkingBudgetToEffort(model, budget); okM && mapped != "" { - // Check if the mapped effort is valid for this model - if _, validLevel := util.NormalizeReasoningEffortLevel(model, mapped); !validLevel { - return true, mapped, true // expect validation error - } - return true, mapped, false - } - } - if from != "openai-response" { - // Codex translators default reasoning.effort to "medium" for thinking-capable models - return true, "medium", false - } - return false, "", false - } - return false, "", false - }() - - body, err := buildBodyForProtocolWithRawThinking(t, from, to, model, cs.thinkingParam) - actualPresent, actualValue := func() (bool, string) { - path := "" - switch to { - case "gemini": - path = "generationConfig.thinkingConfig.thinkingBudget" - case "claude": - path = "thinking.budget_tokens" - case "openai": - path = "reasoning_effort" - case "codex": - path = "reasoning.effort" - } - if path == "" { - return false, "" - } - val := gjson.GetBytes(body, path) - if to == "codex" && !val.Exists() { - reasoning := gjson.GetBytes(body, "reasoning") - if reasoning.Exists() { - val = reasoning.Get("effort") - } - } - if !val.Exists() { - return false, "" - } - if val.Type == gjson.Number { - return true, fmt.Sprintf("%d", val.Int()) - } - return true, val.String() - }() - - t.Logf("from=%s to=%s model=%s param=%v present(expect=%v got=%v) value(expect=%s got=%s) err(expect=%v got=%v) body=%s", - from, to, model, 
cs.thinkingParam, expectPresent, actualPresent, expectValue, actualValue, expectErr, err != nil, string(body)) - - if expectErr { - if err == nil { - t.Fatalf("expected validation error but got none, body=%s", string(body)) - } - return - } - if err != nil { - t.Fatalf("unexpected error: %v body=%s", err, string(body)) - } - - if expectPresent != actualPresent { - t.Fatalf("presence mismatch: expect %v got %v body=%s", expectPresent, actualPresent, string(body)) - } - if expectPresent && expectValue != actualValue { - t.Fatalf("value mismatch: expect %s got %s body=%s", expectValue, actualValue, string(body)) - } - }) - } + // Check expected field value + val := gjson.GetBytes(body, tc.expectField) + if !val.Exists() { + t.Fatalf("expected field %s not found, body=%s", tc.expectField, string(body)) } - } - } -} -func TestThinkingBudgetToEffort(t *testing.T) { - cleanup := registerCoreModels(t) - defer cleanup() - - cases := []struct { - name string - model string - budget int - want string - ok bool - }{ - {name: "dynamic-auto", model: "gpt-5", budget: -1, want: "auto", ok: true}, - {name: "zero-none", model: "gpt-5", budget: 0, want: "minimal", ok: true}, - {name: "low-min", model: "gpt-5", budget: 1, want: "low", ok: true}, - {name: "low-max", model: "gpt-5", budget: 1024, want: "low", ok: true}, - {name: "medium-min", model: "gpt-5", budget: 1025, want: "medium", ok: true}, - {name: "medium-max", model: "gpt-5", budget: 8192, want: "medium", ok: true}, - {name: "high-min", model: "gpt-5", budget: 8193, want: "high", ok: true}, - {name: "high-max", model: "gpt-5", budget: 24576, want: "high", ok: true}, - {name: "over-max-clamps-to-highest", model: "gpt-5", budget: 64000, want: "high", ok: true}, - {name: "over-max-xhigh-model", model: "gpt-5.2", budget: 64000, want: "xhigh", ok: true}, - {name: "negative-unsupported", model: "gpt-5", budget: -5, want: "", ok: false}, - } - - for _, cs := range cases { - cs := cs - t.Run(cs.name, func(t *testing.T) { - got, ok := util.ThinkingBudgetToEffort(cs.model, cs.budget) - if ok != cs.ok { - t.Fatalf("ok mismatch for model=%s budget=%d: expect %v got %v", cs.model, cs.budget, cs.ok, ok) + actualValue := val.String() + if val.Type == gjson.Number { + actualValue = fmt.Sprintf("%d", val.Int()) } - if got != cs.want { - t.Fatalf("value mismatch for model=%s budget=%d: expect %q got %q", cs.model, cs.budget, cs.want, got) - } - }) - } -} - -func TestThinkingEffortToBudget(t *testing.T) { - cleanup := registerCoreModels(t) - defer cleanup() - - cases := []struct { - name string - model string - effort string - want int - ok bool - }{ - {name: "none", model: "gemini-2.5-pro", effort: "none", want: 0, ok: true}, - {name: "auto", model: "gemini-2.5-pro", effort: "auto", want: -1, ok: true}, - {name: "minimal", model: "gemini-2.5-pro", effort: "minimal", want: 512, ok: true}, - {name: "low", model: "gemini-2.5-pro", effort: "low", want: 1024, ok: true}, - {name: "medium", model: "gemini-2.5-pro", effort: "medium", want: 8192, ok: true}, - {name: "high", model: "gemini-2.5-pro", effort: "high", want: 24576, ok: true}, - {name: "xhigh", model: "gemini-2.5-pro", effort: "xhigh", want: 32768, ok: true}, - {name: "empty-unsupported", model: "gemini-2.5-pro", effort: "", want: 0, ok: false}, - {name: "invalid-unsupported", model: "gemini-2.5-pro", effort: "ultra", want: 0, ok: false}, - {name: "case-insensitive", model: "gemini-2.5-pro", effort: "LOW", want: 1024, ok: true}, - {name: "case-insensitive-medium", model: "gemini-2.5-pro", effort: "MEDIUM", want: 
8192, ok: true}, - } - - for _, cs := range cases { - cs := cs - t.Run(cs.name, func(t *testing.T) { - got, ok := util.ThinkingEffortToBudget(cs.model, cs.effort) - if ok != cs.ok { - t.Fatalf("ok mismatch for model=%s effort=%s: expect %v got %v", cs.model, cs.effort, cs.ok, ok) - } - if got != cs.want { - t.Fatalf("value mismatch for model=%s effort=%s: expect %d got %d", cs.model, cs.effort, cs.want, got) + if actualValue != tc.expectValue { + t.Fatalf("field %s: expected %q, got %q, body=%s", tc.expectField, tc.expectValue, actualValue, string(body)) + } + + // Check includeThoughts for Gemini + if tc.includeThoughts != "" && tc.to == "gemini" { + itVal := gjson.GetBytes(body, "generationConfig.thinkingConfig.includeThoughts") + if !itVal.Exists() { + t.Fatalf("expected includeThoughts field not found, body=%s", string(body)) + } + actual := fmt.Sprintf("%v", itVal.Bool()) + if actual != tc.includeThoughts { + t.Fatalf("includeThoughts: expected %s, got %s, body=%s", tc.includeThoughts, actual, string(body)) + } } }) } From e8f5888d8e0aa47c7cda22c8893bddcb9d4d622e Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 14 Jan 2026 09:02:26 +0800 Subject: [PATCH 07/65] fix(thinking): fix auth matching for thinking suffix and json field conflicts --- internal/thinking/provider/gemini/apply.go | 4 ++++ internal/thinking/provider/geminicli/apply.go | 4 ++++ sdk/cliproxy/auth/conductor.go | 7 +++++++ 3 files changed, 15 insertions(+) diff --git a/internal/thinking/provider/gemini/apply.go b/internal/thinking/provider/gemini/apply.go index 7de48919..eebc44d8 100644 --- a/internal/thinking/provider/gemini/apply.go +++ b/internal/thinking/provider/gemini/apply.go @@ -127,6 +127,8 @@ func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) // Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingBudget") + // Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing. + result, _ = sjson.DeleteBytes(result, "generationConfig.thinkingConfig.include_thoughts") if config.Mode == thinking.ModeNone { result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", false) @@ -150,6 +152,8 @@ func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) { // Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingLevel") + // Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing. 
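The conflict being avoided here is that the upstream parser treats the camelCase and snake_case spellings (and likewise thinkingLevel vs. thinkingBudget) as members of a oneof, so a payload carrying both spellings is rejected. A minimal, self-contained sketch of the normalization this hunk performs, using the same tidwall/sjson calls as the patch (the payload shape is illustrative):

    package main

    import (
    	"fmt"

    	"github.com/tidwall/sjson"
    )

    func main() {
    	// The request arrived with the snake_case spelling; setting the
    	// canonical camelCase key without deleting it first would leave
    	// both present in the output.
    	body := []byte(`{"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}`)

    	body, _ = sjson.DeleteBytes(body, "generationConfig.thinkingConfig.include_thoughts")
    	body, _ = sjson.SetBytes(body, "generationConfig.thinkingConfig.includeThoughts", true)

    	fmt.Println(string(body))
    	// {"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}
    }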
+ result, _ = sjson.DeleteBytes(result, "generationConfig.thinkingConfig.include_thoughts") budget := config.Budget // ModeNone semantics: diff --git a/internal/thinking/provider/geminicli/apply.go b/internal/thinking/provider/geminicli/apply.go index b076b7a6..1d5585ba 100644 --- a/internal/thinking/provider/geminicli/apply.go +++ b/internal/thinking/provider/geminicli/apply.go @@ -87,6 +87,8 @@ func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig) ( func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) { // Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget") + // Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing. + result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts") if config.Mode == thinking.ModeNone { result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", false) @@ -110,6 +112,8 @@ func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) { // Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingLevel") + // Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing. + result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts") budget := config.Budget includeThoughts := false diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index 5b1339be..6e035d0f 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -1711,6 +1711,13 @@ func (m *Manager) pickNext(ctx context.Context, provider, model string, opts cli } candidates := make([]*Auth, 0, len(m.auths)) modelKey := strings.TrimSpace(model) + // Always use base model name (without thinking suffix) for auth matching. 
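The matching fix is easiest to read in isolation: whatever suffix grammar thinking.ParseSuffix accepts, pickNext now compares candidate auths against the base model name only. A sketch of that behavior under the same assumptions (ParseSuffix and its ModelName field are the ones used in the hunk; the served set standing in for an auth's model list is hypothetical, and imports of strings and the thinking package are assumed):

    // matchesAuth reports whether an auth serving the given model IDs can
    // handle the requested model, stripping any thinking suffix first the
    // way pickNext now does.
    func matchesAuth(requested string, served map[string]bool) bool {
    	key := strings.TrimSpace(requested)
    	if parsed := thinking.ParseSuffix(key); parsed.ModelName != "" {
    		key = strings.TrimSpace(parsed.ModelName)
    	}
    	return served[key]
    }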
+ if modelKey != "" { + parsed := thinking.ParseSuffix(modelKey) + if parsed.ModelName != "" { + modelKey = strings.TrimSpace(parsed.ModelName) + } + } registryRef := registry.GetGlobalRegistry() for _, candidate := range m.auths { if candidate.Provider != provider || candidate.Disabled { From 72f21256688bb6f17955a6ce38c199d50faca104 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 14 Jan 2026 10:07:04 +0800 Subject: [PATCH 08/65] fix(executor): properly handle thinking application errors --- .../runtime/executor/aistudio_executor.go | 5 +++- .../runtime/executor/antigravity_executor.go | 20 ++++++++++--- internal/runtime/executor/claude_executor.go | 10 +++++-- internal/runtime/executor/codex_executor.go | 15 ++++++++-- .../runtime/executor/gemini_cli_executor.go | 15 ++++++++-- internal/runtime/executor/gemini_executor.go | 15 ++++++++-- .../executor/gemini_vertex_executor.go | 30 +++++++++++++++---- internal/runtime/executor/iflow_executor.go | 10 +++++-- .../executor/openai_compat_executor.go | 15 ++++++++-- internal/runtime/executor/qwen_executor.go | 10 +++++-- internal/thinking/errors.go | 7 +++++ internal/thinking/provider/geminicli/apply.go | 3 ++ 12 files changed, 126 insertions(+), 29 deletions(-) diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go index cf8e216e..fffb50c4 100644 --- a/internal/runtime/executor/aistudio_executor.go +++ b/internal/runtime/executor/aistudio_executor.go @@ -393,7 +393,10 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c } originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream) payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream) - payload, _ = thinking.ApplyThinking(payload, req.Model, "gemini") + payload, err := thinking.ApplyThinking(payload, req.Model, "gemini") + if err != nil { + return nil, translatedPayload{}, err + } payload = fixGeminiImageAspectRatio(baseModel, payload) payload = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", payload, originalTranslated) payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens") diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 4f704c05..94417ba0 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -136,7 +136,10 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - translated, _ = thinking.ApplyThinking(translated, req.Model, "antigravity") + translated, err = thinking.ApplyThinking(translated, req.Model, "antigravity") + if err != nil { + return resp, err + } // Preserve Claude special handling (use baseModel for registry lookups) translated = normalizeAntigravityThinking(baseModel, translated, isClaude) @@ -254,7 +257,10 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth * originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) - translated, _ = thinking.ApplyThinking(translated, req.Model, "antigravity") + translated, err 
= thinking.ApplyThinking(translated, req.Model, "antigravity") + if err != nil { + return resp, err + } // Preserve Claude special handling (use baseModel for registry lookups) translated = normalizeAntigravityThinking(baseModel, translated, true) @@ -620,7 +626,10 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) - translated, _ = thinking.ApplyThinking(translated, req.Model, "antigravity") + translated, err = thinking.ApplyThinking(translated, req.Model, "antigravity") + if err != nil { + return nil, err + } // Preserve Claude special handling (use baseModel for registry lookups) translated = normalizeAntigravityThinking(baseModel, translated, isClaude) @@ -801,7 +810,10 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut // Prepare payload once (doesn't depend on baseURL) payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - payload, _ = thinking.ApplyThinking(payload, req.Model, "antigravity") + payload, err := thinking.ApplyThinking(payload, req.Model, "antigravity") + if err != nil { + return cliproxyexecutor.Response{}, err + } // Preserve Claude special handling (use baseModel for registry lookups) payload = normalizeAntigravityThinking(baseModel, payload, isClaude) diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 9f2a5b22..636fefad 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -106,7 +106,10 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream) body, _ = sjson.SetBytes(body, "model", baseModel) - body, _ = thinking.ApplyThinking(body, req.Model, "claude") + body, err = thinking.ApplyThinking(body, req.Model, "claude") + if err != nil { + return resp, err + } if !strings.HasPrefix(baseModel, "claude-3-5-haiku") { body = checkSystemInstructions(body) @@ -236,7 +239,10 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) body, _ = sjson.SetBytes(body, "model", baseModel) - body, _ = thinking.ApplyThinking(body, req.Model, "claude") + body, err = thinking.ApplyThinking(body, req.Model, "claude") + if err != nil { + return nil, err + } body = checkSystemInstructions(body) body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 9e553e3c..273987ea 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -96,7 +96,10 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re body = sdktranslator.TranslateRequest(from, to, baseModel, body, false) body = misc.StripCodexUserAgent(body) - body, _ = thinking.ApplyThinking(body, req.Model, "codex") + body, err = thinking.ApplyThinking(body, req.Model, "codex") + if err != nil { + return resp, err + } body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) body, _ = sjson.SetBytes(body, "model", baseModel) @@ -201,7 
+204,10 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au body = sdktranslator.TranslateRequest(from, to, baseModel, body, true) body = misc.StripCodexUserAgent(body) - body, _ = thinking.ApplyThinking(body, req.Model, "codex") + body, err = thinking.ApplyThinking(body, req.Model, "codex") + if err != nil { + return nil, err + } body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) body, _ = sjson.DeleteBytes(body, "previous_response_id") @@ -302,7 +308,10 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth body = sdktranslator.TranslateRequest(from, to, baseModel, body, false) body = misc.StripCodexUserAgent(body) - body, _ = thinking.ApplyThinking(body, req.Model, "codex") + body, err := thinking.ApplyThinking(body, req.Model, "codex") + if err != nil { + return cliproxyexecutor.Response{}, err + } body, _ = sjson.SetBytes(body, "model", baseModel) body, _ = sjson.DeleteBytes(body, "previous_response_id") diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index 3d08b830..add01cb3 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -123,7 +123,10 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - basePayload, _ = thinking.ApplyThinking(basePayload, req.Model, "gemini-cli") + basePayload, err = thinking.ApplyThinking(basePayload, req.Model, "gemini-cli") + if err != nil { + return resp, err + } basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload) basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated) @@ -269,7 +272,10 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) - basePayload, _ = thinking.ApplyThinking(basePayload, req.Model, "gemini-cli") + basePayload, err = thinking.ApplyThinking(basePayload, req.Model, "gemini-cli") + if err != nil { + return nil, err + } basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload) basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated) @@ -473,7 +479,10 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. 
for range models { payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - payload, _ = thinking.ApplyThinking(payload, req.Model, "gemini-cli") + payload, err = thinking.ApplyThinking(payload, req.Model, "gemini-cli") + if err != nil { + return cliproxyexecutor.Response{}, err + } payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "model") diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index fd6ec22e..4cc5d945 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -120,7 +120,10 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + body, err = thinking.ApplyThinking(body, req.Model, "gemini") + if err != nil { + return resp, err + } body = fixGeminiImageAspectRatio(baseModel, body) body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) @@ -219,7 +222,10 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) - body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + body, err = thinking.ApplyThinking(body, req.Model, "gemini") + if err != nil { + return nil, err + } body = fixGeminiImageAspectRatio(baseModel, body) body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) @@ -332,7 +338,10 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut to := sdktranslator.FromString("gemini") translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - translatedReq, _ = thinking.ApplyThinking(translatedReq, req.Model, "gemini") + translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, "gemini") + if err != nil { + return cliproxyexecutor.Response{}, err + } translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq) respCtx := context.WithValue(ctx, "alt", opts.Alt) diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go index 18d9f8d6..8a412b47 100644 --- a/internal/runtime/executor/gemini_vertex_executor.go +++ b/internal/runtime/executor/gemini_vertex_executor.go @@ -170,7 +170,10 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + body, err = thinking.ApplyThinking(body, req.Model, "gemini") + if err != nil { + return resp, err + } body = fixGeminiImageAspectRatio(baseModel, body) body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) @@ -269,7 +272,10 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) body := 
sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + body, err = thinking.ApplyThinking(body, req.Model, "gemini") + if err != nil { + return resp, err + } body = fixGeminiImageAspectRatio(baseModel, body) body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) @@ -369,7 +375,10 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) - body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + body, err = thinking.ApplyThinking(body, req.Model, "gemini") + if err != nil { + return nil, err + } body = fixGeminiImageAspectRatio(baseModel, body) body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) @@ -485,7 +494,10 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) - body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + body, err = thinking.ApplyThinking(body, req.Model, "gemini") + if err != nil { + return nil, err + } body = fixGeminiImageAspectRatio(baseModel, body) body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) @@ -593,7 +605,10 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - translatedReq, _ = thinking.ApplyThinking(translatedReq, req.Model, "gemini") + translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, "gemini") + if err != nil { + return cliproxyexecutor.Response{}, err + } translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq) translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel) @@ -674,7 +689,10 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth * translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - translatedReq, _ = thinking.ApplyThinking(translatedReq, req.Model, "gemini") + translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, "gemini") + if err != nil { + return cliproxyexecutor.Response{}, err + } translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq) translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel) diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index cc158250..6ce4221c 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -92,7 +92,10 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) body, _ = sjson.SetBytes(body, "model", baseModel) - body, _ = thinking.ApplyThinking(body, req.Model, "iflow") + body, err = thinking.ApplyThinking(body, req.Model, "iflow") + if err != nil { + return resp, err + } body = preserveReasoningContentInMessages(body) body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) @@ -187,7 +190,10 @@ func (e 
*IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) body, _ = sjson.SetBytes(body, "model", baseModel) - body, _ = thinking.ApplyThinking(body, req.Model, "iflow") + body, err = thinking.ApplyThinking(body, req.Model, "iflow") + if err != nil { + return nil, err + } body = preserveReasoningContentInMessages(body) // Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour. diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index 22e8b4c8..6ae9103f 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -92,7 +92,10 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), opts.Stream) translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated) - translated, _ = thinking.ApplyThinking(translated, req.Model, "openai") + translated, err = thinking.ApplyThinking(translated, req.Model, "openai") + if err != nil { + return resp, err + } url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated)) @@ -184,7 +187,10 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated) - translated, _ = thinking.ApplyThinking(translated, req.Model, "openai") + translated, err = thinking.ApplyThinking(translated, req.Model, "openai") + if err != nil { + return nil, err + } url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated)) @@ -291,7 +297,10 @@ func (e *OpenAICompatExecutor) CountTokens(ctx context.Context, auth *cliproxyau modelForCounting := baseModel - translated, _ = thinking.ApplyThinking(translated, req.Model, "openai") + translated, err := thinking.ApplyThinking(translated, req.Model, "openai") + if err != nil { + return cliproxyexecutor.Response{}, err + } enc, err := tokenizerForModel(modelForCounting) if err != nil { diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index f7162893..ff35c935 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -86,7 +86,10 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) body, _ = sjson.SetBytes(body, "model", baseModel) - body, _ = thinking.ApplyThinking(body, req.Model, "openai") + body, err = thinking.ApplyThinking(body, req.Model, "openai") + if err != nil { + return resp, err + } body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) @@ -169,7 +172,10 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) body, _ = sjson.SetBytes(body, "model", baseModel) - body, _ = thinking.ApplyThinking(body, req.Model, "openai") 
+ body, err = thinking.ApplyThinking(body, req.Model, "openai") + if err != nil { + return nil, err + } toolsResult := gjson.GetBytes(body, "tools") // I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response. diff --git a/internal/thinking/errors.go b/internal/thinking/errors.go index 56f82c68..1cf9ccd0 100644 --- a/internal/thinking/errors.go +++ b/internal/thinking/errors.go @@ -1,6 +1,8 @@ // Package thinking provides unified thinking configuration processing logic. package thinking +import "net/http" + // ErrorCode represents the type of thinking configuration error. type ErrorCode string @@ -69,3 +71,8 @@ func NewThinkingErrorWithModel(code ErrorCode, message, model string) *ThinkingE Model: model, } } + +// StatusCode implements a portable status code interface for HTTP handlers. +func (e *ThinkingError) StatusCode() int { + return http.StatusBadRequest +} diff --git a/internal/thinking/provider/geminicli/apply.go b/internal/thinking/provider/geminicli/apply.go index 1d5585ba..a4607107 100644 --- a/internal/thinking/provider/geminicli/apply.go +++ b/internal/thinking/provider/geminicli/apply.go @@ -55,6 +55,9 @@ func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo * if config.Mode == thinking.ModeAuto { return a.applyBudgetFormat(body, config) } + if config.Mode == thinking.ModeBudget { + return a.applyBudgetFormat(body, config) + } // For non-auto modes, choose format based on model capabilities support := modelInfo.Thinking From a75fb6af906db3fd99955d5b20ebe123711d49a2 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 14 Jan 2026 14:32:48 +0800 Subject: [PATCH 09/65] refactor(antigravity): remove hardcoded model aliases --- config.example.yaml | 20 +++- internal/registry/model_definitions.go | 18 +-- .../runtime/executor/antigravity_executor.go | 107 ++++++------------ 3 files changed, 59 insertions(+), 86 deletions(-) diff --git a/config.example.yaml b/config.example.yaml index 3a7e7fbd..353b4f1b 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -206,7 +206,22 @@ nonstream-keepalive-interval: 0 # Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow. # NOTE: Mappings do not apply to gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, or ampcode. # You can repeat the same name with different aliases to expose multiple client model names. 
-# oauth-model-mappings: +oauth-model-mappings: + antigravity: + - name: "rev19-uic3-1p" + alias: "gemini-2.5-computer-use-preview-10-2025" + - name: "gemini-3-pro-image" + alias: "gemini-3-pro-image-preview" + - name: "gemini-3-pro-high" + alias: "gemini-3-pro-preview" + - name: "gemini-3-flash" + alias: "gemini-3-flash-preview" + - name: "claude-sonnet-4-5" + alias: "gemini-claude-sonnet-4-5" + - name: "claude-sonnet-4-5-thinking" + alias: "gemini-claude-sonnet-4-5-thinking" + - name: "claude-opus-4-5-thinking" + alias: "gemini-claude-opus-4-5-thinking" # gemini-cli: # - name: "gemini-2.5-pro" # original model name under this channel # alias: "g2.5p" # client-visible alias @@ -217,9 +232,6 @@ nonstream-keepalive-interval: 0 # aistudio: # - name: "gemini-2.5-pro" # alias: "g2.5p" -# antigravity: -# - name: "gemini-3-pro-preview" -# alias: "g3p" # claude: # - name: "claude-sonnet-4-5-20250929" # alias: "cs4.5" diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index 268caeb4..f0fb60cb 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -768,17 +768,17 @@ type AntigravityModelConfig struct { } // GetAntigravityModelConfig returns static configuration for antigravity models. -// Keys use the ALIASED model names (after modelName2Alias conversion) for direct lookup. +// Keys use upstream model names returned by the Antigravity models endpoint. func GetAntigravityModelConfig() map[string]*AntigravityModelConfig { return map[string]*AntigravityModelConfig{ - "gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"}, - "gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"}, - "gemini-2.5-computer-use-preview-10-2025": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-2.5-computer-use-preview-10-2025"}, - "gemini-3-pro-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-preview"}, - "gemini-3-pro-image-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image-preview"}, - "gemini-3-flash-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash-preview"}, - "gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, MaxCompletionTokens: 64000}, - "gemini-claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, MaxCompletionTokens: 64000}, + "gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"}, + "gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"}, + "rev19-uic3-1p": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/rev19-uic3-1p"}, + "gemini-3-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, 
Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-high"}, + "gemini-3-pro-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image"}, + "gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash"}, + "claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, MaxCompletionTokens: 64000}, + "claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, MaxCompletionTokens: 64000}, } } diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 94417ba0..1eb7b30f 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -1005,35 +1005,40 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c modelConfig := registry.GetAntigravityModelConfig() models := make([]*registry.ModelInfo, 0, len(result.Map())) for originalName := range result.Map() { - aliasName := modelName2Alias(originalName) - if aliasName != "" { - cfg := modelConfig[aliasName] - modelName := aliasName - if cfg != nil && cfg.Name != "" { - modelName = cfg.Name - } - modelInfo := ®istry.ModelInfo{ - ID: aliasName, - Name: modelName, - Description: aliasName, - DisplayName: aliasName, - Version: aliasName, - Object: "model", - Created: now, - OwnedBy: antigravityAuthType, - Type: antigravityAuthType, - } - // Look up Thinking support from static config using alias name - if cfg != nil { - if cfg.Thinking != nil { - modelInfo.Thinking = cfg.Thinking - } - if cfg.MaxCompletionTokens > 0 { - modelInfo.MaxCompletionTokens = cfg.MaxCompletionTokens - } - } - models = append(models, modelInfo) + modelID := strings.TrimSpace(originalName) + if modelID == "" { + continue } + switch modelID { + case "chat_20706", "chat_23310", "gemini-2.5-flash-thinking", "gemini-3-pro-low", "gemini-2.5-pro": + continue + } + cfg := modelConfig[modelID] + modelName := modelID + if cfg != nil && cfg.Name != "" { + modelName = cfg.Name + } + modelInfo := ®istry.ModelInfo{ + ID: modelID, + Name: modelName, + Description: modelID, + DisplayName: modelID, + Version: modelID, + Object: "model", + Created: now, + OwnedBy: antigravityAuthType, + Type: antigravityAuthType, + } + // Look up Thinking support from static config using upstream model name. 
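With the alias indirection gone, the static table is keyed by exactly the IDs the Antigravity models endpoint returns, so callers need no translation step before a lookup. For instance, reading the table as defined above:

    cfg := registry.GetAntigravityModelConfig()["gemini-3-flash"]
    if cfg != nil && cfg.Thinking != nil {
    	fmt.Println(cfg.Name)            // "models/gemini-3-flash"
    	fmt.Println(cfg.Thinking.Levels) // [minimal low medium high]
    }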
+ if cfg != nil { + if cfg.Thinking != nil { + modelInfo.Thinking = cfg.Thinking + } + if cfg.MaxCompletionTokens > 0 { + modelInfo.MaxCompletionTokens = cfg.MaxCompletionTokens + } + } + models = append(models, modelInfo) } return models } @@ -1171,7 +1176,7 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau } } payload = geminiToAntigravity(modelName, payload, projectID) - payload, _ = sjson.SetBytes(payload, "model", alias2ModelName(modelName)) + payload, _ = sjson.SetBytes(payload, "model", modelName) if strings.Contains(modelName, "claude") { strJSON := string(payload) @@ -1443,50 +1448,6 @@ func generateProjectID() string { return adj + "-" + noun + "-" + randomPart } -func modelName2Alias(modelName string) string { - switch modelName { - case "rev19-uic3-1p": - return "gemini-2.5-computer-use-preview-10-2025" - case "gemini-3-pro-image": - return "gemini-3-pro-image-preview" - case "gemini-3-pro-high": - return "gemini-3-pro-preview" - case "gemini-3-flash": - return "gemini-3-flash-preview" - case "claude-sonnet-4-5": - return "gemini-claude-sonnet-4-5" - case "claude-sonnet-4-5-thinking": - return "gemini-claude-sonnet-4-5-thinking" - case "claude-opus-4-5-thinking": - return "gemini-claude-opus-4-5-thinking" - case "chat_20706", "chat_23310", "gemini-2.5-flash-thinking", "gemini-3-pro-low", "gemini-2.5-pro": - return "" - default: - return modelName - } -} - -func alias2ModelName(modelName string) string { - switch modelName { - case "gemini-2.5-computer-use-preview-10-2025": - return "rev19-uic3-1p" - case "gemini-3-pro-image-preview": - return "gemini-3-pro-image" - case "gemini-3-pro-preview": - return "gemini-3-pro-high" - case "gemini-3-flash-preview": - return "gemini-3-flash" - case "gemini-claude-sonnet-4-5": - return "claude-sonnet-4-5" - case "gemini-claude-sonnet-4-5-thinking": - return "claude-sonnet-4-5-thinking" - case "gemini-claude-opus-4-5-thinking": - return "claude-opus-4-5-thinking" - default: - return modelName - } -} - // normalizeAntigravityThinking performs Antigravity-specific thinking config normalization. // This function is called AFTER thinking.ApplyThinking() to apply Claude-specific constraints. 
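The ordering contract matters: the shared thinking pipeline runs first and may fail, and only then does the Antigravity-specific Claude clamping run on the already-applied config. This is the shape every executor call site in this series now has (excerpted from the hunks above; translated, resp, baseModel, and isClaude are declared earlier in each function):

    translated, err = thinking.ApplyThinking(translated, req.Model, "antigravity")
    if err != nil {
    	return resp, err
    }
    // Claude-specific constraints are applied to the already-applied config,
    // with baseModel (suffix stripped) used for registry lookups.
    translated = normalizeAntigravityThinking(baseModel, translated, isClaude)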
// From 40ee065eff0abc75ce522cdf5b8c0a27c30a8912 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 14 Jan 2026 15:44:08 +0800 Subject: [PATCH 10/65] fix(thinking): use static lookup to avoid alias issues --- internal/registry/model_definitions.go | 12 +++++++ internal/thinking/apply.go | 32 ++++++++++++------- internal/thinking/apply_test.go | 2 +- internal/thinking/provider/claude/apply.go | 2 +- internal/thinking/provider/codex/apply.go | 2 +- internal/thinking/provider/gemini/apply.go | 2 +- .../thinking/provider/gemini/apply_test.go | 5 +-- internal/thinking/provider/geminicli/apply.go | 2 +- .../thinking/provider/geminicli/apply_test.go | 11 ++++--- internal/thinking/provider/openai/apply.go | 2 +- .../thinking/provider/openai/apply_test.go | 8 +++-- 11 files changed, 52 insertions(+), 28 deletions(-) diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index f0fb60cb..b8ca8757 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -788,6 +788,7 @@ func LookupStaticModelInfo(modelID string) *ModelInfo { if modelID == "" { return nil } + allModels := [][]*ModelInfo{ GetClaudeModels(), GetGeminiModels(), @@ -805,5 +806,16 @@ func LookupStaticModelInfo(modelID string) *ModelInfo { } } } + + // Check Antigravity static config + if cfg := GetAntigravityModelConfig()[modelID]; cfg != nil && cfg.Thinking != nil { + return &ModelInfo{ + ID: modelID, + Name: cfg.Name, + Thinking: cfg.Thinking, + MaxCompletionTokens: cfg.MaxCompletionTokens, + } + } + return nil } diff --git a/internal/thinking/apply.go b/internal/thinking/apply.go index 8ee60b8d..44566cab 100644 --- a/internal/thinking/apply.go +++ b/internal/thinking/apply.go @@ -40,7 +40,7 @@ func RegisterProvider(name string, applier ProviderApplier) { // letting the upstream service validate the configuration. func IsUserDefinedModel(modelInfo *registry.ModelInfo) bool { if modelInfo == nil { - return false + return true } return modelInfo.UserDefined } @@ -87,28 +87,28 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) { } // 2. Parse suffix and get modelInfo + // First try dynamic registry, then fall back to static lookup suffixResult := ParseSuffix(model) baseModel := suffixResult.ModelName modelInfo := registry.GetGlobalRegistry().GetModelInfo(baseModel) + if modelInfo == nil { + modelInfo = registry.LookupStaticModelInfo(baseModel) + } // 3. 
Model capability check - if modelInfo == nil { - log.WithField("model", model).Debug("thinking: nil modelInfo, passthrough") - return body, nil + if IsUserDefinedModel(modelInfo) { + return applyUserDefinedModel(body, modelInfo, provider, suffixResult) } if modelInfo.Thinking == nil { - if IsUserDefinedModel(modelInfo) { - return applyUserDefinedModel(body, modelInfo, provider, suffixResult) - } config := extractThinkingConfig(body, provider) if hasThinkingConfig(config) { log.WithFields(log.Fields{ - "model": modelInfo.ID, + "model": baseModel, "provider": provider, }).Debug("thinking: model does not support thinking, stripping config") return StripThinkingConfig(body, provider), nil } - log.WithField("model", modelInfo.ID).Debug("thinking: model does not support thinking, passthrough") + log.WithField("model", baseModel).Debug("thinking: model does not support thinking, passthrough") return body, nil } @@ -212,6 +212,14 @@ func parseSuffixToConfig(rawSuffix string) ThinkingConfig { // applyUserDefinedModel applies thinking configuration for user-defined models // without ThinkingSupport validation. func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, provider string, suffixResult SuffixResult) ([]byte, error) { + // Get model ID for logging + modelID := "" + if modelInfo != nil { + modelID = modelInfo.ID + } else { + modelID = suffixResult.ModelName + } + // Get config: suffix priority over body var config ThinkingConfig if suffixResult.HasSuffix { @@ -222,7 +230,7 @@ func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, provider if !hasThinkingConfig(config) { log.WithFields(log.Fields{ - "model": modelInfo.ID, + "model": modelID, "provider": provider, "user_defined": true, "passthrough": true, @@ -233,7 +241,7 @@ func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, provider applier := GetProviderApplier(provider) if applier == nil { log.WithFields(log.Fields{ - "model": modelInfo.ID, + "model": modelID, "provider": provider, "user_defined": true, "passthrough": true, @@ -242,7 +250,7 @@ func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, provider } log.WithFields(log.Fields{ - "model": modelInfo.ID, + "model": modelID, "provider": provider, "user_defined": true, "passthrough": false, diff --git a/internal/thinking/apply_test.go b/internal/thinking/apply_test.go index d89fff32..b49079db 100644 --- a/internal/thinking/apply_test.go +++ b/internal/thinking/apply_test.go @@ -150,7 +150,7 @@ func TestIsUserDefinedModel(t *testing.T) { modelInfo *registry.ModelInfo want bool }{ - {"nil modelInfo", nil, false}, + {"nil modelInfo", nil, true}, {"not user-defined no flag", ®istry.ModelInfo{ID: "test"}, false}, {"not user-defined with type", ®istry.ModelInfo{ID: "test", Type: "openai"}, false}, {"user-defined with flag", ®istry.ModelInfo{ID: "test", Type: "openai", UserDefined: true}, true}, diff --git a/internal/thinking/provider/claude/apply.go b/internal/thinking/provider/claude/apply.go index e1409389..979ecd75 100644 --- a/internal/thinking/provider/claude/apply.go +++ b/internal/thinking/provider/claude/apply.go @@ -54,7 +54,7 @@ func init() { // } func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { if modelInfo == nil { - return body, nil + return applyCompatibleClaude(body, config) } if modelInfo.Thinking == nil { if modelInfo.Type == "" { diff --git a/internal/thinking/provider/codex/apply.go b/internal/thinking/provider/codex/apply.go index 
386185a6..228bb6fe 100644 --- a/internal/thinking/provider/codex/apply.go +++ b/internal/thinking/provider/codex/apply.go @@ -45,7 +45,7 @@ func init() { // } func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { if modelInfo == nil { - return body, nil + return applyCompatibleCodex(body, config) } if modelInfo.Thinking == nil { if modelInfo.Type == "" { diff --git a/internal/thinking/provider/gemini/apply.go b/internal/thinking/provider/gemini/apply.go index eebc44d8..bb574c31 100644 --- a/internal/thinking/provider/gemini/apply.go +++ b/internal/thinking/provider/gemini/apply.go @@ -60,7 +60,7 @@ func init() { // } func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { if modelInfo == nil { - return body, nil + return a.applyCompatible(body, config) } if modelInfo.Thinking == nil { if modelInfo.Type == "" { diff --git a/internal/thinking/provider/gemini/apply_test.go b/internal/thinking/provider/gemini/apply_test.go index 5f762a2f..1af2fa83 100644 --- a/internal/thinking/provider/gemini/apply_test.go +++ b/internal/thinking/provider/gemini/apply_test.go @@ -450,8 +450,9 @@ func TestGeminiApplyNilModelInfo(t *testing.T) { if err != nil { t.Fatalf("Apply() with nil modelInfo should not error, got: %v", err) } - if string(result) != string(body) { - t.Fatalf("Apply() with nil modelInfo should return original body, got: %s", result) + // nil modelInfo now applies compatible config + if !gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget").Exists() { + t.Fatalf("Apply() with nil modelInfo should apply thinking config, got: %s", result) } } diff --git a/internal/thinking/provider/geminicli/apply.go b/internal/thinking/provider/geminicli/apply.go index a4607107..eb6d82a4 100644 --- a/internal/thinking/provider/geminicli/apply.go +++ b/internal/thinking/provider/geminicli/apply.go @@ -30,7 +30,7 @@ func init() { // Apply applies thinking configuration to Gemini CLI request body. 
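Each provider applier in this patch picks up the same ladder: a nil modelInfo no longer short-circuits to a passthrough but falls back to the provider's compatible-mode helper, leaving validation to the upstream service. A condensed sketch of the shared shape, with the function parameters standing in for each provider's own applyCompatible* helper and its capability-aware path (the types are the repo's internal thinking and registry types; the wrapper itself is illustrative, not part of the patch):

    func applyOrFallback(
    	body []byte,
    	config thinking.ThinkingConfig,
    	modelInfo *registry.ModelInfo,
    	applyCompatible func([]byte, thinking.ThinkingConfig) ([]byte, error),
    	applyKnown func([]byte, thinking.ThinkingConfig, *registry.ModelInfo) ([]byte, error),
    ) ([]byte, error) {
    	if modelInfo == nil {
    		// Unknown model: best-effort compatible apply; upstream validates.
    		return applyCompatible(body, config)
    	}
    	return applyKnown(body, config, modelInfo)
    }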
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { if modelInfo == nil { - return body, nil + return a.applyCompatible(body, config) } if modelInfo.Thinking == nil { if modelInfo.Type == "" { diff --git a/internal/thinking/provider/geminicli/apply_test.go b/internal/thinking/provider/geminicli/apply_test.go index a606457c..6bf77dd2 100644 --- a/internal/thinking/provider/geminicli/apply_test.go +++ b/internal/thinking/provider/geminicli/apply_test.go @@ -241,8 +241,9 @@ func TestGeminiCLIApplyNilModelInfo(t *testing.T) { if err != nil { t.Fatalf("Apply() with nil modelInfo should not error, got: %v", err) } - if string(result) != string(body) { - t.Fatalf("Apply() with nil modelInfo should return original body, got: %s", result) + // nil modelInfo now applies compatible config + if !gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget").Exists() { + t.Fatalf("Apply() with nil modelInfo should apply thinking config, got: %s", result) } } @@ -277,9 +278,9 @@ func TestGeminiCLIApplyModeBudgetWithLevels(t *testing.T) { if err != nil { t.Fatalf("Apply() error = %v", err) } - // ModeBudget with Levels model: Apply returns body unchanged (conversion is upper layer's job) - if string(result) != string(body) { - t.Fatalf("Apply() ModeBudget with Levels should return original body, got: %s", result) + // ModeBudget applies budget format directly without conversion to levels + if !gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget").Exists() { + t.Fatalf("Apply() ModeBudget should apply budget format, got: %s", result) } } diff --git a/internal/thinking/provider/openai/apply.go b/internal/thinking/provider/openai/apply.go index 810faf34..aea1c055 100644 --- a/internal/thinking/provider/openai/apply.go +++ b/internal/thinking/provider/openai/apply.go @@ -42,7 +42,7 @@ func init() { // } func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { if modelInfo == nil { - return body, nil + return applyCompatibleOpenAI(body, config) } if modelInfo.Thinking == nil { if modelInfo.Type == "" { diff --git a/internal/thinking/provider/openai/apply_test.go b/internal/thinking/provider/openai/apply_test.go index 88c1800a..5be01e4e 100644 --- a/internal/thinking/provider/openai/apply_test.go +++ b/internal/thinking/provider/openai/apply_test.go @@ -43,12 +43,14 @@ func TestApplierImplementsInterface(t *testing.T) { func TestApplyNilModelInfo(t *testing.T) { applier := NewApplier() body := []byte(`{"model":"gpt-5.2"}`) - got, err := applier.Apply(body, thinking.ThinkingConfig{}, nil) + config := thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh} + got, err := applier.Apply(body, config, nil) if err != nil { t.Fatalf("expected nil error, got %v", err) } - if string(got) != string(body) { - t.Fatalf("expected body unchanged, got %s", string(got)) + // nil modelInfo now applies compatible config + if !gjson.GetBytes(got, "reasoning_effort").Exists() { + t.Fatalf("expected reasoning_effort applied, got %s", string(got)) } } From 7f1b2b3f6e8ebc72f1449f89f4e893a6fe921f39 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 14 Jan 2026 16:30:28 +0800 Subject: [PATCH 11/65] fix(thinking): improve model lookup and validation --- internal/registry/model_registry.go | 15 ++++++++ .../runtime/executor/antigravity_executor.go | 6 ++-- internal/thinking/apply.go | 12 +++---- 
internal/thinking/provider/claude/apply.go | 11 ++---- internal/thinking/provider/codex/apply.go | 11 ++---- internal/thinking/provider/gemini/apply.go | 11 ++---- .../thinking/provider/gemini/apply_test.go | 34 +++++++------------ internal/thinking/provider/geminicli/apply.go | 11 ++---- .../thinking/provider/geminicli/apply_test.go | 34 +++++++------------ internal/thinking/provider/iflow/apply.go | 8 ++--- .../thinking/provider/iflow/apply_test.go | 30 ++++++---------- internal/thinking/provider/openai/apply.go | 11 ++---- .../thinking/provider/openai/apply_test.go | 21 ++++-------- 13 files changed, 78 insertions(+), 137 deletions(-) diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go index c90f6f61..970c2dc9 100644 --- a/internal/registry/model_registry.go +++ b/internal/registry/model_registry.go @@ -132,6 +132,21 @@ func GetGlobalRegistry() *ModelRegistry { return globalRegistry } +// LookupModelInfo searches the dynamic registry first, then falls back to static model definitions. +// +// This helper exists because some code paths only have a model ID and still need Thinking and +// max completion token metadata even when the dynamic registry hasn't been populated. +func LookupModelInfo(modelID string) *ModelInfo { + modelID = strings.TrimSpace(modelID) + if modelID == "" { + return nil + } + if info := GetGlobalRegistry().GetModelInfo(modelID); info != nil { + return info + } + return LookupStaticModelInfo(modelID) +} + // SetHook sets an optional hook for observing model registration changes. func (r *ModelRegistry) SetHook(hook ModelRegistryHook) { if r == nil { diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 1eb7b30f..90ebb53f 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -1458,7 +1458,7 @@ func generateProjectID() string { // - For Claude models: removing thinkingConfig if budget < minimum allowed func normalizeAntigravityThinking(model string, payload []byte, isClaude bool) []byte { payload = util.StripThinkingConfigIfUnsupported(model, payload) - modelInfo := registry.GetGlobalRegistry().GetModelInfo(model) + modelInfo := registry.LookupModelInfo(model) if modelInfo == nil || modelInfo.Thinking == nil { return payload } @@ -1501,7 +1501,7 @@ func antigravityEffectiveMaxTokens(model string, payload []byte) (max int, fromM if maxTok := gjson.GetBytes(payload, "request.generationConfig.maxOutputTokens"); maxTok.Exists() && maxTok.Int() > 0 { return int(maxTok.Int()), false } - if modelInfo := registry.GetGlobalRegistry().GetModelInfo(model); modelInfo != nil && modelInfo.MaxCompletionTokens > 0 { + if modelInfo := registry.LookupModelInfo(model); modelInfo != nil && modelInfo.MaxCompletionTokens > 0 { return modelInfo.MaxCompletionTokens, true } return 0, false @@ -1510,7 +1510,7 @@ func antigravityEffectiveMaxTokens(model string, payload []byte) (max int, fromM // antigravityMinThinkingBudget returns the minimum thinking budget for a model. // Falls back to -1 if no model info is found. 
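The new helper makes the two-tier lookup a one-liner at call sites that previously queried only the dynamic registry. For example, before the models endpoint has populated the registry, Thinking metadata still resolves from the static Antigravity table shown earlier:

    // Dynamic registry first, static definitions second.
    if info := registry.LookupModelInfo("claude-opus-4-5-thinking"); info != nil && info.Thinking != nil {
    	minBudget := info.Thinking.Min // 1024 per the static Antigravity config
    	_ = minBudget
    }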
func antigravityMinThinkingBudget(model string) int { - if modelInfo := registry.GetGlobalRegistry().GetModelInfo(model); modelInfo != nil && modelInfo.Thinking != nil { + if modelInfo := registry.LookupModelInfo(model); modelInfo != nil && modelInfo.Thinking != nil { return modelInfo.Thinking.Min } return -1 diff --git a/internal/thinking/apply.go b/internal/thinking/apply.go index 44566cab..415b721c 100644 --- a/internal/thinking/apply.go +++ b/internal/thinking/apply.go @@ -68,9 +68,11 @@ func IsUserDefinedModel(modelInfo *registry.ModelInfo) bool { // // Passthrough behavior (returns original body without error): // - Unknown provider (not in providerAppliers map) -// - modelInfo is nil (model not found in registry) // - modelInfo.Thinking is nil (model doesn't support thinking) // +// Note: Unknown models (modelInfo is nil) are treated as user-defined models: we skip +// validation and still apply the thinking config so the upstream can validate it. +// // Example: // // // With suffix - suffix config takes priority @@ -87,15 +89,13 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) { } // 2. Parse suffix and get modelInfo - // First try dynamic registry, then fall back to static lookup suffixResult := ParseSuffix(model) baseModel := suffixResult.ModelName - modelInfo := registry.GetGlobalRegistry().GetModelInfo(baseModel) - if modelInfo == nil { - modelInfo = registry.LookupStaticModelInfo(baseModel) - } + modelInfo := registry.LookupModelInfo(baseModel) // 3. Model capability check + // Unknown models are treated as user-defined so thinking config can still be applied. + // The upstream service is responsible for validating the configuration. if IsUserDefinedModel(modelInfo) { return applyUserDefinedModel(body, modelInfo, provider, suffixResult) } diff --git a/internal/thinking/provider/claude/apply.go b/internal/thinking/provider/claude/apply.go index 979ecd75..b7833072 100644 --- a/internal/thinking/provider/claude/apply.go +++ b/internal/thinking/provider/claude/apply.go @@ -53,18 +53,11 @@ func init() { // } // } func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { - if modelInfo == nil { + if thinking.IsUserDefinedModel(modelInfo) { return applyCompatibleClaude(body, config) } if modelInfo.Thinking == nil { - if modelInfo.Type == "" { - modelID := modelInfo.ID - if modelID == "" { - modelID = "unknown" - } - return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID) - } - return applyCompatibleClaude(body, config) + return body, nil } // Only process ModeBudget and ModeNone; other modes pass through diff --git a/internal/thinking/provider/codex/apply.go b/internal/thinking/provider/codex/apply.go index 228bb6fe..3bed318b 100644 --- a/internal/thinking/provider/codex/apply.go +++ b/internal/thinking/provider/codex/apply.go @@ -44,18 +44,11 @@ func init() { // } // } func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { - if modelInfo == nil { + if thinking.IsUserDefinedModel(modelInfo) { return applyCompatibleCodex(body, config) } if modelInfo.Thinking == nil { - if modelInfo.Type == "" { - modelID := modelInfo.ID - if modelID == "" { - modelID = "unknown" - } - return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID) - } - return applyCompatibleCodex(body, config) + return 
body, nil
	}

	// Only handle ModeLevel and ModeNone; other modes pass through unchanged.
diff --git a/internal/thinking/provider/gemini/apply.go b/internal/thinking/provider/gemini/apply.go
index bb574c31..c8560f19 100644
--- a/internal/thinking/provider/gemini/apply.go
+++ b/internal/thinking/provider/gemini/apply.go
@@ -59,18 +59,11 @@ func init() {
 //	  }
 //	}
 func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
-	if modelInfo == nil {
+	if thinking.IsUserDefinedModel(modelInfo) {
 		return a.applyCompatible(body, config)
 	}
 	if modelInfo.Thinking == nil {
-		if modelInfo.Type == "" {
-			modelID := modelInfo.ID
-			if modelID == "" {
-				modelID = "unknown"
-			}
-			return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID)
-		}
-		return a.applyCompatible(body, config)
+		return body, nil
 	}
 
 	if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
diff --git a/internal/thinking/provider/gemini/apply_test.go b/internal/thinking/provider/gemini/apply_test.go
index 1af2fa83..47c7e7ce 100644
--- a/internal/thinking/provider/gemini/apply_test.go
+++ b/internal/thinking/provider/gemini/apply_test.go
@@ -381,26 +381,21 @@ func TestGeminiApplyConflictingFields(t *testing.T) {
 	}
 }
 
-// TestGeminiApplyThinkingNotSupported tests error handling when modelInfo.Thinking is nil.
+// TestGeminiApplyThinkingNotSupported tests passthrough handling when modelInfo.Thinking is nil.
 func TestGeminiApplyThinkingNotSupported(t *testing.T) {
 	applier := NewApplier()
 	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
+	body := []byte(`{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`)
 
 	// Model with nil Thinking support
 	modelInfo := &registry.ModelInfo{ID: "gemini-unknown", Thinking: nil}
 
-	_, err := applier.Apply([]byte(`{}`), config, modelInfo)
-	if err == nil {
-		t.Fatal("Apply() expected error for nil Thinking, got nil")
+	got, err := applier.Apply(body, config, modelInfo)
+	if err != nil {
+		t.Fatalf("Apply() expected nil error for nil Thinking, got %v", err)
 	}
-
-	// Verify it's the correct error type
-	thinkErr, ok := err.(*thinking.ThinkingError)
-	if !ok {
-		t.Fatalf("Apply() error type = %T, want *thinking.ThinkingError", err)
-	}
-	if thinkErr.Code != thinking.ErrThinkingNotSupported {
-		t.Fatalf("Apply() error code = %v, want %v", thinkErr.Code, thinking.ErrThinkingNotSupported)
+	if string(got) != string(body) {
+		t.Fatalf("expected body unchanged, got %s", string(got))
 	}
 }
 
@@ -462,17 +457,14 @@ func TestGeminiApplyEmptyModelID(t *testing.T) {
 	applier := NewApplier()
 	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
 	modelInfo := &registry.ModelInfo{ID: "", Thinking: nil}
+	body := []byte(`{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`)
 
-	_, err := applier.Apply([]byte(`{}`), config, modelInfo)
-	if err == nil {
-		t.Fatal("Apply() with empty modelID and nil Thinking should error")
+	got, err := applier.Apply(body, config, modelInfo)
+	if err != nil {
+		t.Fatalf("Apply() expected nil error, got %v", err)
 	}
-	thinkErr, ok := err.(*thinking.ThinkingError)
-	if !ok {
-		t.Fatalf("Apply() error type = %T, want *thinking.ThinkingError", err)
-	}
-	if thinkErr.Model != "unknown" {
-		t.Fatalf("Apply() error model = %q, want %q", thinkErr.Model, "unknown")
+	if string(got) != string(body) {
+		t.Fatalf("expected body unchanged, got %s", string(got))
 	}
 }
diff --git a/internal/thinking/provider/geminicli/apply.go b/internal/thinking/provider/geminicli/apply.go
index eb6d82a4..c8887723 100644
--- a/internal/thinking/provider/geminicli/apply.go
+++ b/internal/thinking/provider/geminicli/apply.go
@@ -29,18 +29,11 @@ func init() {
 
 // Apply applies thinking configuration to Gemini CLI request body.
 func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
-	if modelInfo == nil {
+	if thinking.IsUserDefinedModel(modelInfo) {
 		return a.applyCompatible(body, config)
 	}
 	if modelInfo.Thinking == nil {
-		if modelInfo.Type == "" {
-			modelID := modelInfo.ID
-			if modelID == "" {
-				modelID = "unknown"
-			}
-			return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID)
-		}
-		return a.applyCompatible(body, config)
+		return body, nil
 	}
 
 	if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
diff --git a/internal/thinking/provider/geminicli/apply_test.go b/internal/thinking/provider/geminicli/apply_test.go
index 6bf77dd2..e6900496 100644
--- a/internal/thinking/provider/geminicli/apply_test.go
+++ b/internal/thinking/provider/geminicli/apply_test.go
@@ -208,26 +208,21 @@ func TestGeminiCLIApplyConflictingFields(t *testing.T) {
 	}
 }
 
-// TestGeminiCLIApplyThinkingNotSupported tests error handling when modelInfo.Thinking is nil.
+// TestGeminiCLIApplyThinkingNotSupported tests passthrough handling when modelInfo.Thinking is nil.
 func TestGeminiCLIApplyThinkingNotSupported(t *testing.T) {
 	applier := NewApplier()
 	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
+	body := []byte(`{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`)
 
 	// Model with nil Thinking support
 	modelInfo := &registry.ModelInfo{ID: "gemini-cli-unknown", Thinking: nil}
 
-	_, err := applier.Apply([]byte(`{}`), config, modelInfo)
-	if err == nil {
-		t.Fatal("Apply() expected error for nil Thinking, got nil")
+	got, err := applier.Apply(body, config, modelInfo)
+	if err != nil {
+		t.Fatalf("Apply() expected nil error for nil Thinking, got %v", err)
 	}
-
-	// Verify it's the correct error type
-	thinkErr, ok := err.(*thinking.ThinkingError)
-	if !ok {
-		t.Fatalf("Apply() error type = %T, want *thinking.ThinkingError", err)
-	}
-	if thinkErr.Code != thinking.ErrThinkingNotSupported {
-		t.Fatalf("Apply() error code = %v, want %v", thinkErr.Code, thinking.ErrThinkingNotSupported)
+	if string(got) != string(body) {
+		t.Fatalf("expected body unchanged, got %s", string(got))
 	}
 }
 
@@ -252,17 +247,14 @@ func TestGeminiCLIApplyEmptyModelID(t *testing.T) {
 	applier := NewApplier()
 	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
 	modelInfo := &registry.ModelInfo{ID: "", Thinking: nil}
+	body := []byte(`{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`)
 
-	_, err := applier.Apply([]byte(`{}`), config, modelInfo)
-	if err == nil {
-		t.Fatal("Apply() with empty modelID and nil Thinking should error")
+	got, err := applier.Apply(body, config, modelInfo)
+	if err != nil {
+		t.Fatalf("Apply() expected nil error, got %v", err)
 	}
-	thinkErr, ok := err.(*thinking.ThinkingError)
-	if !ok {
-		t.Fatalf("Apply() error type = %T, want *thinking.ThinkingError", err)
-	}
-	if thinkErr.Model != "unknown" {
-		t.Fatalf("Apply() error model = %q, want %q", thinkErr.Model, "unknown")
+	if string(got) != string(body) {
+		t.Fatalf("expected body unchanged, got %s", string(got))
 	}
 }
diff --git a/internal/thinking/provider/iflow/apply.go b/internal/thinking/provider/iflow/apply.go
index 5bca94f2..da986d22 100644
--- a/internal/thinking/provider/iflow/apply.go
+++ b/internal/thinking/provider/iflow/apply.go
@@ -54,15 +54,11 @@ func init() {
 //	  "reasoning_split": true
 //	}
 func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
-	if modelInfo == nil {
+	if thinking.IsUserDefinedModel(modelInfo) {
 		return body, nil
 	}
 	if modelInfo.Thinking == nil {
-		modelID := modelInfo.ID
-		if modelID == "" {
-			modelID = "unknown"
-		}
-		return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID)
+		return body, nil
 	}
 
 	if isGLMModel(modelInfo.ID) {
diff --git a/internal/thinking/provider/iflow/apply_test.go b/internal/thinking/provider/iflow/apply_test.go
index f0c2a35b..9718c413 100644
--- a/internal/thinking/provider/iflow/apply_test.go
+++ b/internal/thinking/provider/iflow/apply_test.go
@@ -73,33 +73,23 @@ func TestApplyMissingThinkingSupport(t *testing.T) {
 	applier := NewApplier()
 
 	tests := []struct {
-		name      string
-		modelID   string
-		wantModel string
+		name    string
+		modelID string
 	}{
-		{"model id", "glm-4.6", "glm-4.6"},
-		{"empty model id", "", "unknown"},
+		{"model id", "glm-4.6"},
+		{"empty model id", ""},
 	}
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			modelInfo := &registry.ModelInfo{ID: tt.modelID}
-			got, err := applier.Apply([]byte(`{"model":"`+tt.modelID+`"}`), thinking.ThinkingConfig{}, modelInfo)
-			if err == nil {
-				t.Fatalf("expected error, got nil")
+			body := []byte(`{"model":"` + tt.modelID + `"}`)
+			got, err := applier.Apply(body, thinking.ThinkingConfig{}, modelInfo)
+			if err != nil {
+				t.Fatalf("expected nil error, got %v", err)
 			}
-			if got != nil {
-				t.Fatalf("expected nil body on error, got %s", string(got))
-			}
-			thinkingErr, ok := err.(*thinking.ThinkingError)
-			if !ok {
-				t.Fatalf("expected ThinkingError, got %T", err)
-			}
-			if thinkingErr.Code != thinking.ErrThinkingNotSupported {
-				t.Fatalf("expected code %s, got %s", thinking.ErrThinkingNotSupported, thinkingErr.Code)
-			}
-			if thinkingErr.Model != tt.wantModel {
-				t.Fatalf("expected model %s, got %s", tt.wantModel, thinkingErr.Model)
+			if string(got) != string(body) {
+				t.Fatalf("expected body unchanged, got %s", string(got))
 			}
 		})
 	}
diff --git a/internal/thinking/provider/openai/apply.go b/internal/thinking/provider/openai/apply.go
index aea1c055..eaad30ee 100644
--- a/internal/thinking/provider/openai/apply.go
+++ b/internal/thinking/provider/openai/apply.go
@@ -41,18 +41,11 @@ func init() {
 //	  "reasoning_effort": "high"
 //	}
 func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
-	if modelInfo == nil {
+	if thinking.IsUserDefinedModel(modelInfo) {
 		return applyCompatibleOpenAI(body, config)
 	}
 	if modelInfo.Thinking == nil {
-		if modelInfo.Type == "" {
-			modelID := modelInfo.ID
-			if modelID == "" {
-				modelID = "unknown"
-			}
-			return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID)
-		}
-		return applyCompatibleOpenAI(body, config)
+		return body, nil
 	}
 
 	// Only handle ModeLevel and ModeNone; other modes pass through unchanged.
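
All six provider appliers converge on the same guard shape after this change. A minimal sketch of the pattern, for reference only: applyCompatible stands in for each provider's own compatibility helper (applyCompatibleClaude, applyCompatibleOpenAI, and so on), and the provider-specific mode handling is elided.

func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
	// Unknown (nil) and user-defined models skip registry validation; the
	// thinking config is still applied so the upstream can reject it.
	if thinking.IsUserDefinedModel(modelInfo) {
		return a.applyCompatible(body, config)
	}
	// Registry models without thinking support now pass the body through
	// unchanged instead of failing with ErrThinkingNotSupported.
	if modelInfo.Thinking == nil {
		return body, nil
	}
	// ...provider-specific handling of the supported modes follows.
	return body, nil
}
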
diff --git a/internal/thinking/provider/openai/apply_test.go b/internal/thinking/provider/openai/apply_test.go
index 5be01e4e..1e348d9e 100644
--- a/internal/thinking/provider/openai/apply_test.go
+++ b/internal/thinking/provider/openai/apply_test.go
@@ -57,22 +57,13 @@ func TestApplyNilModelInfo(t *testing.T) {
 func TestApplyMissingThinkingSupport(t *testing.T) {
 	applier := NewApplier()
 	modelInfo := &registry.ModelInfo{ID: "gpt-5.2"}
-	got, err := applier.Apply([]byte(`{"model":"gpt-5.2"}`), thinking.ThinkingConfig{}, modelInfo)
-	if err == nil {
-		t.Fatalf("expected error, got nil")
+	body := []byte(`{"model":"gpt-5.2"}`)
+	got, err := applier.Apply(body, thinking.ThinkingConfig{}, modelInfo)
+	if err != nil {
+		t.Fatalf("expected nil error, got %v", err)
 	}
-	if got != nil {
-		t.Fatalf("expected nil body on error, got %s", string(got))
-	}
-	thinkingErr, ok := err.(*thinking.ThinkingError)
-	if !ok {
-		t.Fatalf("expected ThinkingError, got %T", err)
-	}
-	if thinkingErr.Code != thinking.ErrThinkingNotSupported {
-		t.Fatalf("expected code %s, got %s", thinking.ErrThinkingNotSupported, thinkingErr.Code)
-	}
-	if thinkingErr.Model != "gpt-5.2" {
-		t.Fatalf("expected model gpt-5.2, got %s", thinkingErr.Model)
+	if string(got) != string(body) {
+		t.Fatalf("expected body unchanged, got %s", string(got))
 	}
 }

From 33d66959e98d440de44e11b9fd21070995e94aef Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Wed, 14 Jan 2026 16:39:28 +0800
Subject: [PATCH 12/65] test(thinking): remove legacy unit and integration
 tests

---
 internal/thinking/apply_main_test.go          | 144 -----
 internal/thinking/apply_test.go               | 501 ----------------
 internal/thinking/convert_test.go             | 277 ----------
 internal/thinking/errors_test.go              |  34 --
 internal/thinking/extract_test.go             |  42 --
 .../thinking/provider/claude/apply_test.go    | 288 ----------
 .../thinking/provider/gemini/apply_test.go    | 519 ------------------
 .../thinking/provider/geminicli/apply_test.go | 375 -------------
 .../thinking/provider/iflow/apply_test.go     | 318 -----------
 .../thinking/provider/openai/apply_test.go    | 336 ------------
 internal/thinking/provider_map_test.go        |  51 --
 internal/thinking/strip_test.go               |  66 ---
 internal/thinking/suffix_test.go              | 313 -----------
 internal/thinking/validate_test.go            | 349 -----------
 test/gemini3_thinking_level_test.go           | 423 --------------
 test/model_alias_thinking_suffix_test.go      | 262 ---------
 16 files changed, 4298 deletions(-)
 delete mode 100644 internal/thinking/apply_main_test.go
 delete mode 100644 internal/thinking/apply_test.go
 delete mode 100644 internal/thinking/convert_test.go
 delete mode 100644 internal/thinking/errors_test.go
 delete mode 100644 internal/thinking/extract_test.go
 delete mode 100644 internal/thinking/provider/claude/apply_test.go
 delete mode 100644 internal/thinking/provider/gemini/apply_test.go
 delete mode 100644 internal/thinking/provider/geminicli/apply_test.go
 delete mode 100644 internal/thinking/provider/iflow/apply_test.go
 delete mode 100644 internal/thinking/provider/openai/apply_test.go
 delete mode 100644 internal/thinking/provider_map_test.go
 delete mode 100644 internal/thinking/strip_test.go
 delete mode 100644 internal/thinking/suffix_test.go
 delete mode 100644 internal/thinking/validate_test.go
 delete mode 100644 test/gemini3_thinking_level_test.go
 delete mode 100644 test/model_alias_thinking_suffix_test.go

diff --git a/internal/thinking/apply_main_test.go b/internal/thinking/apply_main_test.go
deleted file mode 100644
index 93346109..00000000
--- a/internal/thinking/apply_main_test.go
+++ /dev/null
@@ -1,144 +0,0 @@
-// Package thinking provides unified thinking configuration processing logic.
-package thinking
-
-import (
-	"testing"
-
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
-	"github.com/tidwall/gjson"
-)
-
-// setupTestModels registers test models in the global registry for testing.
-// This is required because ApplyThinking now looks up models by name.
-func setupTestModels(t *testing.T) func() {
-	t.Helper()
-	reg := registry.GetGlobalRegistry()
-
-	// Register test models via RegisterClient (the correct API)
-	clientID := "test-thinking-client"
-	testModels := []*registry.ModelInfo{
-		{ID: "test-thinking-model", Thinking: &registry.ThinkingSupport{Min: 1, Max: 10}},
-		{ID: "test-no-thinking", Type: "gemini"},
-		{ID: "gpt-5.2-test", Thinking: &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "medium", "high"}}},
-	}
-
-	reg.RegisterClient(clientID, "test", testModels)
-
-	// Return cleanup function
-	return func() {
-		reg.UnregisterClient(clientID)
-	}
-}
-
-func TestApplyThinkingPassthrough(t *testing.T) {
-	cleanup := setupTestModels(t)
-	defer cleanup()
-
-	tests := []struct {
-		name     string
-		body     string
-		model    string
-		provider string
-	}{
-		{"unknown provider", `{"a":1}`, "test-thinking-model", "unknown"},
-		{"unknown model", `{"a":1}`, "nonexistent-model", "gemini"},
-		{"nil thinking support", `{"a":1}`, "test-no-thinking", "gemini"},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got, err := ApplyThinking([]byte(tt.body), tt.model, tt.provider)
-			if err != nil {
-				t.Fatalf("ApplyThinking() error = %v", err)
-			}
-			if string(got) != tt.body {
-				t.Fatalf("ApplyThinking() = %s, want %s", string(got), tt.body)
-			}
-		})
-	}
-}
-
-func TestApplyThinkingValidationError(t *testing.T) {
-	cleanup := setupTestModels(t)
-	defer cleanup()
-
-	tests := []struct {
-		name     string
-		body     string
-		model    string
-		provider string
-	}{
-		{"unsupported level", `{"reasoning_effort":"ultra"}`, "gpt-5.2-test", "openai"},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got, err := ApplyThinking([]byte(tt.body), tt.model, tt.provider)
-			if err == nil {
-				t.Fatalf("ApplyThinking() error = nil, want error")
-			}
-			// On validation error, ApplyThinking returns original body (defensive programming)
-			if string(got) != tt.body {
-				t.Fatalf("ApplyThinking() body = %s, want original body %s", string(got), tt.body)
-			}
-		})
-	}
-}
-
-func TestApplyThinkingSuffixPriority(t *testing.T) {
-	cleanup := setupTestModels(t)
-	defer cleanup()
-
-	// Register a model that supports thinking with budget
-	reg := registry.GetGlobalRegistry()
-	suffixClientID := "test-suffix-client"
-	testModels := []*registry.ModelInfo{
-		{
-			ID:       "gemini-2.5-pro-suffix-test",
-			Thinking: &registry.ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: true},
-		},
-	}
-	reg.RegisterClient(suffixClientID, "gemini", testModels)
-	defer reg.UnregisterClient(suffixClientID)
-
-	tests := []struct {
-		name          string
-		body          string
-		model         string
-		provider      string
-		checkPath     string
-		expectedValue int
-	}{
-		{
-			"suffix overrides body config",
-			`{"generationConfig":{"thinkingConfig":{"thinkingBudget":1000}}}`,
-			"gemini-2.5-pro-suffix-test(8192)",
-			"gemini",
-			"generationConfig.thinkingConfig.thinkingBudget",
-			8192,
-		},
-		{
-			"suffix none disables thinking",
-			`{"generationConfig":{"thinkingConfig":{"thinkingBudget":1000}}}`,
-			"gemini-2.5-pro-suffix-test(none)",
-			"gemini",
-			"generationConfig.thinkingConfig.thinkingBudget",
-			0,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got, err := ApplyThinking([]byte(tt.body), tt.model, tt.provider)
-			if err != nil {
-				t.Fatalf("ApplyThinking() error = %v", err)
-			}
-
-			// Use gjson to check the value
-			result := int(gjson.GetBytes(got, tt.checkPath).Int())
-			if result != tt.expectedValue {
-				t.Fatalf("ApplyThinking() %s = %v, want %v", tt.checkPath, result, tt.expectedValue)
-			}
-		})
-	}
-}
diff --git a/internal/thinking/apply_test.go b/internal/thinking/apply_test.go
deleted file mode 100644
index b49079db..00000000
--- a/internal/thinking/apply_test.go
+++ /dev/null
@@ -1,501 +0,0 @@
-// Package thinking_test provides external tests for the thinking package.
-package thinking_test
-
-import (
-	"testing"
-
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
-	"github.com/tidwall/gjson"
-
-	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/claude"
-	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini"
-	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli"
-	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow"
-	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai"
-)
-
-// registerTestModels sets up test models in the registry and returns a cleanup function.
-func registerTestModels(t *testing.T) func() {
-	t.Helper()
-	reg := registry.GetGlobalRegistry()
-
-	testModels := []*registry.ModelInfo{
-		geminiBudgetModel(),
-		geminiLevelModel(),
-		claudeBudgetModel(),
-		openAILevelModel(),
-		iFlowModel(),
-		{ID: "claude-3"},
-		{ID: "gemini-2.5-pro-strip"},
-		{ID: "glm-4.6-strip"},
-	}
-
-	clientID := "test-thinking-models"
-	reg.RegisterClient(clientID, "test", testModels)
-
-	return func() {
-		reg.UnregisterClient(clientID)
-	}
-}
-
-// TestApplyThinking tests the main ApplyThinking entry point.
-//
-// ApplyThinking is the unified entry point for applying thinking configuration.
-// It routes to the appropriate provider-specific applier based on model.
-// -// Depends on: Epic 10 Story 10-2 (apply-thinking main entry) -func TestApplyThinking(t *testing.T) { - cleanup := registerTestModels(t) - defer cleanup() - - tests := []struct { - name string - body string - model string - provider string - check string - }{ - {"gemini budget", `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, "gemini-2.5-pro-test", "gemini", "geminiBudget"}, - {"gemini level", `{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}`, "gemini-3-pro-preview-test", "gemini", "geminiLevel"}, - {"gemini-cli budget", `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`, "gemini-2.5-pro-test", "gemini-cli", "geminiCliBudget"}, - {"antigravity budget", `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`, "gemini-2.5-pro-test", "antigravity", "geminiCliBudget"}, - {"claude budget", `{"thinking":{"budget_tokens":16384}}`, "claude-sonnet-4-5-test", "claude", "claudeBudget"}, - {"claude enabled type auto", `{"thinking":{"type":"enabled"}}`, "claude-sonnet-4-5-test", "claude", "claudeAuto"}, - {"openai level", `{"reasoning_effort":"high"}`, "gpt-5.2-test", "openai", "openaiLevel"}, - {"iflow enable", `{"chat_template_kwargs":{"enable_thinking":true}}`, "glm-4.6-test", "iflow", "iflowEnable"}, - {"unknown provider passthrough", `{"a":1}`, "gemini-2.5-pro-test", "unknown", "passthrough"}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, err := thinking.ApplyThinking([]byte(tt.body), tt.model, tt.provider) - if err != nil { - t.Fatalf("ApplyThinking() error = %v", err) - } - assertApplyThinkingCheck(t, tt.check, tt.body, got) - }) - } -} - -func TestApplyThinkingErrors(t *testing.T) { - cleanup := registerTestModels(t) - defer cleanup() - - tests := []struct { - name string - body string - model string - provider string - }{ - {"unsupported level openai", `{"reasoning_effort":"ultra"}`, "gpt-5.2-test", "openai"}, - {"unsupported level gemini", `{"generationConfig":{"thinkingConfig":{"thinkingLevel":"ultra"}}}`, "gemini-3-pro-preview-test", "gemini"}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, err := thinking.ApplyThinking([]byte(tt.body), tt.model, tt.provider) - if err == nil { - t.Fatalf("ApplyThinking() error = nil, want error") - } - // On validation error, ApplyThinking returns original body (defensive programming) - if string(got) != tt.body { - t.Fatalf("ApplyThinking() body = %s, want original body %s", string(got), tt.body) - } - }) - } -} - -func TestApplyThinkingStripOnUnsupportedModel(t *testing.T) { - cleanup := registerTestModels(t) - defer cleanup() - - tests := []struct { - name string - body string - model string - provider string - stripped []string - preserved []string - }{ - {"claude strip", `{"thinking":{"budget_tokens":8192},"model":"claude-3"}`, "claude-3", "claude", []string{"thinking"}, []string{"model"}}, - {"gemini strip", `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192},"temperature":0.7}}`, "gemini-2.5-pro-strip", "gemini", []string{"generationConfig.thinkingConfig"}, []string{"generationConfig.temperature"}}, - {"iflow strip", `{"chat_template_kwargs":{"enable_thinking":true,"clear_thinking":false,"other":"value"}}`, "glm-4.6-strip", "iflow", []string{"chat_template_kwargs.enable_thinking", "chat_template_kwargs.clear_thinking"}, []string{"chat_template_kwargs.other"}}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, err := thinking.ApplyThinking([]byte(tt.body), 
tt.model, tt.provider)
-			if err != nil {
-				t.Fatalf("ApplyThinking() error = %v", err)
-			}
-
-			for _, path := range tt.stripped {
-				if gjson.GetBytes(got, path).Exists() {
-					t.Fatalf("expected %s to be stripped, got %s", path, string(got))
-				}
-			}
-			for _, path := range tt.preserved {
-				if !gjson.GetBytes(got, path).Exists() {
-					t.Fatalf("expected %s to be preserved, got %s", path, string(got))
-				}
-			}
-		})
-	}
-}
-
-func TestIsUserDefinedModel(t *testing.T) {
-	tests := []struct {
-		name      string
-		modelInfo *registry.ModelInfo
-		want      bool
-	}{
-		{"nil modelInfo", nil, true},
-		{"not user-defined no flag", &registry.ModelInfo{ID: "test"}, false},
-		{"not user-defined with type", &registry.ModelInfo{ID: "test", Type: "openai"}, false},
-		{"user-defined with flag", &registry.ModelInfo{ID: "test", Type: "openai", UserDefined: true}, true},
-		{"user-defined flag only", &registry.ModelInfo{ID: "test", UserDefined: true}, true},
-		{"has thinking not user-defined", &registry.ModelInfo{ID: "test", Type: "openai", Thinking: &registry.ThinkingSupport{Min: 1024}}, false},
-		{"has thinking with user-defined flag", &registry.ModelInfo{ID: "test", Type: "openai", Thinking: &registry.ThinkingSupport{Min: 1024}, UserDefined: true}, true},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			if got := thinking.IsUserDefinedModel(tt.modelInfo); got != tt.want {
-				t.Fatalf("IsUserDefinedModel() = %v, want %v", got, tt.want)
-			}
-		})
-	}
-}
-
-func TestApplyThinking_UserDefinedModel(t *testing.T) {
-	// Register user-defined test models
-	reg := registry.GetGlobalRegistry()
-	userDefinedModels := []*registry.ModelInfo{
-		{ID: "custom-gpt", Type: "openai", UserDefined: true},
-		{ID: "or-claude", Type: "openai", UserDefined: true},
-		{ID: "custom-gemini", Type: "gemini", UserDefined: true},
-		{ID: "vertex-flash", Type: "gemini", UserDefined: true},
-		{ID: "cli-gemini", Type: "gemini", UserDefined: true},
-		{ID: "ag-gemini", Type: "gemini", UserDefined: true},
-		{ID: "custom-claude", Type: "claude", UserDefined: true},
-		{ID: "unknown"},
-	}
-	clientID := "test-user-defined-models"
-	reg.RegisterClient(clientID, "test", userDefinedModels)
-	defer reg.UnregisterClient(clientID)
-
-	tests := []struct {
-		name     string
-		body     string
-		model    string
-		provider string
-		check    string
-	}{
-		{
-			"openai user-defined with reasoning_effort",
-			`{"model":"custom-gpt","reasoning_effort":"high"}`,
-			"custom-gpt",
-			"openai",
-			"openaiCompatible",
-		},
-		{
-			"openai-compatibility model with reasoning_effort",
-			`{"model":"or-claude","reasoning_effort":"high"}`,
-			"or-claude",
-			"openai",
-			"openaiCompatible",
-		},
-		{
-			"gemini user-defined with thinkingBudget",
-			`{"model":"custom-gemini","generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`,
-			"custom-gemini",
-			"gemini",
-			"geminiCompatibleBudget",
-		},
-		{
-			"vertex user-defined with thinkingBudget",
-			`{"model":"vertex-flash","generationConfig":{"thinkingConfig":{"thinkingBudget":16384}}}`,
-			"vertex-flash",
-			"gemini",
-			"geminiCompatibleBudget16384",
-		},
-		{
-			"gemini-cli user-defined with thinkingBudget",
-			`{"model":"cli-gemini","request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`,
-			"cli-gemini",
-			"gemini-cli",
-			"geminiCliCompatibleBudget",
-		},
-		{
-			"antigravity user-defined with thinkingBudget",
-			`{"model":"ag-gemini","request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`,
-			"ag-gemini",
-			"antigravity",
-			"geminiCliCompatibleBudget",
-		},
-		{
-			"claude user-defined with thinking",
-			`{"model":"custom-claude","thinking":{"type":"enabled","budget_tokens":8192}}`,
-			"custom-claude",
-			"claude",
-			"claudeCompatibleBudget",
-		},
-		{
-			"user-defined model no config",
-			`{"model":"custom-gpt","messages":[]}`,
-			"custom-gpt",
-			"openai",
-			"passthrough",
-		},
-		{
-			"non-user-defined model strips config",
-			`{"model":"unknown","reasoning_effort":"high"}`,
-			"unknown",
-			"openai",
-			"stripReasoning",
-		},
-		{
-			"user-defined model unknown provider",
-			`{"model":"custom-gpt","reasoning_effort":"high"}`,
-			"custom-gpt",
-			"unknown",
-			"passthrough",
-		},
-		{
-			"unknown model passthrough",
-			`{"model":"nonexistent","reasoning_effort":"high"}`,
-			"nonexistent",
-			"openai",
-			"passthrough",
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got, err := thinking.ApplyThinking([]byte(tt.body), tt.model, tt.provider)
-			if err != nil {
-				t.Fatalf("ApplyThinking() error = %v", err)
-			}
-			assertCompatibleModelCheck(t, tt.check, tt.body, got)
-		})
-	}
-}
-
-// TestApplyThinkingSuffixPriority tests suffix priority over body config.
-func TestApplyThinkingSuffixPriority(t *testing.T) {
-	// Register test model
-	reg := registry.GetGlobalRegistry()
-	testModels := []*registry.ModelInfo{
-		{
-			ID:       "gemini-suffix-test",
-			Thinking: &registry.ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: true},
-		},
-	}
-	clientID := "test-suffix-priority"
-	reg.RegisterClient(clientID, "gemini", testModels)
-	defer reg.UnregisterClient(clientID)
-
-	tests := []struct {
-		name          string
-		body          string
-		model         string
-		provider      string
-		checkPath     string
-		expectedValue int
-	}{
-		{
-			"suffix overrides body budget",
-			`{"generationConfig":{"thinkingConfig":{"thinkingBudget":1000}}}`,
-			"gemini-suffix-test(8192)",
-			"gemini",
-			"generationConfig.thinkingConfig.thinkingBudget",
-			8192,
-		},
-		{
-			"suffix none sets budget to 0",
-			`{"generationConfig":{"thinkingConfig":{"thinkingBudget":1000}}}`,
-			"gemini-suffix-test(none)",
-			"gemini",
-			"generationConfig.thinkingConfig.thinkingBudget",
-			0,
-		},
-		{
-			"no suffix uses body config",
-			`{"generationConfig":{"thinkingConfig":{"thinkingBudget":5000}}}`,
-			"gemini-suffix-test",
-			"gemini",
-			"generationConfig.thinkingConfig.thinkingBudget",
-			5000,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got, err := thinking.ApplyThinking([]byte(tt.body), tt.model, tt.provider)
-			if err != nil {
-				t.Fatalf("ApplyThinking() error = %v", err)
-			}
-
-			result := int(gjson.GetBytes(got, tt.checkPath).Int())
-			if result != tt.expectedValue {
-				t.Fatalf("ApplyThinking() %s = %v, want %v\nbody: %s", tt.checkPath, result, tt.expectedValue, string(got))
-			}
-		})
-	}
-}
-
-func assertApplyThinkingCheck(t *testing.T, checkName, input string, body []byte) {
-	t.Helper()
-
-	switch checkName {
-	case "geminiBudget":
-		assertJSONInt(t, body, "generationConfig.thinkingConfig.thinkingBudget", 8192)
-		assertJSONBool(t, body, "generationConfig.thinkingConfig.includeThoughts", true)
-	case "geminiLevel":
-		assertJSONString(t, body, "generationConfig.thinkingConfig.thinkingLevel", "high")
-		assertJSONBool(t, body, "generationConfig.thinkingConfig.includeThoughts", true)
-	case "geminiCliBudget":
-		assertJSONInt(t, body, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
-		assertJSONBool(t, body, "request.generationConfig.thinkingConfig.includeThoughts", true)
-	case "claudeBudget":
-		assertJSONString(t, body, "thinking.type", "enabled")
-		assertJSONInt(t, body, "thinking.budget_tokens", 16384)
-	case "claudeAuto":
-		// When type=enabled without budget, auto mode is applied using mid-range budget
-		assertJSONString(t, body, "thinking.type", "enabled")
-		// Budget should be mid-range: (1024 + 128000) / 2 = 64512
-		assertJSONInt(t, body, "thinking.budget_tokens", 64512)
-	case "openaiLevel":
-		assertJSONString(t, body, "reasoning_effort", "high")
-	case "iflowEnable":
-		assertJSONBool(t, body, "chat_template_kwargs.enable_thinking", true)
-		assertJSONBool(t, body, "chat_template_kwargs.clear_thinking", false)
-	case "passthrough":
-		if string(body) != input {
-			t.Fatalf("ApplyThinking() = %s, want %s", string(body), input)
-		}
-	default:
-		t.Fatalf("unknown check: %s", checkName)
-	}
-}
-
-func assertCompatibleModelCheck(t *testing.T, checkName, input string, body []byte) {
-	t.Helper()
-
-	switch checkName {
-	case "openaiCompatible":
-		assertJSONString(t, body, "reasoning_effort", "high")
-	case "geminiCompatibleBudget":
-		assertJSONInt(t, body, "generationConfig.thinkingConfig.thinkingBudget", 8192)
-		assertJSONBool(t, body, "generationConfig.thinkingConfig.includeThoughts", true)
-	case "geminiCompatibleBudget16384":
-		assertJSONInt(t, body, "generationConfig.thinkingConfig.thinkingBudget", 16384)
-		assertJSONBool(t, body, "generationConfig.thinkingConfig.includeThoughts", true)
-	case "geminiCliCompatibleBudget":
-		assertJSONInt(t, body, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
-		assertJSONBool(t, body, "request.generationConfig.thinkingConfig.includeThoughts", true)
-	case "claudeCompatibleBudget":
-		assertJSONString(t, body, "thinking.type", "enabled")
-		assertJSONInt(t, body, "thinking.budget_tokens", 8192)
-	case "stripReasoning":
-		if gjson.GetBytes(body, "reasoning_effort").Exists() {
-			t.Fatalf("expected reasoning_effort to be stripped, got %s", string(body))
-		}
-	case "passthrough":
-		if string(body) != input {
-			t.Fatalf("ApplyThinking() = %s, want %s", string(body), input)
-		}
-	default:
-		t.Fatalf("unknown check: %s", checkName)
-	}
-}
-
-func assertJSONString(t *testing.T, body []byte, path, want string) {
-	t.Helper()
-	value := gjson.GetBytes(body, path)
-	if !value.Exists() {
-		t.Fatalf("expected %s to exist", path)
-	}
-	if value.String() != want {
-		t.Fatalf("value at %s = %s, want %s", path, value.String(), want)
-	}
-}
-
-func assertJSONInt(t *testing.T, body []byte, path string, want int) {
-	t.Helper()
-	value := gjson.GetBytes(body, path)
-	if !value.Exists() {
-		t.Fatalf("expected %s to exist", path)
-	}
-	if int(value.Int()) != want {
-		t.Fatalf("value at %s = %d, want %d", path, value.Int(), want)
-	}
-}
-
-func assertJSONBool(t *testing.T, body []byte, path string, want bool) {
-	t.Helper()
-	value := gjson.GetBytes(body, path)
-	if !value.Exists() {
-		t.Fatalf("expected %s to exist", path)
-	}
-	if value.Bool() != want {
-		t.Fatalf("value at %s = %t, want %t", path, value.Bool(), want)
-	}
-}
-
-func geminiBudgetModel() *registry.ModelInfo {
-	return &registry.ModelInfo{
-		ID: "gemini-2.5-pro-test",
-		Thinking: &registry.ThinkingSupport{
-			Min:         128,
-			Max:         32768,
-			ZeroAllowed: true,
-		},
-	}
-}
-
-func geminiLevelModel() *registry.ModelInfo {
-	return &registry.ModelInfo{
-		ID: "gemini-3-pro-preview-test",
-		Thinking: &registry.ThinkingSupport{
-			Min:    128,
-			Max:    32768,
-			Levels: []string{"minimal", "low", "medium", "high"},
-		},
-	}
-}
-
-func claudeBudgetModel() *registry.ModelInfo {
-	return &registry.ModelInfo{
-		ID: "claude-sonnet-4-5-test",
-		Thinking: &registry.ThinkingSupport{
-			Min:         1024,
-			Max:         128000,
-			ZeroAllowed: true,
-		},
-	}
-}
-
-func openAILevelModel() *registry.ModelInfo {
-	return &registry.ModelInfo{
-		ID: "gpt-5.2-test",
-		Thinking: &registry.ThinkingSupport{
-			Min:         128,
-			Max:         32768,
-			ZeroAllowed: true,
-			Levels:      []string{"low", "medium", "high"},
-		},
-	}
-}
-
-func iFlowModel() *registry.ModelInfo {
-	return &registry.ModelInfo{
-		ID: "glm-4.6-test",
-		Thinking: &registry.ThinkingSupport{
-			Min:         1,
-			Max:         10,
-			ZeroAllowed: true,
-		},
-	}
-}
diff --git a/internal/thinking/convert_test.go b/internal/thinking/convert_test.go
deleted file mode 100644
index eacc2532..00000000
--- a/internal/thinking/convert_test.go
+++ /dev/null
@@ -1,277 +0,0 @@
-// Package thinking provides unified thinking configuration processing logic.
-package thinking
-
-import (
-	"testing"
-
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
-)
-
-// TestConvertLevelToBudget tests the ConvertLevelToBudget function.
-//
-// ConvertLevelToBudget converts a thinking level to a budget value.
-// This is a semantic conversion - it does NOT apply clamping.
-//
-// Level → Budget mapping:
-// - none → 0
-// - auto → -1
-// - minimal → 512
-// - low → 1024
-// - medium → 8192
-// - high → 24576
-// - xhigh → 32768
-func TestConvertLevelToBudget(t *testing.T) {
-	tests := []struct {
-		name   string
-		level  string
-		want   int
-		wantOK bool
-	}{
-		// Standard levels
-		{"none", "none", 0, true},
-		{"auto", "auto", -1, true},
-		{"minimal", "minimal", 512, true},
-		{"low", "low", 1024, true},
-		{"medium", "medium", 8192, true},
-		{"high", "high", 24576, true},
-		{"xhigh", "xhigh", 32768, true},
-
-		// Case insensitive
-		{"case insensitive HIGH", "HIGH", 24576, true},
-		{"case insensitive High", "High", 24576, true},
-		{"case insensitive NONE", "NONE", 0, true},
-		{"case insensitive Auto", "Auto", -1, true},
-
-		// Invalid levels
-		{"invalid ultra", "ultra", 0, false},
-		{"invalid maximum", "maximum", 0, false},
-		{"empty string", "", 0, false},
-		{"whitespace", " ", 0, false},
-		{"numeric string", "1000", 0, false},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			budget, ok := ConvertLevelToBudget(tt.level)
-			if ok != tt.wantOK {
-				t.Errorf("ConvertLevelToBudget(%q) ok = %v, want %v", tt.level, ok, tt.wantOK)
-			}
-			if budget != tt.want {
-				t.Errorf("ConvertLevelToBudget(%q) = %d, want %d", tt.level, budget, tt.want)
-			}
-		})
-	}
-}
-
-// TestConvertBudgetToLevel tests the ConvertBudgetToLevel function.
-//
-// ConvertBudgetToLevel converts a budget value to the nearest level.
-// Uses threshold-based mapping for range conversion.
-// -// Budget → Level thresholds: -// - -1 → auto -// - 0 → none -// - 1-512 → minimal -// - 513-1024 → low -// - 1025-8192 → medium -// - 8193-24576 → high -// - 24577+ → xhigh -// -// Depends on: Epic 4 Story 4-2 (budget to level conversion) -func TestConvertBudgetToLevel(t *testing.T) { - tests := []struct { - name string - budget int - want string - wantOK bool - }{ - // Special values - {"auto", -1, "auto", true}, - {"none", 0, "none", true}, - - // Invalid negative values - {"invalid negative -2", -2, "", false}, - {"invalid negative -100", -100, "", false}, - {"invalid negative extreme", -999999, "", false}, - - // Minimal range (1-512) - {"minimal min", 1, "minimal", true}, - {"minimal mid", 256, "minimal", true}, - {"minimal max", 512, "minimal", true}, - - // Low range (513-1024) - {"low start", 513, "low", true}, - {"low boundary", 1024, "low", true}, - - // Medium range (1025-8192) - {"medium start", 1025, "medium", true}, - {"medium mid", 4096, "medium", true}, - {"medium boundary", 8192, "medium", true}, - - // High range (8193-24576) - {"high start", 8193, "high", true}, - {"high mid", 16384, "high", true}, - {"high boundary", 24576, "high", true}, - - // XHigh range (24577+) - {"xhigh start", 24577, "xhigh", true}, - {"xhigh mid", 32768, "xhigh", true}, - {"xhigh large", 100000, "xhigh", true}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - level, ok := ConvertBudgetToLevel(tt.budget) - if ok != tt.wantOK { - t.Errorf("ConvertBudgetToLevel(%d) ok = %v, want %v", tt.budget, ok, tt.wantOK) - } - if level != tt.want { - t.Errorf("ConvertBudgetToLevel(%d) = %q, want %q", tt.budget, level, tt.want) - } - }) - } -} - -// TestConvertMixedFormat tests mixed format handling. -// -// Tests scenarios where both level and budget might be present, -// or where format conversion requires special handling. -// -// Depends on: Epic 4 Story 4-3 (mixed format handling) -func TestConvertMixedFormat(t *testing.T) { - tests := []struct { - name string - inputBudget int - inputLevel string - wantMode ThinkingMode - wantBudget int - wantLevel ThinkingLevel - }{ - // Level takes precedence when both present - {"level and budget - level wins", 8192, "high", ModeLevel, 0, LevelHigh}, - {"level and zero budget", 0, "high", ModeLevel, 0, LevelHigh}, - - // Budget only - {"budget only", 16384, "", ModeBudget, 16384, ""}, - - // Level only - {"level only", 0, "medium", ModeLevel, 0, LevelMedium}, - - // Neither (default) - {"neither", 0, "", ModeNone, 0, ""}, - - // Special values - {"auto level", 0, "auto", ModeAuto, -1, LevelAuto}, - {"none level", 0, "none", ModeNone, 0, LevelNone}, - {"auto budget", -1, "", ModeAuto, -1, ""}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := normalizeMixedConfig(tt.inputBudget, tt.inputLevel) - if got.Mode != tt.wantMode { - t.Errorf("normalizeMixedConfig(%d, %q) Mode = %v, want %v", tt.inputBudget, tt.inputLevel, got.Mode, tt.wantMode) - } - if got.Budget != tt.wantBudget { - t.Errorf("normalizeMixedConfig(%d, %q) Budget = %d, want %d", tt.inputBudget, tt.inputLevel, got.Budget, tt.wantBudget) - } - if got.Level != tt.wantLevel { - t.Errorf("normalizeMixedConfig(%d, %q) Level = %q, want %q", tt.inputBudget, tt.inputLevel, got.Level, tt.wantLevel) - } - }) - } -} - -// TestNormalizeForModel tests model-aware format normalization. 
-func TestNormalizeForModel(t *testing.T) {
-	budgetOnlyModel := &registry.ModelInfo{
-		Thinking: &registry.ThinkingSupport{
-			Min: 1024,
-			Max: 128000,
-		},
-	}
-	levelOnlyModel := &registry.ModelInfo{
-		Thinking: &registry.ThinkingSupport{
-			Levels: []string{"low", "medium", "high"},
-		},
-	}
-	hybridModel := &registry.ModelInfo{
-		Thinking: &registry.ThinkingSupport{
-			Min:    128,
-			Max:    32768,
-			Levels: []string{"minimal", "low", "medium", "high"},
-		},
-	}
-
-	tests := []struct {
-		name    string
-		config  ThinkingConfig
-		model   *registry.ModelInfo
-		want    ThinkingConfig
-		wantErr bool
-	}{
-		{"budget-only keeps budget", ThinkingConfig{Mode: ModeBudget, Budget: 8192}, budgetOnlyModel, ThinkingConfig{Mode: ModeBudget, Budget: 8192}, false},
-		{"budget-only converts level", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}, budgetOnlyModel, ThinkingConfig{Mode: ModeBudget, Budget: 24576}, false},
-		{"level-only converts budget", ThinkingConfig{Mode: ModeBudget, Budget: 8192}, levelOnlyModel, ThinkingConfig{Mode: ModeLevel, Level: LevelMedium}, false},
-		{"level-only keeps level", ThinkingConfig{Mode: ModeLevel, Level: LevelLow}, levelOnlyModel, ThinkingConfig{Mode: ModeLevel, Level: LevelLow}, false},
-		{"hybrid keeps budget", ThinkingConfig{Mode: ModeBudget, Budget: 16384}, hybridModel, ThinkingConfig{Mode: ModeBudget, Budget: 16384}, false},
-		{"hybrid keeps level", ThinkingConfig{Mode: ModeLevel, Level: LevelMinimal}, hybridModel, ThinkingConfig{Mode: ModeLevel, Level: LevelMinimal}, false},
-		{"auto passthrough", ThinkingConfig{Mode: ModeAuto, Budget: -1}, levelOnlyModel, ThinkingConfig{Mode: ModeAuto, Budget: -1}, false},
-		{"none passthrough", ThinkingConfig{Mode: ModeNone, Budget: 0}, budgetOnlyModel, ThinkingConfig{Mode: ModeNone, Budget: 0}, false},
-		{"invalid level", ThinkingConfig{Mode: ModeLevel, Level: "ultra"}, budgetOnlyModel, ThinkingConfig{}, true},
-		{"invalid budget", ThinkingConfig{Mode: ModeBudget, Budget: -2}, levelOnlyModel, ThinkingConfig{}, true},
-		{"nil modelInfo passthrough budget", ThinkingConfig{Mode: ModeBudget, Budget: 8192}, nil, ThinkingConfig{Mode: ModeBudget, Budget: 8192}, false},
-		{"nil modelInfo passthrough level", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}, nil, ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}, false},
-		{"nil thinking degrades to none", ThinkingConfig{Mode: ModeBudget, Budget: 4096}, &registry.ModelInfo{}, ThinkingConfig{Mode: ModeNone, Budget: 0}, false},
-		{"nil thinking level degrades to none", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}, &registry.ModelInfo{}, ThinkingConfig{Mode: ModeNone, Budget: 0}, false},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got, err := NormalizeForModel(&tt.config, tt.model)
-			if (err != nil) != tt.wantErr {
-				t.Fatalf("NormalizeForModel(%+v) error = %v, wantErr %v", tt.config, err, tt.wantErr)
-			}
-			if tt.wantErr {
-				return
-			}
-			if got == nil {
-				t.Fatalf("NormalizeForModel(%+v) returned nil config", tt.config)
-			}
-			if got.Mode != tt.want.Mode {
-				t.Errorf("NormalizeForModel(%+v) Mode = %v, want %v", tt.config, got.Mode, tt.want.Mode)
-			}
-			if got.Budget != tt.want.Budget {
-				t.Errorf("NormalizeForModel(%+v) Budget = %d, want %d", tt.config, got.Budget, tt.want.Budget)
-			}
-			if got.Level != tt.want.Level {
-				t.Errorf("NormalizeForModel(%+v) Level = %q, want %q", tt.config, got.Level, tt.want.Level)
-			}
-		})
-	}
-}
-
-// TestLevelToBudgetRoundTrip tests level → budget → level round trip.
-//
-// Verifies that converting level to budget and back produces consistent results.
-// -// Depends on: Epic 4 Story 4-1, 4-2 -func TestLevelToBudgetRoundTrip(t *testing.T) { - levels := []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"} - - for _, level := range levels { - t.Run(level, func(t *testing.T) { - budget, ok := ConvertLevelToBudget(level) - if !ok { - t.Fatalf("ConvertLevelToBudget(%q) returned ok=false", level) - } - resultLevel, ok := ConvertBudgetToLevel(budget) - if !ok { - t.Fatalf("ConvertBudgetToLevel(%d) returned ok=false", budget) - } - if resultLevel != level { - t.Errorf("round trip: %q → %d → %q, want %q", level, budget, resultLevel, level) - } - }) - } -} diff --git a/internal/thinking/errors_test.go b/internal/thinking/errors_test.go deleted file mode 100644 index 5ed2d0d0..00000000 --- a/internal/thinking/errors_test.go +++ /dev/null @@ -1,34 +0,0 @@ -// Package thinking provides unified thinking configuration processing logic. -package thinking - -import "testing" - -// TestThinkingErrorError tests the Error() method of ThinkingError. -// -// Error() returns the message directly without code prefix. -// Use Code field for programmatic error handling. -func TestThinkingErrorError(t *testing.T) { - tests := []struct { - name string - err *ThinkingError - wantMsg string - wantCode ErrorCode - }{ - {"invalid suffix format", NewThinkingError(ErrInvalidSuffix, "invalid suffix format: model(abc"), "invalid suffix format: model(abc", ErrInvalidSuffix}, - {"unknown level", NewThinkingError(ErrUnknownLevel, "unknown level: ultra"), "unknown level: ultra", ErrUnknownLevel}, - {"level not supported", NewThinkingError(ErrLevelNotSupported, "level \"xhigh\" not supported, valid levels: low, medium, high"), "level \"xhigh\" not supported, valid levels: low, medium, high", ErrLevelNotSupported}, - {"thinking not supported", NewThinkingErrorWithModel(ErrThinkingNotSupported, "thinking not supported for this model", "claude-haiku"), "thinking not supported for this model", ErrThinkingNotSupported}, - {"provider mismatch", NewThinkingError(ErrProviderMismatch, "provider mismatch: expected claude, got gemini"), "provider mismatch: expected claude, got gemini", ErrProviderMismatch}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := tt.err.Error(); got != tt.wantMsg { - t.Errorf("Error() = %q, want %q", got, tt.wantMsg) - } - if tt.err.Code != tt.wantCode { - t.Errorf("Code = %q, want %q", tt.err.Code, tt.wantCode) - } - }) - } -} diff --git a/internal/thinking/extract_test.go b/internal/thinking/extract_test.go deleted file mode 100644 index c697e130..00000000 --- a/internal/thinking/extract_test.go +++ /dev/null @@ -1,42 +0,0 @@ -// Package thinking provides unified thinking configuration processing logic. 
-package thinking - -import "testing" - -func TestExtractThinkingConfig(t *testing.T) { - tests := []struct { - name string - body string - provider string - want ThinkingConfig - }{ - {"claude budget", `{"thinking":{"budget_tokens":16384}}`, "claude", ThinkingConfig{Mode: ModeBudget, Budget: 16384}}, - {"claude disabled type", `{"thinking":{"type":"disabled"}}`, "claude", ThinkingConfig{Mode: ModeNone, Budget: 0}}, - {"claude auto budget", `{"thinking":{"budget_tokens":-1}}`, "claude", ThinkingConfig{Mode: ModeAuto, Budget: -1}}, - {"claude enabled type without budget", `{"thinking":{"type":"enabled"}}`, "claude", ThinkingConfig{Mode: ModeAuto, Budget: -1}}, - {"claude enabled type with budget", `{"thinking":{"type":"enabled","budget_tokens":8192}}`, "claude", ThinkingConfig{Mode: ModeBudget, Budget: 8192}}, - {"claude disabled type overrides budget", `{"thinking":{"type":"disabled","budget_tokens":8192}}`, "claude", ThinkingConfig{Mode: ModeNone, Budget: 0}}, - {"gemini budget", `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, "gemini", ThinkingConfig{Mode: ModeBudget, Budget: 8192}}, - {"gemini level", `{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}`, "gemini", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}}, - {"gemini cli auto", `{"request":{"generationConfig":{"thinkingConfig":{"thinkingLevel":"auto"}}}}`, "gemini-cli", ThinkingConfig{Mode: ModeAuto, Budget: -1}}, - {"openai level", `{"reasoning_effort":"medium"}`, "openai", ThinkingConfig{Mode: ModeLevel, Level: LevelMedium}}, - {"openai none", `{"reasoning_effort":"none"}`, "openai", ThinkingConfig{Mode: ModeNone, Budget: 0}}, - {"codex effort high", `{"reasoning":{"effort":"high"}}`, "codex", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}}, - {"codex effort none", `{"reasoning":{"effort":"none"}}`, "codex", ThinkingConfig{Mode: ModeNone, Budget: 0}}, - {"iflow enable", `{"chat_template_kwargs":{"enable_thinking":true}}`, "iflow", ThinkingConfig{Mode: ModeBudget, Budget: 1}}, - {"iflow disable", `{"reasoning_split":false}`, "iflow", ThinkingConfig{Mode: ModeNone, Budget: 0}}, - {"unknown provider", `{"thinking":{"budget_tokens":123}}`, "unknown", ThinkingConfig{}}, - {"invalid json", `{"thinking":`, "claude", ThinkingConfig{}}, - {"empty body", "", "claude", ThinkingConfig{}}, - {"no config", `{}`, "gemini", ThinkingConfig{}}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := extractThinkingConfig([]byte(tt.body), tt.provider) - if got != tt.want { - t.Fatalf("extractThinkingConfig() = %+v, want %+v", got, tt.want) - } - }) - } -} diff --git a/internal/thinking/provider/claude/apply_test.go b/internal/thinking/provider/claude/apply_test.go deleted file mode 100644 index 769a17c4..00000000 --- a/internal/thinking/provider/claude/apply_test.go +++ /dev/null @@ -1,288 +0,0 @@ -// Package claude implements thinking configuration for Claude models. 
-package claude - -import ( - "testing" - - "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" - "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" - "github.com/tidwall/gjson" -) - -// ============================================================================= -// Unit Tests: Applier Creation and Interface -// ============================================================================= - -func TestNewApplier(t *testing.T) { - applier := NewApplier() - if applier == nil { - t.Fatal("NewApplier() returned nil") - } -} - -func TestApplierImplementsInterface(t *testing.T) { - var _ thinking.ProviderApplier = (*Applier)(nil) -} - -// ============================================================================= -// Unit Tests: Budget and Disable Logic (Pre-validated Config) -// ============================================================================= - -// TestClaudeApplyBudgetAndNone tests budget values and disable modes. -// NOTE: These tests assume config has been pre-validated by ValidateConfig. -// Apply trusts the input and does not perform clamping. -func TestClaudeApplyBudgetAndNone(t *testing.T) { - applier := NewApplier() - modelInfo := buildClaudeModelInfo() - - tests := []struct { - name string - config thinking.ThinkingConfig - wantType string - wantBudget int - wantBudgetOK bool - }{ - // Valid pre-validated budget values - {"budget 16k", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}, "enabled", 16384, true}, - {"budget min", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1024}, "enabled", 1024, true}, - {"budget max", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 128000}, "enabled", 128000, true}, - {"budget mid", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 50000}, "enabled", 50000, true}, - // Disable cases - {"budget zero disables", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, "disabled", 0, false}, - {"mode none disables", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "disabled", 0, false}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result, err := applier.Apply([]byte(`{}`), tt.config, modelInfo) - if err != nil { - t.Fatalf("Apply() error = %v", err) - } - - thinkingType := gjson.GetBytes(result, "thinking.type").String() - if thinkingType != tt.wantType { - t.Fatalf("thinking.type = %q, want %q", thinkingType, tt.wantType) - } - - budgetValue := gjson.GetBytes(result, "thinking.budget_tokens") - if budgetValue.Exists() != tt.wantBudgetOK { - t.Fatalf("thinking.budget_tokens exists = %v, want %v", budgetValue.Exists(), tt.wantBudgetOK) - } - if tt.wantBudgetOK { - if got := int(budgetValue.Int()); got != tt.wantBudget { - t.Fatalf("thinking.budget_tokens = %d, want %d", got, tt.wantBudget) - } - } - }) - } -} - -// TestClaudeApplyPassthroughBudget tests that Apply trusts pre-validated budget values. -// It does NOT perform clamping - that's ValidateConfig's responsibility. 
-func TestClaudeApplyPassthroughBudget(t *testing.T) { - applier := NewApplier() - modelInfo := buildClaudeModelInfo() - - tests := []struct { - name string - config thinking.ThinkingConfig - wantBudget int - }{ - // Apply should pass through the budget value as-is - // (ValidateConfig would have clamped these, but Apply trusts the input) - {"passes through any budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 500}, 500}, - {"passes through large budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 200000}, 200000}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result, err := applier.Apply([]byte(`{}`), tt.config, modelInfo) - if err != nil { - t.Fatalf("Apply() error = %v", err) - } - - if got := int(gjson.GetBytes(result, "thinking.budget_tokens").Int()); got != tt.wantBudget { - t.Fatalf("thinking.budget_tokens = %d, want %d (passthrough)", got, tt.wantBudget) - } - }) - } -} - -// ============================================================================= -// Unit Tests: Mode Passthrough (Strict Layering) -// ============================================================================= - -// TestClaudeApplyModePassthrough tests that non-Budget/None modes pass through unchanged. -// Apply expects ValidateConfig to have already converted Level/Auto to Budget. -func TestClaudeApplyModePassthrough(t *testing.T) { - applier := NewApplier() - modelInfo := buildClaudeModelInfo() - - tests := []struct { - name string - config thinking.ThinkingConfig - body string - }{ - {"ModeLevel passes through", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: "high"}, `{"model":"test"}`}, - {"ModeAuto passes through", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, `{"model":"test"}`}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result, err := applier.Apply([]byte(tt.body), tt.config, modelInfo) - if err != nil { - t.Fatalf("Apply() error = %v", err) - } - - // Should return body unchanged - if string(result) != tt.body { - t.Fatalf("Apply() = %s, want %s (passthrough)", string(result), tt.body) - } - }) - } -} - -// ============================================================================= -// Unit Tests: Output Format -// ============================================================================= - -// TestClaudeApplyOutputFormat tests the exact JSON output format. 
-// -// Claude expects: -// -// { -// "thinking": { -// "type": "enabled", -// "budget_tokens": 16384 -// } -// } -func TestClaudeApplyOutputFormat(t *testing.T) { - tests := []struct { - name string - config thinking.ThinkingConfig - wantJSON string - }{ - { - "enabled with budget", - thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}, - `{"thinking":{"type":"enabled","budget_tokens":16384}}`, - }, - { - "disabled", - thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, - `{"thinking":{"type":"disabled"}}`, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - applier := NewApplier() - modelInfo := buildClaudeModelInfo() - - result, err := applier.Apply([]byte(`{}`), tt.config, modelInfo) - if err != nil { - t.Fatalf("Apply() error = %v", err) - } - if string(result) != tt.wantJSON { - t.Fatalf("Apply() = %s, want %s", result, tt.wantJSON) - } - }) - } -} - -// ============================================================================= -// Unit Tests: Body Merging -// ============================================================================= - -// TestClaudeApplyWithExistingBody tests applying config to existing request body. -func TestClaudeApplyWithExistingBody(t *testing.T) { - tests := []struct { - name string - body string - config thinking.ThinkingConfig - wantBody string - }{ - { - "add to empty body", - `{}`, - thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}, - `{"thinking":{"type":"enabled","budget_tokens":16384}}`, - }, - { - "preserve existing fields", - `{"model":"claude-sonnet-4-5","messages":[]}`, - thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, - `{"model":"claude-sonnet-4-5","messages":[],"thinking":{"type":"enabled","budget_tokens":8192}}`, - }, - { - "override existing thinking", - `{"thinking":{"type":"enabled","budget_tokens":1000}}`, - thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}, - `{"thinking":{"type":"enabled","budget_tokens":16384}}`, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - applier := NewApplier() - modelInfo := buildClaudeModelInfo() - - result, err := applier.Apply([]byte(tt.body), tt.config, modelInfo) - if err != nil { - t.Fatalf("Apply() error = %v", err) - } - if string(result) != tt.wantBody { - t.Fatalf("Apply() = %s, want %s", result, tt.wantBody) - } - }) - } -} - -// TestClaudeApplyWithNilBody tests handling of nil/empty body. 
-func TestClaudeApplyWithNilBody(t *testing.T) {
-	applier := NewApplier()
-	modelInfo := buildClaudeModelInfo()
-
-	tests := []struct {
-		name       string
-		body       []byte
-		wantBudget int
-	}{
-		{"nil body", nil, 16384},
-		{"empty body", []byte{}, 16384},
-		{"empty object", []byte(`{}`), 16384},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}
-			result, err := applier.Apply(tt.body, config, modelInfo)
-			if err != nil {
-				t.Fatalf("Apply() error = %v", err)
-			}
-
-			if got := gjson.GetBytes(result, "thinking.type").String(); got != "enabled" {
-				t.Fatalf("thinking.type = %q, want %q", got, "enabled")
-			}
-			if got := int(gjson.GetBytes(result, "thinking.budget_tokens").Int()); got != tt.wantBudget {
-				t.Fatalf("thinking.budget_tokens = %d, want %d", got, tt.wantBudget)
-			}
-		})
-	}
-}
-
-// =============================================================================
-// Helper Functions
-// =============================================================================
-
-func buildClaudeModelInfo() *registry.ModelInfo {
-	return &registry.ModelInfo{
-		ID: "claude-sonnet-4-5",
-		Thinking: &registry.ThinkingSupport{
-			Min:            1024,
-			Max:            128000,
-			ZeroAllowed:    true,
-			DynamicAllowed: false,
-		},
-	}
-}
diff --git a/internal/thinking/provider/gemini/apply_test.go b/internal/thinking/provider/gemini/apply_test.go
deleted file mode 100644
index 47c7e7ce..00000000
--- a/internal/thinking/provider/gemini/apply_test.go
+++ /dev/null
@@ -1,519 +0,0 @@
-// Package gemini implements thinking configuration for Gemini models.
-package gemini
-
-import (
-	"testing"
-
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
-	"github.com/tidwall/gjson"
-)
-
-func TestNewApplier(t *testing.T) {
-	applier := NewApplier()
-	if applier == nil {
-		t.Fatal("NewApplier() returned nil")
-	}
-}
-
-// parseConfigFromSuffix parses a raw suffix into a ThinkingConfig.
-// This helper reduces code duplication in end-to-end tests (L1 fix).
-func parseConfigFromSuffix(rawSuffix string) (thinking.ThinkingConfig, bool) {
-	if budget, ok := thinking.ParseNumericSuffix(rawSuffix); ok {
-		return thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: budget}, true
-	}
-	if level, ok := thinking.ParseLevelSuffix(rawSuffix); ok {
-		return thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: level}, true
-	}
-	if mode, ok := thinking.ParseSpecialSuffix(rawSuffix); ok {
-		config := thinking.ThinkingConfig{Mode: mode}
-		if mode == thinking.ModeAuto {
-			config.Budget = -1
-		}
-		return config, true
-	}
-	return thinking.ThinkingConfig{}, false
-}
-
-func TestApplierImplementsInterface(t *testing.T) {
-	// Compile-time check: if Applier doesn't implement the interface, this won't compile
-	var _ thinking.ProviderApplier = (*Applier)(nil)
-}
-
-// TestGeminiApply tests the Gemini thinking applier.
-//
-// Gemini-specific behavior:
-// - Gemini 2.5: thinkingBudget format (numeric)
-// - Gemini 3.x: thinkingLevel format (string)
-// - Flash series: ZeroAllowed=true
-// - Pro series: ZeroAllowed=false, Min=128
-// - CRITICAL: When budget=0/none, set includeThoughts=false
-//
-// Depends on: Epic 7 Story 7-2, 7-3
-func TestGeminiApply(t *testing.T) {
-	applier := NewApplier()
-	tests := []struct {
-		name                string
-		model               string
-		config              thinking.ThinkingConfig
-		wantField           string
-		wantValue           interface{}
-		wantIncludeThoughts bool // CRITICAL: includeThoughts field
-	}{
-		// Gemini 2.5 Flash (ZeroAllowed=true)
-		{"flash budget 8k", "gemini-2.5-flash", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, "thinkingBudget", 8192, true},
-		{"flash zero", "gemini-2.5-flash", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, "thinkingBudget", 0, false},
-		{"flash none", "gemini-2.5-flash", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "thinkingBudget", 0, false},
-
-		// Gemini 2.5 Pro (ZeroAllowed=false, Min=128)
-		{"pro budget 8k", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, "thinkingBudget", 8192, true},
-		{"pro zero - clamp", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, "thinkingBudget", 128, false},
-		{"pro none - clamp", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "thinkingBudget", 128, false},
-		{"pro below min", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 50}, "thinkingBudget", 128, true},
-		{"pro above max", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 50000}, "thinkingBudget", 32768, true},
-		{"pro auto", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, "thinkingBudget", -1, true},
-
-		// Gemini 3 Pro (Level mode, ZeroAllowed=false)
-		{"g3-pro high", "gemini-3-pro-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "thinkingLevel", "high", true},
-		{"g3-pro low", "gemini-3-pro-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, "thinkingLevel", "low", true},
-		{"g3-pro auto", "gemini-3-pro-preview", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, "thinkingBudget", -1, true},
-
-		// Gemini 3 Flash (Level mode, minimal is lowest)
-		{"g3-flash high", "gemini-3-flash-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "thinkingLevel", "high", true},
-		{"g3-flash medium", "gemini-3-flash-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, "thinkingLevel", "medium", true},
-		{"g3-flash minimal", "gemini-3-flash-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, "thinkingLevel", "minimal", true},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			modelInfo := buildGeminiModelInfo(tt.model)
-			normalized, err := thinking.ValidateConfig(tt.config, modelInfo.Thinking)
-			if err != nil {
-				t.Fatalf("ValidateConfig() error = %v", err)
-			}
-
-			result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo)
-			if err != nil {
-				t.Fatalf("Apply() error = %v", err)
-			}
-
-			gotField := gjson.GetBytes(result, "generationConfig.thinkingConfig."+tt.wantField)
-			switch want := tt.wantValue.(type) {
-			case int:
-				if int(gotField.Int()) != want {
-					t.Fatalf("%s = %d, want %d", tt.wantField, gotField.Int(), want)
-				}
-			case string:
-				if gotField.String() != want {
-					t.Fatalf("%s = %q, want %q", tt.wantField, gotField.String(), want)
-				}
-			case bool:
-				if gotField.Bool() != want {
-					t.Fatalf("%s = %v, want %v", tt.wantField, gotField.Bool(), want)
-				}
-			default:
-				t.Fatalf("unsupported wantValue type %T", tt.wantValue)
-			}
-
-			gotIncludeThoughts := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts").Bool()
-			if gotIncludeThoughts != tt.wantIncludeThoughts {
-				t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, tt.wantIncludeThoughts)
-			}
-		})
-	}
-}
-
-// TestGeminiApplyEndToEndBudgetZero tests suffix parsing + validation + apply for budget=0.
-//
-// This test covers the complete flow from suffix parsing to Apply output:
-// - AC#1: ModeBudget+Budget=0 → ModeNone conversion
-// - AC#3: Gemini 3 ModeNone+Budget>0 → includeThoughts=false + thinkingLevel=low
-// - AC#4: Gemini 2.5 Pro (0) → clamped to 128 + includeThoughts=false
-func TestGeminiApplyEndToEndBudgetZero(t *testing.T) {
-	tests := []struct {
-		name                string
-		model               string
-		wantModel           string
-		wantField           string // "thinkingBudget" or "thinkingLevel"
-		wantValue           interface{}
-		wantIncludeThoughts bool
-	}{
-		// AC#4: Gemini 2.5 Pro - Budget format
-		{"gemini-25-pro zero", "gemini-2.5-pro(0)", "gemini-2.5-pro", "thinkingBudget", 128, false},
-		// AC#3: Gemini 3 Pro - Level format, ModeNone clamped to Budget=128, uses lowest level
-		{"gemini-3-pro zero", "gemini-3-pro-preview(0)", "gemini-3-pro-preview", "thinkingLevel", "low", false},
-		{"gemini-3-pro none", "gemini-3-pro-preview(none)", "gemini-3-pro-preview", "thinkingLevel", "low", false},
-		// Gemini 3 Flash - Level format, lowest level is "minimal"
-		{"gemini-3-flash zero", "gemini-3-flash-preview(0)", "gemini-3-flash-preview", "thinkingLevel", "minimal", false},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			suffix := thinking.ParseSuffix(tt.model)
-			if !suffix.HasSuffix {
-				t.Fatalf("ParseSuffix(%q) HasSuffix = false, want true", tt.model)
-			}
-			if suffix.ModelName != tt.wantModel {
-				t.Fatalf("ParseSuffix(%q) ModelName = %q, want %q", tt.model, suffix.ModelName, tt.wantModel)
-			}
-
-			// Parse suffix value using helper function (L1 fix)
-			config, ok := parseConfigFromSuffix(suffix.RawSuffix)
-			if !ok {
-				t.Fatalf("ParseSuffix(%q) RawSuffix = %q is not a valid suffix", tt.model, suffix.RawSuffix)
-			}
-
-			modelInfo := buildGeminiModelInfo(suffix.ModelName)
-			normalized, err := thinking.ValidateConfig(config, modelInfo.Thinking)
-			if err != nil {
-				t.Fatalf("ValidateConfig() error = %v", err)
-			}
-
-			applier := NewApplier()
-			result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo)
-			if err != nil {
-				t.Fatalf("Apply() error = %v", err)
-			}
-
-			// Verify the output field value
-			gotField := gjson.GetBytes(result, "generationConfig.thinkingConfig."+tt.wantField)
-			switch want := tt.wantValue.(type) {
-			case int:
-				if int(gotField.Int()) != want {
-					t.Fatalf("%s = %d, want %d", tt.wantField, gotField.Int(), want)
-				}
-			case string:
-				if gotField.String() != want {
-					t.Fatalf("%s = %q, want %q", tt.wantField, gotField.String(), want)
-				}
-			}
-
-			gotIncludeThoughts := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts").Bool()
-			if gotIncludeThoughts != tt.wantIncludeThoughts {
-				t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, tt.wantIncludeThoughts)
-			}
-		})
-	}
-}
-
-// TestGeminiApplyEndToEndAuto tests auto mode through both suffix parsing and direct config.
-// -// This test covers: -// - AC#2: Gemini 2.5 auto uses thinkingBudget=-1 -// - AC#3: Gemini 3 auto uses thinkingBudget=-1 (not thinkingLevel) -// - Suffix parsing path: (auto) and (-1) suffixes -// - Direct config path: ModeLevel + Level=auto → ModeAuto conversion -func TestGeminiApplyEndToEndAuto(t *testing.T) { - tests := []struct { - name string - model string // model name (with suffix for parsing, or plain for direct config) - directConfig *thinking.ThinkingConfig // if not nil, use direct config instead of suffix parsing - wantField string - wantValue int - wantIncludeThoughts bool - }{ - // Suffix parsing path - Budget-only model (Gemini 2.5) - {"suffix auto g25", "gemini-2.5-pro(auto)", nil, "thinkingBudget", -1, true}, - {"suffix -1 g25", "gemini-2.5-pro(-1)", nil, "thinkingBudget", -1, true}, - // Suffix parsing path - Hybrid model (Gemini 3) - {"suffix auto g3", "gemini-3-pro-preview(auto)", nil, "thinkingBudget", -1, true}, - {"suffix -1 g3", "gemini-3-pro-preview(-1)", nil, "thinkingBudget", -1, true}, - // Direct config path - Level=auto → ModeAuto conversion - {"direct level=auto g25", "gemini-2.5-pro", &thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelAuto}, "thinkingBudget", -1, true}, - {"direct level=auto g3", "gemini-3-pro-preview", &thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelAuto}, "thinkingBudget", -1, true}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - var config thinking.ThinkingConfig - var modelName string - - if tt.directConfig != nil { - // Direct config path - config = *tt.directConfig - modelName = tt.model - } else { - // Suffix parsing path - suffix := thinking.ParseSuffix(tt.model) - if !suffix.HasSuffix { - t.Fatalf("ParseSuffix(%q) HasSuffix = false", tt.model) - } - modelName = suffix.ModelName - var ok bool - config, ok = parseConfigFromSuffix(suffix.RawSuffix) - if !ok { - t.Fatalf("parseConfigFromSuffix(%q) failed", suffix.RawSuffix) - } - } - - modelInfo := buildGeminiModelInfo(modelName) - normalized, err := thinking.ValidateConfig(config, modelInfo.Thinking) - if err != nil { - t.Fatalf("ValidateConfig() error = %v", err) - } - - // Verify ModeAuto after validation - if normalized.Mode != thinking.ModeAuto { - t.Fatalf("ValidateConfig() Mode = %v, want ModeAuto", normalized.Mode) - } - - applier := NewApplier() - result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo) - if err != nil { - t.Fatalf("Apply() error = %v", err) - } - - gotField := gjson.GetBytes(result, "generationConfig.thinkingConfig."+tt.wantField) - if int(gotField.Int()) != tt.wantValue { - t.Fatalf("%s = %d, want %d", tt.wantField, gotField.Int(), tt.wantValue) - } - - gotIncludeThoughts := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts").Bool() - if gotIncludeThoughts != tt.wantIncludeThoughts { - t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, tt.wantIncludeThoughts) - } - }) - } -} - -func TestGeminiApplyInvalidBody(t *testing.T) { - applier := NewApplier() - modelInfo := buildGeminiModelInfo("gemini-2.5-flash") - config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192} - normalized, err := thinking.ValidateConfig(config, modelInfo.Thinking) - if err != nil { - t.Fatalf("ValidateConfig() error = %v", err) - } - - tests := []struct { - name string - body []byte - }{ - {"nil body", nil}, - {"empty body", []byte{}}, - {"invalid json", []byte("{\"not json\"")}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { 
- result, err := applier.Apply(tt.body, *normalized, modelInfo) - if err != nil { - t.Fatalf("Apply() error = %v", err) - } - - gotBudget := int(gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget").Int()) - if gotBudget != 8192 { - t.Fatalf("thinkingBudget = %d, want %d", gotBudget, 8192) - } - - gotIncludeThoughts := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts").Bool() - if !gotIncludeThoughts { - t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, true) - } - }) - } -} - -// TestGeminiApplyConflictingFields tests that conflicting fields are removed. -// -// When applying Budget format, any existing thinkingLevel should be removed. -// When applying Level format, any existing thinkingBudget should be removed. -func TestGeminiApplyConflictingFields(t *testing.T) { - applier := NewApplier() - - tests := []struct { - name string - model string - config thinking.ThinkingConfig - existingBody string - wantField string // expected field to exist - wantNoField string // expected field to NOT exist - }{ - // Budget format should remove existing thinkingLevel - { - "budget removes level", - "gemini-2.5-pro", - thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, - `{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}`, - "thinkingBudget", - "thinkingLevel", - }, - // Level format should remove existing thinkingBudget - { - "level removes budget", - "gemini-3-pro-preview", - thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, - `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, - "thinkingLevel", - "thinkingBudget", - }, - // ModeAuto uses budget format, should remove thinkingLevel - { - "auto removes level", - "gemini-3-pro-preview", - thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, - `{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}`, - "thinkingBudget", - "thinkingLevel", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - modelInfo := buildGeminiModelInfo(tt.model) - result, err := applier.Apply([]byte(tt.existingBody), tt.config, modelInfo) - if err != nil { - t.Fatalf("Apply() error = %v", err) - } - - // Verify expected field exists - wantPath := "generationConfig.thinkingConfig." + tt.wantField - if !gjson.GetBytes(result, wantPath).Exists() { - t.Fatalf("%s should exist in result: %s", tt.wantField, string(result)) - } - - // Verify conflicting field was removed - noPath := "generationConfig.thinkingConfig." + tt.wantNoField - if gjson.GetBytes(result, noPath).Exists() { - t.Fatalf("%s should NOT exist in result: %s", tt.wantNoField, string(result)) - } - }) - } -} - -// TestGeminiApplyThinkingNotSupported tests passthrough handling when modelInfo.Thinking is nil. 
-func TestGeminiApplyThinkingNotSupported(t *testing.T) {
-	applier := NewApplier()
-	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
-	body := []byte(`{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`)
-
-	// Model with nil Thinking support
-	modelInfo := &registry.ModelInfo{ID: "gemini-unknown", Thinking: nil}
-
-	got, err := applier.Apply(body, config, modelInfo)
-	if err != nil {
-		t.Fatalf("Apply() expected nil error for nil Thinking, got %v", err)
-	}
-	if string(got) != string(body) {
-		t.Fatalf("expected body unchanged, got %s", string(got))
-	}
-}
-
-func buildGeminiModelInfo(modelID string) *registry.ModelInfo {
-	support := &registry.ThinkingSupport{}
-	switch modelID {
-	case "gemini-2.5-pro":
-		support.Min = 128
-		support.Max = 32768
-		support.ZeroAllowed = false
-		support.DynamicAllowed = true
-	case "gemini-2.5-flash", "gemini-2.5-flash-lite":
-		support.Min = 0
-		support.Max = 24576
-		support.ZeroAllowed = true
-		support.DynamicAllowed = true
-	case "gemini-3-pro-preview":
-		support.Min = 128
-		support.Max = 32768
-		support.ZeroAllowed = false
-		support.DynamicAllowed = true
-		support.Levels = []string{"low", "high"}
-	case "gemini-3-flash-preview":
-		support.Min = 128
-		support.Max = 32768
-		support.ZeroAllowed = false
-		support.DynamicAllowed = true
-		support.Levels = []string{"minimal", "low", "medium", "high"}
-	default:
-		// Unknown model - return nil Thinking to trigger error path
-		return &registry.ModelInfo{ID: modelID, Thinking: nil}
-	}
-	return &registry.ModelInfo{
-		ID:       modelID,
-		Thinking: support,
-	}
-}
-
-// TestGeminiApplyNilModelInfo tests Apply behavior when modelInfo is nil.
-// Coverage: apply.go:56-58 (H1)
-func TestGeminiApplyNilModelInfo(t *testing.T) {
-	applier := NewApplier()
-	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
-	body := []byte(`{"existing": "data"}`)
-
-	result, err := applier.Apply(body, config, nil)
-	if err != nil {
-		t.Fatalf("Apply() with nil modelInfo should not error, got: %v", err)
-	}
-	// nil modelInfo now applies compatible config
-	if !gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget").Exists() {
-		t.Fatalf("Apply() with nil modelInfo should apply thinking config, got: %s", result)
-	}
-}
-
-// TestGeminiApplyEmptyModelID tests Apply when modelID is empty.
-// Coverage: apply.go:61-63 (H2)
-func TestGeminiApplyEmptyModelID(t *testing.T) {
-	applier := NewApplier()
-	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
-	modelInfo := &registry.ModelInfo{ID: "", Thinking: nil}
-	body := []byte(`{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`)
-
-	got, err := applier.Apply(body, config, modelInfo)
-	if err != nil {
-		t.Fatalf("Apply() expected nil error, got %v", err)
-	}
-	if string(got) != string(body) {
-		t.Fatalf("expected body unchanged, got %s", string(got))
-	}
-}
-
-// TestGeminiApplyModeBudgetWithLevels tests that ModeBudget is applied with budget format
-// even for models with Levels. The Apply layer handles ModeBudget by applying thinkingBudget.
-// Coverage: apply.go:88-90 -func TestGeminiApplyModeBudgetWithLevels(t *testing.T) { - applier := NewApplier() - modelInfo := buildGeminiModelInfo("gemini-3-flash-preview") - config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192} - body := []byte(`{"existing": "data"}`) - - result, err := applier.Apply(body, config, modelInfo) - if err != nil { - t.Fatalf("Apply() error = %v", err) - } - // ModeBudget applies budget format - budget := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget").Int() - if budget != 8192 { - t.Fatalf("Apply() expected thinkingBudget=8192, got: %d", budget) - } -} - -// TestGeminiApplyUnsupportedMode tests behavior with unsupported Mode types. -// Coverage: apply.go:67-69 and 97-98 (H5, L2) -func TestGeminiApplyUnsupportedMode(t *testing.T) { - applier := NewApplier() - body := []byte(`{"existing": "data"}`) - - tests := []struct { - name string - model string - config thinking.ThinkingConfig - }{ - {"unknown mode with budget model", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ThinkingMode(99), Budget: 8192}}, - {"unknown mode with level model", "gemini-3-pro-preview", thinking.ThinkingConfig{Mode: thinking.ThinkingMode(99), Level: thinking.LevelHigh}}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - modelInfo := buildGeminiModelInfo(tt.model) - result, err := applier.Apply(body, tt.config, modelInfo) - if err != nil { - t.Fatalf("Apply() error = %v", err) - } - // Unsupported modes return original body unchanged - if string(result) != string(body) { - t.Fatalf("Apply() with unsupported mode should return original body, got: %s", result) - } - }) - } -} diff --git a/internal/thinking/provider/geminicli/apply_test.go b/internal/thinking/provider/geminicli/apply_test.go deleted file mode 100644 index e6900496..00000000 --- a/internal/thinking/provider/geminicli/apply_test.go +++ /dev/null @@ -1,375 +0,0 @@ -// Package geminicli implements thinking configuration for Gemini CLI API format. -package geminicli - -import ( - "testing" - - "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" - "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" - "github.com/tidwall/gjson" -) - -func TestNewApplier(t *testing.T) { - applier := NewApplier() - if applier == nil { - t.Fatal("NewApplier() returned nil") - } -} - -func TestApplierImplementsInterface(t *testing.T) { - // Compile-time check: if Applier doesn't implement the interface, this won't compile - var _ thinking.ProviderApplier = (*Applier)(nil) -} - -// TestGeminiCLIApply tests the Gemini CLI thinking applier. -// -// Gemini CLI uses request.generationConfig.thinkingConfig.* path. -// Behavior mirrors Gemini applier but with different JSON path prefix. 
-func TestGeminiCLIApply(t *testing.T) { - applier := NewApplier() - tests := []struct { - name string - model string - config thinking.ThinkingConfig - wantField string - wantValue interface{} - wantIncludeThoughts bool - }{ - // Budget mode (no Levels) - {"budget 8k", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, "thinkingBudget", 8192, true}, - {"budget zero", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, "thinkingBudget", 0, false}, - {"none mode", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "thinkingBudget", 0, false}, - {"auto mode", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, "thinkingBudget", -1, true}, - - // Level mode (has Levels) - {"level high", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "thinkingLevel", "high", true}, - {"level low", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, "thinkingLevel", "low", true}, - {"level minimal", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, "thinkingLevel", "minimal", true}, - // ModeAuto with Levels model still uses thinkingBudget=-1 - {"auto with levels", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, "thinkingBudget", -1, true}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - modelInfo := buildGeminiCLIModelInfo(tt.model) - result, err := applier.Apply([]byte(`{}`), tt.config, modelInfo) - if err != nil { - t.Fatalf("Apply() error = %v", err) - } - - gotField := gjson.GetBytes(result, "request.generationConfig.thinkingConfig."+tt.wantField) - switch want := tt.wantValue.(type) { - case int: - if int(gotField.Int()) != want { - t.Fatalf("%s = %d, want %d", tt.wantField, gotField.Int(), want) - } - case string: - if gotField.String() != want { - t.Fatalf("%s = %q, want %q", tt.wantField, gotField.String(), want) - } - case bool: - if gotField.Bool() != want { - t.Fatalf("%s = %v, want %v", tt.wantField, gotField.Bool(), want) - } - default: - t.Fatalf("unsupported wantValue type %T", tt.wantValue) - } - - gotIncludeThoughts := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts").Bool() - if gotIncludeThoughts != tt.wantIncludeThoughts { - t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, tt.wantIncludeThoughts) - } - }) - } -} - -// TestGeminiCLIApplyModeNoneWithLevel tests ModeNone with Level model. -// When ModeNone is used with a model that has Levels, includeThoughts should be false. 
-func TestGeminiCLIApplyModeNoneWithLevel(t *testing.T) { - applier := NewApplier() - modelInfo := buildGeminiCLIModelInfo("gemini-cli-level") - config := thinking.ThinkingConfig{Mode: thinking.ModeNone, Level: thinking.LevelLow} - - result, err := applier.Apply([]byte(`{}`), config, modelInfo) - if err != nil { - t.Fatalf("Apply() error = %v", err) - } - - gotIncludeThoughts := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts").Bool() - if gotIncludeThoughts != false { - t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, false) - } - - gotLevel := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel").String() - if gotLevel != "low" { - t.Fatalf("thinkingLevel = %q, want %q", gotLevel, "low") - } -} - -// TestGeminiCLIApplyInvalidBody tests Apply behavior with invalid body inputs. -func TestGeminiCLIApplyInvalidBody(t *testing.T) { - applier := NewApplier() - modelInfo := buildGeminiCLIModelInfo("gemini-cli-budget") - config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192} - - tests := []struct { - name string - body []byte - }{ - {"nil body", nil}, - {"empty body", []byte{}}, - {"invalid json", []byte("{\"not json\"")}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result, err := applier.Apply(tt.body, config, modelInfo) - if err != nil { - t.Fatalf("Apply() error = %v", err) - } - - gotBudget := int(gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget").Int()) - if gotBudget != 8192 { - t.Fatalf("thinkingBudget = %d, want %d", gotBudget, 8192) - } - - gotIncludeThoughts := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts").Bool() - if !gotIncludeThoughts { - t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, true) - } - }) - } -} - -// TestGeminiCLIApplyConflictingFields tests that conflicting fields are removed. -// -// When applying Budget format, any existing thinkingLevel should be removed. -// When applying Level format, any existing thinkingBudget should be removed. 
-func TestGeminiCLIApplyConflictingFields(t *testing.T) {
-	applier := NewApplier()
-
-	tests := []struct {
-		name         string
-		model        string
-		config       thinking.ThinkingConfig
-		existingBody string
-		wantField    string // expected field to exist
-		wantNoField  string // expected field to NOT exist
-	}{
-		// Budget format should remove existing thinkingLevel
-		{
-			"budget removes level",
-			"gemini-cli-budget",
-			thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192},
-			`{"request":{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}}`,
-			"thinkingBudget",
-			"thinkingLevel",
-		},
-		// Level format should remove existing thinkingBudget
-		{
-			"level removes budget",
-			"gemini-cli-level",
-			thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh},
-			`{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`,
-			"thinkingLevel",
-			"thinkingBudget",
-		},
-		// ModeAuto uses budget format, should remove thinkingLevel
-		{
-			"auto removes level",
-			"gemini-cli-level",
-			thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1},
-			`{"request":{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}}`,
-			"thinkingBudget",
-			"thinkingLevel",
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			modelInfo := buildGeminiCLIModelInfo(tt.model)
-			result, err := applier.Apply([]byte(tt.existingBody), tt.config, modelInfo)
-			if err != nil {
-				t.Fatalf("Apply() error = %v", err)
-			}
-
-			// Verify expected field exists
-			wantPath := "request.generationConfig.thinkingConfig." + tt.wantField
-			if !gjson.GetBytes(result, wantPath).Exists() {
-				t.Fatalf("%s should exist in result: %s", tt.wantField, string(result))
-			}
-
-			// Verify conflicting field was removed
-			noPath := "request.generationConfig.thinkingConfig." + tt.wantNoField
-			if gjson.GetBytes(result, noPath).Exists() {
-				t.Fatalf("%s should NOT exist in result: %s", tt.wantNoField, string(result))
-			}
-		})
-	}
-}
-
-// TestGeminiCLIApplyThinkingNotSupported tests passthrough handling when modelInfo.Thinking is nil.
-func TestGeminiCLIApplyThinkingNotSupported(t *testing.T) {
-	applier := NewApplier()
-	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
-	body := []byte(`{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`)
-
-	// Model with nil Thinking support
-	modelInfo := &registry.ModelInfo{ID: "gemini-cli-unknown", Thinking: nil}
-
-	got, err := applier.Apply(body, config, modelInfo)
-	if err != nil {
-		t.Fatalf("Apply() expected nil error for nil Thinking, got %v", err)
-	}
-	if string(got) != string(body) {
-		t.Fatalf("expected body unchanged, got %s", string(got))
-	}
-}
-
-// TestGeminiCLIApplyNilModelInfo tests Apply behavior when modelInfo is nil.
-func TestGeminiCLIApplyNilModelInfo(t *testing.T) {
-	applier := NewApplier()
-	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
-	body := []byte(`{"existing": "data"}`)
-
-	result, err := applier.Apply(body, config, nil)
-	if err != nil {
-		t.Fatalf("Apply() with nil modelInfo should not error, got: %v", err)
-	}
-	// nil modelInfo now applies compatible config
-	if !gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget").Exists() {
-		t.Fatalf("Apply() with nil modelInfo should apply thinking config, got: %s", result)
-	}
-}
-
-// TestGeminiCLIApplyEmptyModelID tests Apply when modelID is empty.
-func TestGeminiCLIApplyEmptyModelID(t *testing.T) {
-	applier := NewApplier()
-	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
-	modelInfo := &registry.ModelInfo{ID: "", Thinking: nil}
-	body := []byte(`{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`)
-
-	got, err := applier.Apply(body, config, modelInfo)
-	if err != nil {
-		t.Fatalf("Apply() expected nil error, got %v", err)
-	}
-	if string(got) != string(body) {
-		t.Fatalf("expected body unchanged, got %s", string(got))
-	}
-}
-
-// TestGeminiCLIApplyModeBudgetWithLevels tests that ModeBudget with Levels model passes through.
-// Apply layer doesn't convert - upper layer should handle Budget→Level conversion.
-func TestGeminiCLIApplyModeBudgetWithLevels(t *testing.T) {
-	applier := NewApplier()
-	modelInfo := buildGeminiCLIModelInfo("gemini-cli-level")
-	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
-	body := []byte(`{"existing": "data"}`)
-
-	result, err := applier.Apply(body, config, modelInfo)
-	if err != nil {
-		t.Fatalf("Apply() error = %v", err)
-	}
-	// ModeBudget applies budget format directly without conversion to levels
-	if !gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget").Exists() {
-		t.Fatalf("Apply() ModeBudget should apply budget format, got: %s", result)
-	}
-}
-
-// TestGeminiCLIApplyUnsupportedMode tests behavior with unsupported Mode types.
-func TestGeminiCLIApplyUnsupportedMode(t *testing.T) {
-	applier := NewApplier()
-	body := []byte(`{"existing": "data"}`)
-
-	tests := []struct {
-		name   string
-		model  string
-		config thinking.ThinkingConfig
-	}{
-		{"unknown mode with budget model", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ThinkingMode(99), Budget: 8192}},
-		{"unknown mode with level model", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ThinkingMode(99), Level: thinking.LevelHigh}},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			modelInfo := buildGeminiCLIModelInfo(tt.model)
-			result, err := applier.Apply(body, tt.config, modelInfo)
-			if err != nil {
-				t.Fatalf("Apply() error = %v", err)
-			}
-			// Unsupported modes return original body unchanged
-			if string(result) != string(body) {
-				t.Fatalf("Apply() with unsupported mode should return original body, got: %s", result)
-			}
-		})
-	}
-}
-
-// TestAntigravityUsesGeminiCLIFormat tests that antigravity provider uses gemini-cli format.
-// Antigravity is registered with the same applier as gemini-cli.
-func TestAntigravityUsesGeminiCLIFormat(t *testing.T) {
-	applier := NewApplier()
-
-	tests := []struct {
-		name      string
-		config    thinking.ThinkingConfig
-		modelInfo *registry.ModelInfo
-		wantField string
-	}{
-		{
-			"claude model budget",
-			thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384},
-			&registry.ModelInfo{ID: "gemini-claude-sonnet-4-5-thinking", Thinking: &registry.ThinkingSupport{Min: 1024, Max: 200000}},
-			"request.generationConfig.thinkingConfig.thinkingBudget",
-		},
-		{
-			"opus model budget",
-			thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 32768},
-			&registry.ModelInfo{ID: "gemini-claude-opus-4-5-thinking", Thinking: &registry.ThinkingSupport{Min: 1024, Max: 200000}},
-			"request.generationConfig.thinkingConfig.thinkingBudget",
-		},
-		{
-			"model with levels",
-			thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh},
-			&registry.ModelInfo{ID: "some-model-with-levels", Thinking: &registry.ThinkingSupport{Min: 1024, Max: 200000, Levels: []string{"low", "high"}}},
-			"request.generationConfig.thinkingConfig.thinkingLevel",
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got, err := applier.Apply([]byte(`{}`), tt.config, tt.modelInfo)
-			if err != nil {
-				t.Fatalf("Apply() error = %v", err)
-			}
-
-			if !gjson.GetBytes(got, tt.wantField).Exists() {
-				t.Fatalf("expected field %s in output: %s", tt.wantField, string(got))
-			}
-		})
-	}
-}
-
-func buildGeminiCLIModelInfo(modelID string) *registry.ModelInfo {
-	support := &registry.ThinkingSupport{}
-	switch modelID {
-	case "gemini-cli-budget":
-		support.Min = 0
-		support.Max = 32768
-		support.ZeroAllowed = true
-		support.DynamicAllowed = true
-	case "gemini-cli-level":
-		support.Min = 128
-		support.Max = 32768
-		support.ZeroAllowed = false
-		support.DynamicAllowed = true
-		support.Levels = []string{"minimal", "low", "medium", "high"}
-	default:
-		// Unknown model - return nil Thinking to trigger error path
-		return &registry.ModelInfo{ID: modelID, Thinking: nil}
-	}
-	return &registry.ModelInfo{
-		ID:       modelID,
-		Thinking: support,
-	}
-}
diff --git a/internal/thinking/provider/iflow/apply_test.go b/internal/thinking/provider/iflow/apply_test.go
deleted file mode 100644
index 9718c413..00000000
--- a/internal/thinking/provider/iflow/apply_test.go
+++ /dev/null
@@ -1,318 +0,0 @@
-// Package iflow implements thinking configuration for iFlow models (GLM, MiniMax).
-package iflow
-
-import (
-	"bytes"
-	"testing"
-
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
-	"github.com/tidwall/gjson"
-)
-
-func TestNewApplier(t *testing.T) {
-	tests := []struct {
-		name string
-	}{
-		{"default"},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			applier := NewApplier()
-			if applier == nil {
-				t.Fatalf("expected non-nil applier")
-			}
-		})
-	}
-}
-
-func TestApplierImplementsInterface(t *testing.T) {
-	tests := []struct {
-		name    string
-		applier thinking.ProviderApplier
-	}{
-		{"default", NewApplier()},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			if tt.applier == nil {
-				t.Fatalf("expected thinking.ProviderApplier implementation")
-			}
-		})
-	}
-}
-
-func TestApplyNilModelInfo(t *testing.T) {
-	applier := NewApplier()
-
-	tests := []struct {
-		name string
-		body []byte
-	}{
-		{"nil body", nil},
-		{"empty body", []byte{}},
-		{"json body", []byte(`{"model":"glm-4.6"}`)},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got, err := applier.Apply(tt.body, thinking.ThinkingConfig{}, nil)
-			if err != nil {
-				t.Fatalf("expected nil error, got %v", err)
-			}
-			if !bytes.Equal(got, tt.body) {
-				t.Fatalf("expected body unchanged, got %s", string(got))
-			}
-		})
-	}
-}
-
-func TestApplyMissingThinkingSupport(t *testing.T) {
-	applier := NewApplier()
-
-	tests := []struct {
-		name    string
-		modelID string
-	}{
-		{"model id", "glm-4.6"},
-		{"empty model id", ""},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			modelInfo := &registry.ModelInfo{ID: tt.modelID}
-			body := []byte(`{"model":"` + tt.modelID + `"}`)
-			got, err := applier.Apply(body, thinking.ThinkingConfig{}, modelInfo)
-			if err != nil {
-				t.Fatalf("expected nil error, got %v", err)
-			}
-			if string(got) != string(body) {
-				t.Fatalf("expected body unchanged, got %s", string(got))
-			}
-		})
-	}
-}
-
-func TestConfigToBoolean(t *testing.T) {
-	tests := []struct {
-		name   string
-		config thinking.ThinkingConfig
-		want   bool
-	}{
-		{"mode none", thinking.ThinkingConfig{Mode: thinking.ModeNone}, false},
-		{"mode auto", thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true},
-		{"budget zero", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, false},
-		{"budget positive", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1000}, true},
-		{"level none", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelNone}, false},
-		{"level minimal", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, true},
-		{"level low", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, true},
-		{"level medium", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, true},
-		{"level high", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, true},
-		{"level xhigh", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelXHigh}, true},
-		{"zero value config", thinking.ThinkingConfig{}, false},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			if got := configToBoolean(tt.config); got != tt.want {
-				t.Fatalf("configToBoolean(%+v) = %v, want %v", tt.config, got, tt.want)
-			}
-		})
-	}
-}
-
-func TestApplyGLM(t *testing.T) {
-	applier := NewApplier()
-
-	tests := []struct {
-		name         string
-		modelID      string
-		body         []byte
-		config       thinking.ThinkingConfig
-		wantEnable   bool
-		wantPreserve string
-	}{
-		{"mode none", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeNone}, false, ""},
-		{"level none", "glm-4.7", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelNone}, false, ""},
-		{"mode auto", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, ""},
-		{"level minimal", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, true, ""},
-		{"level low", "glm-4.7", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, true, ""},
-		{"level medium", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, true, ""},
-		{"level high", "GLM-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, true, ""},
-		{"level xhigh", "glm-z1-preview", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelXHigh}, true, ""},
-		{"budget zero", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, false, ""},
-		{"budget 1000", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1000}, true, ""},
-		{"preserve fields", "glm-4.6", []byte(`{"model":"glm-4.6","extra":{"keep":true}}`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "glm-4.6"},
-		{"empty body", "glm-4.6", nil, thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, ""},
-		{"malformed json", "glm-4.6", []byte(`{invalid`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, ""},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			modelInfo := &registry.ModelInfo{
-				ID:       tt.modelID,
-				Thinking: &registry.ThinkingSupport{},
-			}
-			got, err := applier.Apply(tt.body, tt.config, modelInfo)
-			if err != nil {
-				t.Fatalf("Apply() error = %v", err)
-			}
-			if !gjson.ValidBytes(got) {
-				t.Fatalf("expected valid JSON, got %s", string(got))
-			}
-
-			enableResult := gjson.GetBytes(got, "chat_template_kwargs.enable_thinking")
-			if !enableResult.Exists() {
-				t.Fatalf("enable_thinking missing")
-			}
-			gotEnable := enableResult.Bool()
-			if gotEnable != tt.wantEnable {
-				t.Fatalf("enable_thinking = %v, want %v", gotEnable, tt.wantEnable)
-			}
-
-			// clear_thinking only set when enable_thinking=true
-			clearResult := gjson.GetBytes(got, "chat_template_kwargs.clear_thinking")
-			if tt.wantEnable {
-				if !clearResult.Exists() {
-					t.Fatalf("clear_thinking missing when enable_thinking=true")
-				}
-				if clearResult.Bool() {
-					t.Fatalf("clear_thinking = %v, want false", clearResult.Bool())
-				}
-			} else {
-				if clearResult.Exists() {
-					t.Fatalf("clear_thinking should not exist when enable_thinking=false")
-				}
-			}
-
-			if tt.wantPreserve != "" {
-				gotModel := gjson.GetBytes(got, "model").String()
-				if gotModel != tt.wantPreserve {
-					t.Fatalf("model = %q, want %q", gotModel, tt.wantPreserve)
-				}
-				if !gjson.GetBytes(got, "extra.keep").Bool() {
-					t.Fatalf("expected extra.keep preserved")
-				}
-			}
-		})
-	}
-}
-
-func TestApplyMiniMax(t *testing.T) {
-	applier := NewApplier()
-
-	tests := []struct {
-		name      string
-		modelID   string
-		body      []byte
-		config    thinking.ThinkingConfig
-		wantSplit bool
-		wantModel string
-		wantKeep  bool
-	}{
-		{"mode none", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeNone}, false, "", false},
-		{"level none", "minimax-m2.1", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelNone}, false, "", false},
-		{"mode auto", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "", false},
-		{"level high", "MINIMAX-M2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, true, "", false},
-		{"level low", "minimax-m2.1", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, true, "", false},
-		{"level minimal", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, true, "", false},
-		{"level medium", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, true, "", false},
-		{"level xhigh", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelXHigh}, true, "", false},
-		{"budget zero", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, false, "", false},
-		{"budget 1000", "minimax-m2.1", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1000}, true, "", false},
-		{"unknown level", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: "unknown"}, true, "", false},
-		{"preserve fields", "minimax-m2", []byte(`{"model":"minimax-m2","extra":{"keep":true}}`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "minimax-m2", true},
-		{"empty body", "minimax-m2", nil, thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "", false},
-		{"malformed json", "minimax-m2", []byte(`{invalid`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "", false},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			modelInfo := &registry.ModelInfo{
-				ID:       tt.modelID,
-				Thinking: &registry.ThinkingSupport{},
-			}
-			got, err := applier.Apply(tt.body, tt.config, modelInfo)
-			if err != nil {
-				t.Fatalf("Apply() error = %v", err)
-			}
-			if !gjson.ValidBytes(got) {
-				t.Fatalf("expected valid JSON, got %s", string(got))
-			}
-
-			splitResult := gjson.GetBytes(got, "reasoning_split")
-			if !splitResult.Exists() {
-				t.Fatalf("reasoning_split missing")
-			}
-			// Verify JSON type is boolean, not string
-			if splitResult.Type != gjson.True && splitResult.Type != gjson.False {
-				t.Fatalf("reasoning_split should be boolean, got type %v", splitResult.Type)
-			}
-			gotSplit := splitResult.Bool()
-			if gotSplit != tt.wantSplit {
-				t.Fatalf("reasoning_split = %v, want %v", gotSplit, tt.wantSplit)
-			}
-
-			if tt.wantModel != "" {
-				gotModel := gjson.GetBytes(got, "model").String()
-				if gotModel != tt.wantModel {
-					t.Fatalf("model = %q, want %q", gotModel, tt.wantModel)
-				}
-				if tt.wantKeep && !gjson.GetBytes(got, "extra.keep").Bool() {
-					t.Fatalf("expected extra.keep preserved")
-				}
-			}
-		})
-	}
-}
-
-// TestIsGLMModel tests the GLM model detection.
-//
-// Depends on: Epic 9 Story 9-1
-func TestIsGLMModel(t *testing.T) {
-	tests := []struct {
-		name    string
-		model   string
-		wantGLM bool
-	}{
-		{"glm-4.6", "glm-4.6", true},
-		{"glm-z1-preview", "glm-z1-preview", true},
-		{"glm uppercase", "GLM-4.7", true},
-		{"minimax-01", "minimax-01", false},
-		{"gpt-5.2", "gpt-5.2", false},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			if got := isGLMModel(tt.model); got != tt.wantGLM {
-				t.Fatalf("isGLMModel(%q) = %v, want %v", tt.model, got, tt.wantGLM)
-			}
-		})
-	}
-}
-
-// TestIsMiniMaxModel tests the MiniMax model detection.
-//
-// Depends on: Epic 9 Story 9-1
-func TestIsMiniMaxModel(t *testing.T) {
-	tests := []struct {
-		name        string
-		model       string
-		wantMiniMax bool
-	}{
-		{"minimax-01", "minimax-01", true},
-		{"minimax uppercase", "MINIMAX-M2", true},
-		{"glm-4.6", "glm-4.6", false},
-		{"gpt-5.2", "gpt-5.2", false},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			if got := isMiniMaxModel(tt.model); got != tt.wantMiniMax {
-				t.Fatalf("isMiniMaxModel(%q) = %v, want %v", tt.model, got, tt.wantMiniMax)
-			}
-		})
-	}
-}
diff --git a/internal/thinking/provider/openai/apply_test.go b/internal/thinking/provider/openai/apply_test.go
deleted file mode 100644
index 1e348d9e..00000000
--- a/internal/thinking/provider/openai/apply_test.go
+++ /dev/null
@@ -1,336 +0,0 @@
-// Package openai implements thinking configuration for OpenAI/Codex models.
-package openai
-
-import (
-	"testing"
-
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
-	"github.com/tidwall/gjson"
-)
-
-func buildOpenAIModelInfo(modelID string) *registry.ModelInfo {
-	info := registry.LookupStaticModelInfo(modelID)
-	if info != nil {
-		return info
-	}
-	// Fallback with complete ThinkingSupport matching real OpenAI model capabilities
-	return &registry.ModelInfo{
-		ID: modelID,
-		Thinking: &registry.ThinkingSupport{
-			Min:         1024,
-			Max:         32768,
-			ZeroAllowed: true,
-			Levels:      []string{"none", "low", "medium", "high", "xhigh"},
-		},
-	}
-}
-
-func TestNewApplier(t *testing.T) {
-	applier := NewApplier()
-	if applier == nil {
-		t.Fatalf("expected non-nil applier")
-	}
-}
-
-func TestApplierImplementsInterface(t *testing.T) {
-	_, ok := interface{}(NewApplier()).(thinking.ProviderApplier)
-	if !ok {
-		t.Fatalf("expected Applier to implement thinking.ProviderApplier")
-	}
-}
-
-func TestApplyNilModelInfo(t *testing.T) {
-	applier := NewApplier()
-	body := []byte(`{"model":"gpt-5.2"}`)
-	config := thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}
-	got, err := applier.Apply(body, config, nil)
-	if err != nil {
-		t.Fatalf("expected nil error, got %v", err)
-	}
-	// nil modelInfo now applies compatible config
-	if !gjson.GetBytes(got, "reasoning_effort").Exists() {
-		t.Fatalf("expected reasoning_effort applied, got %s", string(got))
-	}
-}
-
-func TestApplyMissingThinkingSupport(t *testing.T) {
-	applier := NewApplier()
-	modelInfo := &registry.ModelInfo{ID: "gpt-5.2"}
-	body := []byte(`{"model":"gpt-5.2"}`)
-	got, err := applier.Apply(body, thinking.ThinkingConfig{}, modelInfo)
-	if err != nil {
-		t.Fatalf("expected nil error, got %v", err)
-	}
-	if string(got) != string(body) {
-		t.Fatalf("expected body unchanged, got %s", string(got))
-	}
-}
-
-// TestApplyLevel tests Apply with ModeLevel (unit test, no ValidateConfig).
-func TestApplyLevel(t *testing.T) {
-	applier := NewApplier()
-	modelInfo := buildOpenAIModelInfo("gpt-5.2")
-
-	tests := []struct {
-		name  string
-		level thinking.ThinkingLevel
-		want  string
-	}{
-		{"high", thinking.LevelHigh, "high"},
-		{"medium", thinking.LevelMedium, "medium"},
-		{"low", thinking.LevelLow, "low"},
-		{"xhigh", thinking.LevelXHigh, "xhigh"},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result, err := applier.Apply([]byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: tt.level}, modelInfo)
-			if err != nil {
-				t.Fatalf("Apply() error = %v", err)
-			}
-			if got := gjson.GetBytes(result, "reasoning_effort").String(); got != tt.want {
-				t.Fatalf("reasoning_effort = %q, want %q", got, tt.want)
-			}
-		})
-	}
-}
-
-// TestApplyModeNone tests Apply with ModeNone (unit test).
-func TestApplyModeNone(t *testing.T) {
-	applier := NewApplier()
-
-	tests := []struct {
-		name      string
-		config    thinking.ThinkingConfig
-		modelInfo *registry.ModelInfo
-		want      string
-	}{
-		{"zero allowed", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, &registry.ModelInfo{ID: "gpt-5.2", Thinking: &registry.ThinkingSupport{ZeroAllowed: true, Levels: []string{"none", "low"}}}, "none"},
-		{"clamped to level", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 128, Level: thinking.LevelLow}, &registry.ModelInfo{ID: "gpt-5", Thinking: &registry.ThinkingSupport{Levels: []string{"minimal", "low"}}}, "low"},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result, err := applier.Apply([]byte(`{}`), tt.config, tt.modelInfo)
-			if err != nil {
-				t.Fatalf("Apply() error = %v", err)
-			}
-			if got := gjson.GetBytes(result, "reasoning_effort").String(); got != tt.want {
-				t.Fatalf("reasoning_effort = %q, want %q", got, tt.want)
-			}
-		})
-	}
-}
-
-// TestApplyPassthrough tests that unsupported modes pass through unchanged.
-func TestApplyPassthrough(t *testing.T) {
-	applier := NewApplier()
-	modelInfo := buildOpenAIModelInfo("gpt-5.2")
-
-	tests := []struct {
-		name   string
-		config thinking.ThinkingConfig
-	}{
-		{"mode auto", thinking.ThinkingConfig{Mode: thinking.ModeAuto}},
-		{"mode budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			body := []byte(`{"model":"gpt-5.2"}`)
-			result, err := applier.Apply(body, tt.config, modelInfo)
-			if err != nil {
-				t.Fatalf("Apply() error = %v", err)
-			}
-			if string(result) != string(body) {
-				t.Fatalf("Apply() result = %s, want %s", string(result), string(body))
-			}
-		})
-	}
-}
-
-// TestApplyInvalidBody tests Apply with invalid body input.
-func TestApplyInvalidBody(t *testing.T) {
-	applier := NewApplier()
-	modelInfo := buildOpenAIModelInfo("gpt-5.2")
-
-	tests := []struct {
-		name string
-		body []byte
-	}{
-		{"nil body", nil},
-		{"empty body", []byte{}},
-		{"invalid json", []byte(`{"not json"`)},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result, err := applier.Apply(tt.body, thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, modelInfo)
-			if err != nil {
-				t.Fatalf("Apply() error = %v", err)
-			}
-			if !gjson.ValidBytes(result) {
-				t.Fatalf("Apply() result is not valid JSON: %s", string(result))
-			}
-			if got := gjson.GetBytes(result, "reasoning_effort").String(); got != "high" {
-				t.Fatalf("reasoning_effort = %q, want %q", got, "high")
-			}
-		})
-	}
-}
-
-// TestApplyPreservesFields tests that existing body fields are preserved.
-func TestApplyPreservesFields(t *testing.T) { - applier := NewApplier() - modelInfo := buildOpenAIModelInfo("gpt-5.2") - - body := []byte(`{"model":"gpt-5.2","messages":[]}`) - result, err := applier.Apply(body, thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, modelInfo) - if err != nil { - t.Fatalf("Apply() error = %v", err) - } - if got := gjson.GetBytes(result, "model").String(); got != "gpt-5.2" { - t.Fatalf("model = %q, want %q", got, "gpt-5.2") - } - if !gjson.GetBytes(result, "messages").Exists() { - t.Fatalf("messages missing from result: %s", string(result)) - } - if got := gjson.GetBytes(result, "reasoning_effort").String(); got != "low" { - t.Fatalf("reasoning_effort = %q, want %q", got, "low") - } -} - -// TestHasLevel tests the hasLevel helper function. -func TestHasLevel(t *testing.T) { - tests := []struct { - name string - levels []string - target string - want bool - }{ - {"exact match", []string{"low", "medium", "high"}, "medium", true}, - {"case insensitive", []string{"low", "medium", "high"}, "MEDIUM", true}, - {"with spaces", []string{"low", " medium ", "high"}, "medium", true}, - {"not found", []string{"low", "medium", "high"}, "xhigh", false}, - {"empty levels", []string{}, "medium", false}, - {"none level", []string{"none", "low", "medium"}, "none", true}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := hasLevel(tt.levels, tt.target); got != tt.want { - t.Fatalf("hasLevel(%v, %q) = %v, want %v", tt.levels, tt.target, got, tt.want) - } - }) - } -} - -// --- End-to-End Tests (ValidateConfig → Apply) --- - -// TestE2EApply tests the full flow: ValidateConfig → Apply. -func TestE2EApply(t *testing.T) { - tests := []struct { - name string - model string - config thinking.ThinkingConfig - want string - }{ - {"level high", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "high"}, - {"level medium", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, "medium"}, - {"level low", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, "low"}, - {"level xhigh", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelXHigh}, "xhigh"}, - {"mode none", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "none"}, - {"budget to level", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, "medium"}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - modelInfo := buildOpenAIModelInfo(tt.model) - normalized, err := thinking.ValidateConfig(tt.config, modelInfo.Thinking) - if err != nil { - t.Fatalf("ValidateConfig() error = %v", err) - } - - applier := NewApplier() - result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo) - if err != nil { - t.Fatalf("Apply() error = %v", err) - } - if got := gjson.GetBytes(result, "reasoning_effort").String(); got != tt.want { - t.Fatalf("reasoning_effort = %q, want %q", got, tt.want) - } - }) - } -} - -// TestE2EApplyOutputFormat tests the full flow with exact JSON output verification. 
-func TestE2EApplyOutputFormat(t *testing.T) { - tests := []struct { - name string - model string - config thinking.ThinkingConfig - wantJSON string - }{ - {"level high", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, `{"reasoning_effort":"high"}`}, - {"level none", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, `{"reasoning_effort":"none"}`}, - {"budget converted", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, `{"reasoning_effort":"medium"}`}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - modelInfo := buildOpenAIModelInfo(tt.model) - normalized, err := thinking.ValidateConfig(tt.config, modelInfo.Thinking) - if err != nil { - t.Fatalf("ValidateConfig() error = %v", err) - } - - applier := NewApplier() - result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo) - if err != nil { - t.Fatalf("Apply() error = %v", err) - } - if string(result) != tt.wantJSON { - t.Fatalf("Apply() result = %s, want %s", string(result), tt.wantJSON) - } - }) - } -} - -// TestE2EApplyWithExistingBody tests the full flow with existing body fields. -func TestE2EApplyWithExistingBody(t *testing.T) { - tests := []struct { - name string - body string - config thinking.ThinkingConfig - wantEffort string - wantModel string - }{ - {"empty body", `{}`, thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "high", ""}, - {"preserve fields", `{"model":"gpt-5.2","messages":[]}`, thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, "medium", "gpt-5.2"}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - modelInfo := buildOpenAIModelInfo("gpt-5.2") - normalized, err := thinking.ValidateConfig(tt.config, modelInfo.Thinking) - if err != nil { - t.Fatalf("ValidateConfig() error = %v", err) - } - - applier := NewApplier() - result, err := applier.Apply([]byte(tt.body), *normalized, modelInfo) - if err != nil { - t.Fatalf("Apply() error = %v", err) - } - if got := gjson.GetBytes(result, "reasoning_effort").String(); got != tt.wantEffort { - t.Fatalf("reasoning_effort = %q, want %q", got, tt.wantEffort) - } - if tt.wantModel != "" { - if got := gjson.GetBytes(result, "model").String(); got != tt.wantModel { - t.Fatalf("model = %q, want %q", got, tt.wantModel) - } - } - }) - } -} diff --git a/internal/thinking/provider_map_test.go b/internal/thinking/provider_map_test.go deleted file mode 100644 index 0944c246..00000000 --- a/internal/thinking/provider_map_test.go +++ /dev/null @@ -1,51 +0,0 @@ -// Package thinking_test provides external tests for the thinking package. -// -// This file uses package thinking_test (external) to allow importing provider -// subpackages, which triggers their init() functions to register appliers. -// This avoids import cycles that would occur if thinking package imported providers directly. 
-package thinking_test - -import ( - "testing" - - "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" - - // Blank imports to trigger provider init() registration - _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/claude" - _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini" - _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli" - _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow" - _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai" -) - -func TestProviderAppliersBasic(t *testing.T) { - tests := []struct { - name string - provider string - wantNil bool - }{ - {"gemini provider", "gemini", false}, - {"gemini-cli provider", "gemini-cli", false}, - {"claude provider", "claude", false}, - {"openai provider", "openai", false}, - {"iflow provider", "iflow", false}, - {"antigravity provider", "antigravity", false}, - {"unknown provider", "unknown", true}, - {"empty provider", "", true}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := thinking.GetProviderApplier(tt.provider) - if tt.wantNil { - if got != nil { - t.Fatalf("GetProviderApplier(%q) = %T, want nil", tt.provider, got) - } - return - } - if got == nil { - t.Fatalf("GetProviderApplier(%q) = nil, want non-nil", tt.provider) - } - }) - } -} diff --git a/internal/thinking/strip_test.go b/internal/thinking/strip_test.go deleted file mode 100644 index edd6bd1a..00000000 --- a/internal/thinking/strip_test.go +++ /dev/null @@ -1,66 +0,0 @@ -// Package thinking_test provides tests for thinking config stripping. -package thinking_test - -import ( - "testing" - - "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" - "github.com/tidwall/gjson" -) - -func TestStripThinkingConfig(t *testing.T) { - tests := []struct { - name string - body string - provider string - stripped []string - preserved []string - }{ - {"claude thinking", `{"thinking":{"budget_tokens":8192},"model":"claude-3"}`, "claude", []string{"thinking"}, []string{"model"}}, - {"gemini thinkingConfig", `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192},"temperature":0.7}}`, "gemini", []string{"generationConfig.thinkingConfig"}, []string{"generationConfig.temperature"}}, - {"gemini-cli thinkingConfig", `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192},"temperature":0.7}}}`, "gemini-cli", []string{"request.generationConfig.thinkingConfig"}, []string{"request.generationConfig.temperature"}}, - {"antigravity thinkingConfig", `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":4096},"maxTokens":1024}}}`, "antigravity", []string{"request.generationConfig.thinkingConfig"}, []string{"request.generationConfig.maxTokens"}}, - {"openai reasoning_effort", `{"reasoning_effort":"high","model":"gpt-5"}`, "openai", []string{"reasoning_effort"}, []string{"model"}}, - {"iflow glm", `{"chat_template_kwargs":{"enable_thinking":true,"clear_thinking":false,"other":"value"}}`, "iflow", []string{"chat_template_kwargs.enable_thinking", "chat_template_kwargs.clear_thinking"}, []string{"chat_template_kwargs.other"}}, - {"iflow minimax", `{"reasoning_split":true,"model":"minimax"}`, "iflow", []string{"reasoning_split"}, []string{"model"}}, - {"iflow both formats", `{"chat_template_kwargs":{"enable_thinking":true,"clear_thinking":false},"reasoning_split":true,"model":"mixed"}`, "iflow", []string{"chat_template_kwargs.enable_thinking", "chat_template_kwargs.clear_thinking", 
"reasoning_split"}, []string{"model"}}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := thinking.StripThinkingConfig([]byte(tt.body), tt.provider) - - for _, path := range tt.stripped { - if gjson.GetBytes(got, path).Exists() { - t.Fatalf("expected %s to be stripped, got %s", path, string(got)) - } - } - for _, path := range tt.preserved { - if !gjson.GetBytes(got, path).Exists() { - t.Fatalf("expected %s to be preserved, got %s", path, string(got)) - } - } - }) - } -} - -func TestStripThinkingConfigPassthrough(t *testing.T) { - tests := []struct { - name string - body string - provider string - }{ - {"empty body", ``, "claude"}, - {"invalid json", `{not valid`, "claude"}, - {"unknown provider", `{"thinking":{"budget_tokens":8192}}`, "unknown"}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := thinking.StripThinkingConfig([]byte(tt.body), tt.provider) - if string(got) != tt.body { - t.Fatalf("StripThinkingConfig() = %s, want passthrough %s", string(got), tt.body) - } - }) - } -} diff --git a/internal/thinking/suffix_test.go b/internal/thinking/suffix_test.go deleted file mode 100644 index b3ea3ed3..00000000 --- a/internal/thinking/suffix_test.go +++ /dev/null @@ -1,313 +0,0 @@ -// Package thinking provides unified thinking configuration processing. -package thinking - -import ( - "strings" - "testing" -) - -// TestParseSuffix tests the ParseSuffix function. -// -// ParseSuffix extracts thinking suffix from model name. -// Format: model-name(value) where value is the raw suffix content. -// This function only extracts; interpretation is done by other functions. -func TestParseSuffix(t *testing.T) { - tests := []struct { - name string - model string - wantModel string - wantSuffix bool - wantRaw string - }{ - {"no suffix", "claude-sonnet-4-5", "claude-sonnet-4-5", false, ""}, - {"numeric suffix", "model(1000)", "model", true, "1000"}, - {"level suffix", "gpt-5(high)", "gpt-5", true, "high"}, - {"auto suffix", "gemini-2.5-pro(auto)", "gemini-2.5-pro", true, "auto"}, - {"none suffix", "model(none)", "model", true, "none"}, - {"complex model name", "gemini-2.5-flash-lite(8192)", "gemini-2.5-flash-lite", true, "8192"}, - {"alias with suffix", "g25p(1000)", "g25p", true, "1000"}, - {"empty suffix", "model()", "model", true, ""}, - {"nested parens", "model(a(b))", "model(a", true, "b)"}, - {"no model name", "(1000)", "", true, "1000"}, - {"unmatched open", "model(", "model(", false, ""}, - {"unmatched close", "model)", "model)", false, ""}, - {"paren not at end", "model(1000)extra", "model(1000)extra", false, ""}, - {"empty string", "", "", false, ""}, - {"large budget", "claude-opus(128000)", "claude-opus", true, "128000"}, - {"xhigh level", "gpt-5.2(xhigh)", "gpt-5.2", true, "xhigh"}, - {"minimal level", "model(minimal)", "model", true, "minimal"}, - {"medium level", "model(medium)", "model", true, "medium"}, - {"low level", "model(low)", "model", true, "low"}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := ParseSuffix(tt.model) - if got.ModelName != tt.wantModel { - t.Errorf("ModelName = %q, want %q", got.ModelName, tt.wantModel) - } - if got.HasSuffix != tt.wantSuffix { - t.Errorf("HasSuffix = %v, want %v", got.HasSuffix, tt.wantSuffix) - } - if got.RawSuffix != tt.wantRaw { - t.Errorf("RawSuffix = %q, want %q", got.RawSuffix, tt.wantRaw) - } - }) - } -} - -// TestParseSuffixWithError tests invalid suffix error reporting. 
-func TestParseSuffixWithError(t *testing.T) { - tests := []struct { - name string - model string - wantHasSuffix bool - }{ - {"missing close paren", "model(abc", false}, - {"unmatched close paren", "model)", false}, - {"paren not at end", "model(1000)extra", false}, - {"no suffix", "gpt-5", false}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, err := ParseSuffixWithError(tt.model) - if tt.name == "no suffix" { - if err != nil { - t.Fatalf("ParseSuffixWithError(%q) error = %v, want nil", tt.model, err) - } - if got.HasSuffix != tt.wantHasSuffix { - t.Errorf("HasSuffix = %v, want %v", got.HasSuffix, tt.wantHasSuffix) - } - return - } - - if err == nil { - t.Fatalf("ParseSuffixWithError(%q) error = nil, want error", tt.model) - } - thinkingErr, ok := err.(*ThinkingError) - if !ok { - t.Fatalf("ParseSuffixWithError(%q) error type = %T, want *ThinkingError", tt.model, err) - } - if thinkingErr.Code != ErrInvalidSuffix { - t.Errorf("error code = %v, want %v", thinkingErr.Code, ErrInvalidSuffix) - } - if !strings.Contains(thinkingErr.Message, tt.model) { - t.Errorf("message %q does not include input %q", thinkingErr.Message, tt.model) - } - if got.HasSuffix != tt.wantHasSuffix { - t.Errorf("HasSuffix = %v, want %v", got.HasSuffix, tt.wantHasSuffix) - } - }) - } -} - -// TestParseSuffixNumeric tests numeric suffix parsing. -// -// ParseNumericSuffix parses raw suffix content as integer budget. -// Only non-negative integers are valid. Negative numbers return ok=false. -func TestParseSuffixNumeric(t *testing.T) { - tests := []struct { - name string - rawSuffix string - wantBudget int - wantOK bool - }{ - {"small budget", "512", 512, true}, - {"standard budget", "8192", 8192, true}, - {"large budget", "100000", 100000, true}, - {"max int32", "2147483647", 2147483647, true}, - {"max int64", "9223372036854775807", 9223372036854775807, true}, - {"zero", "0", 0, true}, - {"negative one", "-1", 0, false}, - {"negative", "-100", 0, false}, - {"int64 overflow", "9223372036854775808", 0, false}, - {"large overflow", "99999999999999999999", 0, false}, - {"not a number", "abc", 0, false}, - {"level string", "high", 0, false}, - {"float", "1.5", 0, false}, - {"empty", "", 0, false}, - {"leading zero", "08192", 8192, true}, - {"whitespace", " 8192 ", 0, false}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - budget, ok := ParseNumericSuffix(tt.rawSuffix) - if budget != tt.wantBudget { - t.Errorf("budget = %d, want %d", budget, tt.wantBudget) - } - if ok != tt.wantOK { - t.Errorf("ok = %v, want %v", ok, tt.wantOK) - } - }) - } -} - -// TestParseSuffixLevel tests level suffix parsing. -// -// ParseLevelSuffix parses raw suffix content as discrete thinking level. -// Only effort levels (minimal, low, medium, high, xhigh) are valid. -// Special values (none, auto) return ok=false - use ParseSpecialSuffix instead. 
-func TestParseSuffixLevel(t *testing.T) { - tests := []struct { - name string - rawSuffix string - wantLevel ThinkingLevel - wantOK bool - }{ - {"minimal", "minimal", LevelMinimal, true}, - {"low", "low", LevelLow, true}, - {"medium", "medium", LevelMedium, true}, - {"high", "high", LevelHigh, true}, - {"xhigh", "xhigh", LevelXHigh, true}, - {"case HIGH", "HIGH", LevelHigh, true}, - {"case High", "High", LevelHigh, true}, - {"case hIgH", "hIgH", LevelHigh, true}, - {"case MINIMAL", "MINIMAL", LevelMinimal, true}, - {"case XHigh", "XHigh", LevelXHigh, true}, - {"none special", "none", "", false}, - {"auto special", "auto", "", false}, - {"unknown ultra", "ultra", "", false}, - {"unknown maximum", "maximum", "", false}, - {"unknown invalid", "invalid", "", false}, - {"numeric", "8192", "", false}, - {"numeric zero", "0", "", false}, - {"empty", "", "", false}, - {"whitespace", " high ", "", false}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - level, ok := ParseLevelSuffix(tt.rawSuffix) - if level != tt.wantLevel { - t.Errorf("level = %q, want %q", level, tt.wantLevel) - } - if ok != tt.wantOK { - t.Errorf("ok = %v, want %v", ok, tt.wantOK) - } - }) - } -} - -// TestParseSuffixSpecialValues tests special value suffix parsing. -// -// Depends on: Epic 3 Story 3-4 (special value suffix parsing) -func TestParseSuffixSpecialValues(t *testing.T) { - tests := []struct { - name string - rawSuffix string - wantMode ThinkingMode - wantOK bool - }{ - {"none", "none", ModeNone, true}, - {"auto", "auto", ModeAuto, true}, - {"negative one", "-1", ModeAuto, true}, - {"case NONE", "NONE", ModeNone, true}, - {"case Auto", "Auto", ModeAuto, true}, - {"case aUtO", "aUtO", ModeAuto, true}, - {"case NoNe", "NoNe", ModeNone, true}, - {"empty", "", ModeBudget, false}, - {"level high", "high", ModeBudget, false}, - {"numeric", "8192", ModeBudget, false}, - {"negative other", "-2", ModeBudget, false}, - {"whitespace", " none ", ModeBudget, false}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - mode, ok := ParseSpecialSuffix(tt.rawSuffix) - if mode != tt.wantMode { - t.Errorf("mode = %q, want %q", mode, tt.wantMode) - } - if ok != tt.wantOK { - t.Errorf("ok = %v, want %v", ok, tt.wantOK) - } - }) - } -} - -// TestParseSuffixAliasFormats tests alias model suffix parsing. -// -// This test validates that short model aliases (e.g., g25p, cs45) work correctly -// with all suffix types. Alias-to-canonical-model mapping is caller's responsibility. 
-func TestParseSuffixAliasFormats(t *testing.T) { - tests := []struct { - name string // test case description - model string // input model string with optional suffix - wantName string // expected ModelName after parsing - wantSuffix bool // expected HasSuffix value - wantRaw string // expected RawSuffix value - checkBudget bool // if true, verify ParseNumericSuffix result - wantBudget int // expected budget (only when checkBudget=true) - checkLevel bool // if true, verify ParseLevelSuffix result - wantLevel ThinkingLevel // expected level (only when checkLevel=true) - checkMode bool // if true, verify ParseSpecialSuffix result - wantMode ThinkingMode // expected mode (only when checkMode=true) - }{ - // Alias + numeric suffix - {"alias numeric g25p", "g25p(1000)", "g25p", true, "1000", true, 1000, false, "", false, 0}, - {"alias numeric cs45", "cs45(16384)", "cs45", true, "16384", true, 16384, false, "", false, 0}, - {"alias numeric g3f", "g3f(8192)", "g3f", true, "8192", true, 8192, false, "", false, 0}, - // Alias + level suffix - {"alias level gpt52", "gpt52(high)", "gpt52", true, "high", false, 0, true, LevelHigh, false, 0}, - {"alias level g25f", "g25f(medium)", "g25f", true, "medium", false, 0, true, LevelMedium, false, 0}, - {"alias level cs4", "cs4(low)", "cs4", true, "low", false, 0, true, LevelLow, false, 0}, - // Alias + special suffix - {"alias auto g3f", "g3f(auto)", "g3f", true, "auto", false, 0, false, "", true, ModeAuto}, - {"alias none claude", "claude(none)", "claude", true, "none", false, 0, false, "", true, ModeNone}, - {"alias -1 g25p", "g25p(-1)", "g25p", true, "-1", false, 0, false, "", true, ModeAuto}, - // Single char alias - {"single char c", "c(1024)", "c", true, "1024", true, 1024, false, "", false, 0}, - {"single char g", "g(high)", "g", true, "high", false, 0, true, LevelHigh, false, 0}, - // Alias containing numbers - {"alias with num gpt5", "gpt5(medium)", "gpt5", true, "medium", false, 0, true, LevelMedium, false, 0}, - {"alias with num g25", "g25(1000)", "g25", true, "1000", true, 1000, false, "", false, 0}, - // Edge cases - {"no suffix", "g25p", "g25p", false, "", false, 0, false, "", false, 0}, - {"empty alias", "(1000)", "", true, "1000", true, 1000, false, "", false, 0}, - {"hyphen alias", "g-25-p(1000)", "g-25-p", true, "1000", true, 1000, false, "", false, 0}, - {"underscore alias", "g_25_p(high)", "g_25_p", true, "high", false, 0, true, LevelHigh, false, 0}, - {"nested parens", "g25p(test)(1000)", "g25p(test)", true, "1000", true, 1000, false, "", false, 0}, - } - - // ParseSuffix only extracts alias and suffix; mapping to canonical model is caller responsibility. 
- for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := ParseSuffix(tt.model) - - if result.ModelName != tt.wantName { - t.Errorf("ParseSuffix(%q).ModelName = %q, want %q", tt.model, result.ModelName, tt.wantName) - } - if result.HasSuffix != tt.wantSuffix { - t.Errorf("ParseSuffix(%q).HasSuffix = %v, want %v", tt.model, result.HasSuffix, tt.wantSuffix) - } - if result.RawSuffix != tt.wantRaw { - t.Errorf("ParseSuffix(%q).RawSuffix = %q, want %q", tt.model, result.RawSuffix, tt.wantRaw) - } - - if result.HasSuffix { - if tt.checkBudget { - budget, ok := ParseNumericSuffix(result.RawSuffix) - if !ok || budget != tt.wantBudget { - t.Errorf("ParseNumericSuffix(%q) = (%d, %v), want (%d, true)", - result.RawSuffix, budget, ok, tt.wantBudget) - } - } - if tt.checkLevel { - level, ok := ParseLevelSuffix(result.RawSuffix) - if !ok || level != tt.wantLevel { - t.Errorf("ParseLevelSuffix(%q) = (%q, %v), want (%q, true)", - result.RawSuffix, level, ok, tt.wantLevel) - } - } - if tt.checkMode { - mode, ok := ParseSpecialSuffix(result.RawSuffix) - if !ok || mode != tt.wantMode { - t.Errorf("ParseSpecialSuffix(%q) = (%v, %v), want (%v, true)", - result.RawSuffix, mode, ok, tt.wantMode) - } - } - } - }) - } -} diff --git a/internal/thinking/validate_test.go b/internal/thinking/validate_test.go deleted file mode 100644 index e17a1586..00000000 --- a/internal/thinking/validate_test.go +++ /dev/null @@ -1,349 +0,0 @@ -// Package thinking provides unified thinking configuration processing logic. -package thinking - -import ( - "strings" - "testing" - "unicode" - "unicode/utf8" - - "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" - log "github.com/sirupsen/logrus" - logtest "github.com/sirupsen/logrus/hooks/test" -) - -// TestClampBudget tests the ClampBudget function. -// -// ClampBudget applies range constraints to a budget value: -// - budget < Min → clamp to Min (with Debug log) -// - budget > Max → clamp to Max (with Debug log) -// - Auto value (-1) passes through unchanged -func TestClampBudget(t *testing.T) { - tests := []struct { - name string - value int - min int - max int - want int - }{ - // Within range - no clamping - {"within range", 8192, 128, 32768, 8192}, - {"at min", 128, 128, 32768, 128}, - {"at max", 32768, 128, 32768, 32768}, - - // Below min - clamp to min - {"below min", 100, 128, 32768, 128}, - - // Above max - clamp to max - {"above max", 50000, 128, 32768, 32768}, - - // Edge cases - {"min equals max", 5000, 5000, 5000, 5000}, - {"zero min zero value", 0, 0, 100, 0}, - - // Auto value (-1) - passes through - {"auto value", -1, 128, 32768, -1}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := ClampBudget(tt.value, tt.min, tt.max) - if got != tt.want { - t.Errorf("ClampBudget(%d, %d, %d) = %d, want %d", - tt.value, tt.min, tt.max, got, tt.want) - } - }) - } -} - -// TestZeroAllowedBoundaryHandling tests ZeroAllowed=false edge cases. -// -// When ZeroAllowed=false and user requests 0, clamp to Min + log Warn. 
-func TestZeroAllowedBoundaryHandling(t *testing.T) {
- tests := []struct {
- name string
- value int
- min int
- max int
- zeroAllowed bool
- want int
- }{
- // ZeroAllowed=true: 0 stays 0
- {"zero allowed - keep zero", 0, 128, 32768, true, 0},
-
- // ZeroAllowed=false: 0 clamps to min
- {"zero not allowed - clamp to min", 0, 128, 32768, false, 128},
-
- // ZeroAllowed=false but non-zero value: normal clamping
- {"zero not allowed - positive value", 8192, 1024, 100000, false, 8192},
-
- // Auto value (-1) always passes through
- {"auto value", -1, 128, 32768, false, -1},
- }
-
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- got := ClampBudgetWithZeroCheck(tt.value, tt.min, tt.max, tt.zeroAllowed)
- if got != tt.want {
- t.Errorf("ClampBudgetWithZeroCheck(%d, %d, %d, %v) = %d, want %d",
- tt.value, tt.min, tt.max, tt.zeroAllowed, got, tt.want)
- }
- })
- }
-}
-
-// TestValidateConfigFramework verifies the ValidateConfig function framework.
-// This test is merged into TestValidateConfig for consolidation.
-
-// TestValidateConfigNotSupported verifies nil support handling.
-// This test is merged into TestValidateConfig for consolidation.
-
-// TestValidateConfigConversion verifies mode conversion based on capability.
-// This test is merged into TestValidateConfig for consolidation.
-
-// TestValidateConfigLevelSupport verifies level list validation.
-// This test is merged into TestValidateConfig for consolidation.
-
-// TestValidateConfigClamping verifies budget clamping behavior.
-// This test is merged into TestValidateConfig for consolidation.
-
-// TestValidateConfig is the comprehensive test for ValidateConfig function.
-//
-// ValidateConfig checks if a ThinkingConfig is valid for a given model.
-// This test covers all validation scenarios including:
-// - Framework basics (nil support with ModeNone)
-// - Error cases (thinking not supported, level not supported, dynamic not allowed)
-// - Mode conversion (budget-only, level-only, hybrid)
-// - Budget clamping (to max, to min)
-// - ZeroAllowed boundary handling (ModeNone with ZeroAllowed=false)
-// - DynamicAllowed validation
-//
-// Depends on: Epic 5 Story 5-3 (config validity validation)
-func TestValidateConfig(t *testing.T) {
- tests := []struct {
- name string
- config ThinkingConfig
- support *registry.ThinkingSupport
- wantMode ThinkingMode
- wantBudget int
- wantLevel ThinkingLevel
- wantErr bool
- wantCode ErrorCode
- }{
- // Framework basics
- {"nil support mode none", ThinkingConfig{Mode: ModeNone, Budget: 0}, nil, ModeNone, 0, "", false, ""},
-
- // Valid configs - no conversion needed
- {"budget-only keeps budget", ThinkingConfig{Mode: ModeBudget, Budget: 8192}, &registry.ThinkingSupport{Min: 1024, Max: 100000}, ModeBudget, 8192, "", false, ""},
-
- // Auto-conversion: Level → Budget
- {"budget-only converts level", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}, &registry.ThinkingSupport{Min: 1024, Max: 100000}, ModeBudget, 24576, "", false, ""},
-
- // Auto-conversion: Budget → Level
- {"level-only converts budget", ThinkingConfig{Mode: ModeBudget, Budget: 5000}, &registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}}, ModeLevel, 0, LevelMedium, false, ""},
-
- // Hybrid preserves original format
- {"hybrid preserves level", ThinkingConfig{Mode: ModeLevel, Level: LevelLow}, &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}}, ModeLevel, 0, LevelLow, false, ""},
-
- // Budget clamping
- {"budget clamped to max", ThinkingConfig{Mode: ModeBudget, Budget: 200000}, &registry.ThinkingSupport{Min: 1024, Max: 100000}, ModeBudget, 100000, "", false, ""},
- {"budget clamped to min", ThinkingConfig{Mode: ModeBudget, Budget: 100}, &registry.ThinkingSupport{Min: 1024, Max: 100000}, ModeBudget, 1024, "", false, ""},
-
- // Error: thinking not supported
- {"thinking not supported", ThinkingConfig{Mode: ModeBudget, Budget: 8192}, nil, 0, 0, "", true, ErrThinkingNotSupported},
-
- // Error: level not in list
- {"level not supported", ThinkingConfig{Mode: ModeLevel, Level: LevelXHigh}, &registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}}, 0, 0, "", true, ErrLevelNotSupported},
-
- // Level case-insensitive
- {"level supported case-insensitive", ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel("HIGH")}, &registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}}, ModeLevel, 0, ThinkingLevel("HIGH"), false, ""},
-
- // ModeAuto with DynamicAllowed
- {"auto with dynamic allowed", ThinkingConfig{Mode: ModeAuto, Budget: -1}, &registry.ThinkingSupport{Min: 128, Max: 32768, DynamicAllowed: true}, ModeAuto, -1, "", false, ""},
-
- // ModeAuto with DynamicAllowed=false - converts to mid-range (M3)
- {"auto with dynamic not allowed", ThinkingConfig{Mode: ModeAuto, Budget: -1}, &registry.ThinkingSupport{Min: 128, Max: 32768, DynamicAllowed: false}, ModeBudget, 16448, "", false, ""},
-
- // ModeNone with ZeroAllowed=true - stays as ModeNone
- {"mode none with zero allowed", ThinkingConfig{Mode: ModeNone, Budget: 0}, &registry.ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: true}, ModeNone, 0, "", false, ""},
-
- // Budget=0 converts to ModeNone before clamping (M1)
- {"budget zero converts to none", ThinkingConfig{Mode: ModeBudget, Budget: 0}, &registry.ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false}, ModeNone, 128, "", false, ""},
-
- // Level=none converts to ModeNone before clamping, then Level set to lowest
- {"level none converts to none", ThinkingConfig{Mode: ModeLevel, Level: LevelNone}, &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}, ZeroAllowed: false}, ModeNone, 128, ThinkingLevel("low"), false, ""},
- {"level auto converts to auto", ThinkingConfig{Mode: ModeLevel, Level: LevelAuto}, &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}, DynamicAllowed: true}, ModeAuto, -1, "", false, ""},
- // M1: Level=auto with DynamicAllowed=false - converts to mid-range budget
- {"level auto with dynamic not allowed", ThinkingConfig{Mode: ModeLevel, Level: LevelAuto}, &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}, DynamicAllowed: false}, ModeBudget, 16448, "", false, ""},
- // M2: Level=auto on Budget-only model (no Levels)
- {"level auto on budget-only model", ThinkingConfig{Mode: ModeLevel, Level: LevelAuto}, &registry.ThinkingSupport{Min: 128, Max: 32768, DynamicAllowed: true}, ModeAuto, -1, "", false, ""},
-
- // ModeNone with ZeroAllowed=false - clamps to min but preserves ModeNone (M1)
- {"mode none with zero not allowed - preserve mode", ThinkingConfig{Mode: ModeNone, Budget: 0}, &registry.ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false}, ModeNone, 1024, "", false, ""},
-
- // ModeNone with clamped Budget > 0 and Levels: sets Level to lowest
- {"mode none clamped with levels", ThinkingConfig{Mode: ModeNone, Budget: 0}, &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}, ZeroAllowed: false}, ModeNone, 128, ThinkingLevel("low"), false, ""},
- }
-
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- got, err := ValidateConfig(tt.config, tt.support)
- if tt.wantErr {
- if err == nil {
- t.Fatalf("ValidateConfig(%+v, support) error = nil, want %v", tt.config, tt.wantCode)
- }
- thinkingErr, ok := err.(*ThinkingError)
- if !ok {
- t.Fatalf("ValidateConfig(%+v, support) error type = %T, want *ThinkingError", tt.config, err)
- }
- if thinkingErr.Code != tt.wantCode {
- t.Errorf("ValidateConfig(%+v, support) code = %v, want %v", tt.config, thinkingErr.Code, tt.wantCode)
- }
- return
- }
- if err != nil {
- t.Fatalf("ValidateConfig(%+v, support) returned error: %v", tt.config, err)
- }
- if got == nil {
- t.Fatalf("ValidateConfig(%+v, support) returned nil config", tt.config)
- }
- if got.Mode != tt.wantMode {
- t.Errorf("ValidateConfig(%+v, support) Mode = %v, want %v", tt.config, got.Mode, tt.wantMode)
- }
- if got.Budget != tt.wantBudget {
- t.Errorf("ValidateConfig(%+v, support) Budget = %d, want %d", tt.config, got.Budget, tt.wantBudget)
- }
- if got.Level != tt.wantLevel {
- t.Errorf("ValidateConfig(%+v, support) Level = %q, want %q", tt.config, got.Level, tt.wantLevel)
- }
- })
- }
-}
-
-// TestValidationErrorMessages tests error message formatting.
-//
-// Error messages should:
-// - Be lowercase
-// - Have no trailing period
-// - Include context with %s/%d
-//
-// Depends on: Epic 5 Story 5-4 (validation error messages)
-func TestValidationErrorMessages(t *testing.T) {
- tests := []struct {
- name string
- getErr func() error
- wantCode ErrorCode
- wantContains string
- }{
- {"invalid suffix", func() error {
- _, err := ParseSuffixWithError("model(abc")
- return err
- }, ErrInvalidSuffix, "model(abc"},
- {"level not supported", func() error {
- _, err := ValidateConfig(ThinkingConfig{Mode: ModeLevel, Level: LevelXHigh}, &registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}})
- return err
- }, ErrLevelNotSupported, "valid levels: low, medium, high"},
- {"thinking not supported", func() error {
- _, err := ValidateConfig(ThinkingConfig{Mode: ModeBudget, Budget: 1024}, nil)
- return err
- }, ErrThinkingNotSupported, "thinking not supported for this model"},
- }
-
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- err := tt.getErr()
- if err == nil {
- t.Fatalf("error = nil, want ThinkingError")
- }
- thinkingErr, ok := err.(*ThinkingError)
- if !ok {
- t.Fatalf("error type = %T, want *ThinkingError", err)
- }
- if thinkingErr.Code != tt.wantCode {
- t.Errorf("code = %v, want %v", thinkingErr.Code, tt.wantCode)
- }
- if thinkingErr.Message == "" {
- t.Fatalf("message is empty")
- }
- first, _ := utf8.DecodeRuneInString(thinkingErr.Message)
- if unicode.IsLetter(first) && !unicode.IsLower(first) {
- t.Errorf("message does not start with lowercase: %q", thinkingErr.Message)
- }
- if strings.HasSuffix(thinkingErr.Message, ".") {
- t.Errorf("message has trailing period: %q", thinkingErr.Message)
- }
- if !strings.Contains(thinkingErr.Message, tt.wantContains) {
- t.Errorf("message %q does not contain %q", thinkingErr.Message, tt.wantContains)
- }
- })
- }
-}
-
-// TestClampingLogging tests that clamping produces correct log entries.
-// -// Clamping behavior: -// - Normal clamp (budget outside range) → Debug log -// - ZeroAllowed=false + zero request → Warn log -// -// Depends on: Epic 5 Story 5-1, 5-2 -func TestClampingLogging(t *testing.T) { - tests := []struct { - name string - useZeroCheck bool - budget int - min int - max int - zeroAllowed bool - wantLevel log.Level - wantReason string - wantClamped int - }{ - {"above max - debug", false, 50000, 128, 32768, false, log.DebugLevel, "", 32768}, - {"below min - debug", false, 50, 128, 32768, false, log.DebugLevel, "", 128}, - {"zero not allowed - warn", true, 0, 128, 32768, false, log.WarnLevel, "zero_not_allowed", 128}, - } - - logger := log.StandardLogger() - originalLevel := logger.GetLevel() - logger.SetLevel(log.DebugLevel) - hook := logtest.NewLocal(logger) - t.Cleanup(func() { - logger.SetLevel(originalLevel) - hook.Reset() - }) - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - hook.Reset() - var got int - if tt.useZeroCheck { - got = ClampBudgetWithZeroCheck(tt.budget, tt.min, tt.max, tt.zeroAllowed) - } else { - got = ClampBudget(tt.budget, tt.min, tt.max) - } - if got != tt.wantClamped { - t.Fatalf("clamped budget = %d, want %d", got, tt.wantClamped) - } - - entry := hook.LastEntry() - if entry == nil { - t.Fatalf("no log entry captured") - } - if entry.Level != tt.wantLevel { - t.Errorf("log level = %v, want %v", entry.Level, tt.wantLevel) - } - - fields := []string{"original_value", "clamped_to", "min", "max"} - for _, key := range fields { - if _, ok := entry.Data[key]; !ok { - t.Errorf("missing field %q", key) - } - } - if tt.wantReason != "" { - if value, ok := entry.Data["reason"]; !ok || value != tt.wantReason { - t.Errorf("reason = %v, want %v", value, tt.wantReason) - } - } - }) - } -} diff --git a/test/gemini3_thinking_level_test.go b/test/gemini3_thinking_level_test.go deleted file mode 100644 index b26bcff3..00000000 --- a/test/gemini3_thinking_level_test.go +++ /dev/null @@ -1,423 +0,0 @@ -package test - -import ( - "fmt" - "testing" - "time" - - "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" - "github.com/tidwall/gjson" -) - -// registerGemini3Models loads Gemini 3 models into the registry for testing. 
-func registerGemini3Models(t *testing.T) func() { - t.Helper() - reg := registry.GetGlobalRegistry() - uid := fmt.Sprintf("gemini3-test-%d", time.Now().UnixNano()) - reg.RegisterClient(uid+"-gemini", "gemini", registry.GetGeminiModels()) - reg.RegisterClient(uid+"-aistudio", "aistudio", registry.GetAIStudioModels()) - return func() { - reg.UnregisterClient(uid + "-gemini") - reg.UnregisterClient(uid + "-aistudio") - } -} - -func TestIsGemini3Model(t *testing.T) { - cases := []struct { - model string - expected bool - }{ - {"gemini-3-pro-preview", true}, - {"gemini-3-flash-preview", true}, - {"gemini_3_pro_preview", true}, - {"gemini-3-pro", true}, - {"gemini-3-flash", true}, - {"GEMINI-3-PRO-PREVIEW", true}, - {"gemini-2.5-pro", false}, - {"gemini-2.5-flash", false}, - {"gpt-5", false}, - {"claude-sonnet-4-5", false}, - {"", false}, - } - - for _, cs := range cases { - t.Run(cs.model, func(t *testing.T) { - got := util.IsGemini3Model(cs.model) - if got != cs.expected { - t.Fatalf("IsGemini3Model(%q) = %v, want %v", cs.model, got, cs.expected) - } - }) - } -} - -func TestIsGemini3ProModel(t *testing.T) { - cases := []struct { - model string - expected bool - }{ - {"gemini-3-pro-preview", true}, - {"gemini_3_pro_preview", true}, - {"gemini-3-pro", true}, - {"GEMINI-3-PRO-PREVIEW", true}, - {"gemini-3-flash-preview", false}, - {"gemini-3-flash", false}, - {"gemini-2.5-pro", false}, - {"", false}, - } - - for _, cs := range cases { - t.Run(cs.model, func(t *testing.T) { - got := util.IsGemini3ProModel(cs.model) - if got != cs.expected { - t.Fatalf("IsGemini3ProModel(%q) = %v, want %v", cs.model, got, cs.expected) - } - }) - } -} - -func TestIsGemini3FlashModel(t *testing.T) { - cases := []struct { - model string - expected bool - }{ - {"gemini-3-flash-preview", true}, - {"gemini_3_flash_preview", true}, - {"gemini-3-flash", true}, - {"GEMINI-3-FLASH-PREVIEW", true}, - {"gemini-3-pro-preview", false}, - {"gemini-3-pro", false}, - {"gemini-2.5-flash", false}, - {"", false}, - } - - for _, cs := range cases { - t.Run(cs.model, func(t *testing.T) { - got := util.IsGemini3FlashModel(cs.model) - if got != cs.expected { - t.Fatalf("IsGemini3FlashModel(%q) = %v, want %v", cs.model, got, cs.expected) - } - }) - } -} - -func TestValidateGemini3ThinkingLevel(t *testing.T) { - cases := []struct { - name string - model string - level string - wantOK bool - wantVal string - }{ - // Gemini 3 Pro: supports "low", "high" - {"pro-low", "gemini-3-pro-preview", "low", true, "low"}, - {"pro-high", "gemini-3-pro-preview", "high", true, "high"}, - {"pro-minimal-invalid", "gemini-3-pro-preview", "minimal", false, ""}, - {"pro-medium-invalid", "gemini-3-pro-preview", "medium", false, ""}, - - // Gemini 3 Flash: supports "minimal", "low", "medium", "high" - {"flash-minimal", "gemini-3-flash-preview", "minimal", true, "minimal"}, - {"flash-low", "gemini-3-flash-preview", "low", true, "low"}, - {"flash-medium", "gemini-3-flash-preview", "medium", true, "medium"}, - {"flash-high", "gemini-3-flash-preview", "high", true, "high"}, - - // Case insensitivity - {"flash-LOW-case", "gemini-3-flash-preview", "LOW", true, "low"}, - {"flash-High-case", "gemini-3-flash-preview", "High", true, "high"}, - {"pro-HIGH-case", "gemini-3-pro-preview", "HIGH", true, "high"}, - - // Invalid levels - {"flash-invalid", "gemini-3-flash-preview", "xhigh", false, ""}, - {"flash-invalid-auto", "gemini-3-flash-preview", "auto", false, ""}, - {"flash-empty", "gemini-3-flash-preview", "", false, ""}, - - // Non-Gemini 3 models - {"non-gemini3", 
"gemini-2.5-pro", "high", false, ""}, - {"gpt5", "gpt-5", "high", false, ""}, - } - - for _, cs := range cases { - t.Run(cs.name, func(t *testing.T) { - got, ok := util.ValidateGemini3ThinkingLevel(cs.model, cs.level) - if ok != cs.wantOK { - t.Fatalf("ValidateGemini3ThinkingLevel(%q, %q) ok = %v, want %v", cs.model, cs.level, ok, cs.wantOK) - } - if got != cs.wantVal { - t.Fatalf("ValidateGemini3ThinkingLevel(%q, %q) = %q, want %q", cs.model, cs.level, got, cs.wantVal) - } - }) - } -} - -func TestThinkingBudgetToGemini3Level(t *testing.T) { - cases := []struct { - name string - model string - budget int - wantOK bool - wantVal string - }{ - // Gemini 3 Pro: maps to "low" or "high" - {"pro-dynamic", "gemini-3-pro-preview", -1, true, "high"}, - {"pro-zero", "gemini-3-pro-preview", 0, true, "low"}, - {"pro-small", "gemini-3-pro-preview", 1000, true, "low"}, - {"pro-medium", "gemini-3-pro-preview", 8000, true, "low"}, - {"pro-large", "gemini-3-pro-preview", 20000, true, "high"}, - {"pro-huge", "gemini-3-pro-preview", 50000, true, "high"}, - - // Gemini 3 Flash: maps to "minimal", "low", "medium", "high" - {"flash-dynamic", "gemini-3-flash-preview", -1, true, "high"}, - {"flash-zero", "gemini-3-flash-preview", 0, true, "minimal"}, - {"flash-tiny", "gemini-3-flash-preview", 500, true, "minimal"}, - {"flash-small", "gemini-3-flash-preview", 1000, true, "low"}, - {"flash-medium-val", "gemini-3-flash-preview", 8000, true, "medium"}, - {"flash-large", "gemini-3-flash-preview", 20000, true, "high"}, - {"flash-huge", "gemini-3-flash-preview", 50000, true, "high"}, - - // Non-Gemini 3 models should return false - {"gemini25-budget", "gemini-2.5-pro", 8000, false, ""}, - {"gpt5-budget", "gpt-5", 8000, false, ""}, - } - - for _, cs := range cases { - t.Run(cs.name, func(t *testing.T) { - got, ok := util.ThinkingBudgetToGemini3Level(cs.model, cs.budget) - if ok != cs.wantOK { - t.Fatalf("ThinkingBudgetToGemini3Level(%q, %d) ok = %v, want %v", cs.model, cs.budget, ok, cs.wantOK) - } - if got != cs.wantVal { - t.Fatalf("ThinkingBudgetToGemini3Level(%q, %d) = %q, want %q", cs.model, cs.budget, got, cs.wantVal) - } - }) - } -} - -func TestApplyGemini3ThinkingLevelFromMetadata(t *testing.T) { - cleanup := registerGemini3Models(t) - defer cleanup() - - cases := []struct { - name string - model string - metadata map[string]any - inputBody string - wantLevel string - wantInclude bool - wantNoChange bool - }{ - { - name: "flash-minimal-from-suffix", - model: "gemini-3-flash-preview", - metadata: map[string]any{"reasoning_effort": "minimal"}, - inputBody: `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`, - wantLevel: "minimal", - wantInclude: true, - }, - { - name: "flash-medium-from-suffix", - model: "gemini-3-flash-preview", - metadata: map[string]any{"reasoning_effort": "medium"}, - inputBody: `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`, - wantLevel: "medium", - wantInclude: true, - }, - { - name: "pro-high-from-suffix", - model: "gemini-3-pro-preview", - metadata: map[string]any{"reasoning_effort": "high"}, - inputBody: `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`, - wantLevel: "high", - wantInclude: true, - }, - { - name: "no-metadata-no-change", - model: "gemini-3-flash-preview", - metadata: nil, - inputBody: `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`, - wantNoChange: true, - }, - { - name: "non-gemini3-no-change", - model: "gemini-2.5-pro", - metadata: map[string]any{"reasoning_effort": "high"}, - inputBody: 
`{"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}`, - wantNoChange: true, - }, - { - name: "invalid-level-no-change", - model: "gemini-3-flash-preview", - metadata: map[string]any{"reasoning_effort": "xhigh"}, - inputBody: `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`, - wantNoChange: true, - }, - } - - for _, cs := range cases { - t.Run(cs.name, func(t *testing.T) { - input := []byte(cs.inputBody) - result := util.ApplyGemini3ThinkingLevelFromMetadata(cs.model, cs.metadata, input) - - if cs.wantNoChange { - if string(result) != cs.inputBody { - t.Fatalf("expected no change, but got: %s", string(result)) - } - return - } - - level := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingLevel") - if !level.Exists() { - t.Fatalf("thinkingLevel not set in result: %s", string(result)) - } - if level.String() != cs.wantLevel { - t.Fatalf("thinkingLevel = %q, want %q", level.String(), cs.wantLevel) - } - - include := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts") - if cs.wantInclude && (!include.Exists() || !include.Bool()) { - t.Fatalf("includeThoughts should be true, got: %s", string(result)) - } - }) - } -} - -func TestApplyGemini3ThinkingLevelFromMetadataCLI(t *testing.T) { - cleanup := registerGemini3Models(t) - defer cleanup() - - cases := []struct { - name string - model string - metadata map[string]any - inputBody string - wantLevel string - wantInclude bool - wantNoChange bool - }{ - { - name: "flash-minimal-from-suffix-cli", - model: "gemini-3-flash-preview", - metadata: map[string]any{"reasoning_effort": "minimal"}, - inputBody: `{"request":{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}}`, - wantLevel: "minimal", - wantInclude: true, - }, - { - name: "flash-low-from-suffix-cli", - model: "gemini-3-flash-preview", - metadata: map[string]any{"reasoning_effort": "low"}, - inputBody: `{"request":{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}}`, - wantLevel: "low", - wantInclude: true, - }, - { - name: "pro-low-from-suffix-cli", - model: "gemini-3-pro-preview", - metadata: map[string]any{"reasoning_effort": "low"}, - inputBody: `{"request":{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}}`, - wantLevel: "low", - wantInclude: true, - }, - { - name: "no-metadata-no-change-cli", - model: "gemini-3-flash-preview", - metadata: nil, - inputBody: `{"request":{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}}`, - wantNoChange: true, - }, - { - name: "non-gemini3-no-change-cli", - model: "gemini-2.5-pro", - metadata: map[string]any{"reasoning_effort": "high"}, - inputBody: `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}}`, - wantNoChange: true, - }, - } - - for _, cs := range cases { - t.Run(cs.name, func(t *testing.T) { - input := []byte(cs.inputBody) - result := util.ApplyGemini3ThinkingLevelFromMetadataCLI(cs.model, cs.metadata, input) - - if cs.wantNoChange { - if string(result) != cs.inputBody { - t.Fatalf("expected no change, but got: %s", string(result)) - } - return - } - - level := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel") - if !level.Exists() { - t.Fatalf("thinkingLevel not set in result: %s", string(result)) - } - if level.String() != cs.wantLevel { - t.Fatalf("thinkingLevel = %q, want %q", level.String(), cs.wantLevel) - } - - include := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts") - if cs.wantInclude && (!include.Exists() || !include.Bool()) { - 
t.Fatalf("includeThoughts should be true, got: %s", string(result)) - } - }) - } -} - -func TestNormalizeGeminiThinkingBudget_Gemini3Conversion(t *testing.T) { - cleanup := registerGemini3Models(t) - defer cleanup() - - cases := []struct { - name string - model string - inputBody string - wantLevel string - wantBudget bool // if true, expect thinkingBudget instead of thinkingLevel - }{ - { - name: "gemini3-flash-budget-to-level", - model: "gemini-3-flash-preview", - inputBody: `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8000}}}`, - wantLevel: "medium", - }, - { - name: "gemini3-pro-budget-to-level", - model: "gemini-3-pro-preview", - inputBody: `{"generationConfig":{"thinkingConfig":{"thinkingBudget":20000}}}`, - wantLevel: "high", - }, - { - name: "gemini25-keeps-budget", - model: "gemini-2.5-pro", - inputBody: `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8000}}}`, - wantBudget: true, - }, - } - - for _, cs := range cases { - t.Run(cs.name, func(t *testing.T) { - result := util.NormalizeGeminiThinkingBudget(cs.model, []byte(cs.inputBody)) - - if cs.wantBudget { - budget := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget") - if !budget.Exists() { - t.Fatalf("thinkingBudget should exist for non-Gemini3 model: %s", string(result)) - } - level := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingLevel") - if level.Exists() { - t.Fatalf("thinkingLevel should not exist for non-Gemini3 model: %s", string(result)) - } - } else { - level := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingLevel") - if !level.Exists() { - t.Fatalf("thinkingLevel should exist for Gemini3 model: %s", string(result)) - } - if level.String() != cs.wantLevel { - t.Fatalf("thinkingLevel = %q, want %q", level.String(), cs.wantLevel) - } - budget := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget") - if budget.Exists() { - t.Fatalf("thinkingBudget should be removed for Gemini3 model: %s", string(result)) - } - } - }) - } -} diff --git a/test/model_alias_thinking_suffix_test.go b/test/model_alias_thinking_suffix_test.go deleted file mode 100644 index 236fca4a..00000000 --- a/test/model_alias_thinking_suffix_test.go +++ /dev/null @@ -1,262 +0,0 @@ -package test - -import ( - "testing" - - "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" - "github.com/tidwall/gjson" - "github.com/tidwall/sjson" -) - -// TestModelAliasThinkingSuffix tests the 32 test cases defined in docs/thinking_suffix_test_cases.md -// These tests verify the thinking suffix parsing and application logic across different providers. -func TestModelAliasThinkingSuffix(t *testing.T) { - tests := []struct { - id int - name string - provider string - requestModel string - suffixType string - expectedField string // "thinkingBudget", "thinkingLevel", "budget_tokens", "reasoning_effort", "enable_thinking" - expectedValue any - upstreamModel string // The upstream model after alias resolution - isAlias bool - }{ - // === 1. 
Antigravity Provider === - // 1.1 Budget-only models (Gemini 2.5) - {1, "antigravity_original_numeric", "antigravity", "gemini-2.5-computer-use-preview-10-2025(1000)", "numeric", "thinkingBudget", 1000, "gemini-2.5-computer-use-preview-10-2025", false}, - {2, "antigravity_alias_numeric", "antigravity", "gp(1000)", "numeric", "thinkingBudget", 1000, "gemini-2.5-computer-use-preview-10-2025", true}, - // 1.2 Budget+Levels models (Gemini 3) - {3, "antigravity_original_numeric_to_level", "antigravity", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false}, - {4, "antigravity_original_level", "antigravity", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false}, - {5, "antigravity_alias_numeric_to_level", "antigravity", "gf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true}, - {6, "antigravity_alias_level", "antigravity", "gf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true}, - - // === 2. Gemini CLI Provider === - // 2.1 Budget-only models - {7, "gemini_cli_original_numeric", "gemini-cli", "gemini-2.5-pro(8192)", "numeric", "thinkingBudget", 8192, "gemini-2.5-pro", false}, - {8, "gemini_cli_alias_numeric", "gemini-cli", "g25p(8192)", "numeric", "thinkingBudget", 8192, "gemini-2.5-pro", true}, - // 2.2 Budget+Levels models - {9, "gemini_cli_original_numeric_to_level", "gemini-cli", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false}, - {10, "gemini_cli_original_level", "gemini-cli", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false}, - {11, "gemini_cli_alias_numeric_to_level", "gemini-cli", "gf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true}, - {12, "gemini_cli_alias_level", "gemini-cli", "gf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true}, - - // === 3. Vertex Provider === - // 3.1 Budget-only models - {13, "vertex_original_numeric", "vertex", "gemini-2.5-pro(16384)", "numeric", "thinkingBudget", 16384, "gemini-2.5-pro", false}, - {14, "vertex_alias_numeric", "vertex", "vg25p(16384)", "numeric", "thinkingBudget", 16384, "gemini-2.5-pro", true}, - // 3.2 Budget+Levels models - {15, "vertex_original_numeric_to_level", "vertex", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false}, - {16, "vertex_original_level", "vertex", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false}, - {17, "vertex_alias_numeric_to_level", "vertex", "vgf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true}, - {18, "vertex_alias_level", "vertex", "vgf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true}, - - // === 4. 
AI Studio Provider === - // 4.1 Budget-only models - {19, "aistudio_original_numeric", "aistudio", "gemini-2.5-pro(12000)", "numeric", "thinkingBudget", 12000, "gemini-2.5-pro", false}, - {20, "aistudio_alias_numeric", "aistudio", "ag25p(12000)", "numeric", "thinkingBudget", 12000, "gemini-2.5-pro", true}, - // 4.2 Budget+Levels models - {21, "aistudio_original_numeric_to_level", "aistudio", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false}, - {22, "aistudio_original_level", "aistudio", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false}, - {23, "aistudio_alias_numeric_to_level", "aistudio", "agf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true}, - {24, "aistudio_alias_level", "aistudio", "agf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true}, - - // === 5. Claude Provider === - {25, "claude_original_numeric", "claude", "claude-sonnet-4-5-20250929(16384)", "numeric", "budget_tokens", 16384, "claude-sonnet-4-5-20250929", false}, - {26, "claude_alias_numeric", "claude", "cs45(16384)", "numeric", "budget_tokens", 16384, "claude-sonnet-4-5-20250929", true}, - - // === 6. Codex Provider === - {27, "codex_original_level", "codex", "gpt-5(high)", "level", "reasoning_effort", "high", "gpt-5", false}, - {28, "codex_alias_level", "codex", "g5(high)", "level", "reasoning_effort", "high", "gpt-5", true}, - - // === 7. Qwen Provider === - {29, "qwen_original_level", "qwen", "qwen3-coder-plus(high)", "level", "enable_thinking", true, "qwen3-coder-plus", false}, - {30, "qwen_alias_level", "qwen", "qcp(high)", "level", "enable_thinking", true, "qwen3-coder-plus", true}, - - // === 8. iFlow Provider === - {31, "iflow_original_level", "iflow", "glm-4.7(high)", "level", "reasoning_effort", "high", "glm-4.7", false}, - {32, "iflow_alias_level", "iflow", "glm(high)", "level", "reasoning_effort", "high", "glm-4.7", true}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - // Step 1: Parse model suffix (simulates SDK layer normalization) - // For "gp(1000)" -> requestedModel="gp", metadata={thinking_budget: 1000} - requestedModel, metadata := util.NormalizeThinkingModel(tt.requestModel) - - // Verify suffix was parsed - if metadata == nil && (tt.suffixType == "numeric" || tt.suffixType == "level") { - t.Errorf("Case #%d: NormalizeThinkingModel(%q) metadata is nil", tt.id, tt.requestModel) - return - } - - // Step 2: Simulate OAuth model mapping - // Real flow: applyOAuthModelMapping stores requestedModel (the alias) in metadata - if tt.isAlias { - if metadata == nil { - metadata = make(map[string]any) - } - metadata[util.ModelMappingOriginalModelMetadataKey] = requestedModel - } - - // Step 3: Verify metadata extraction - switch tt.suffixType { - case "numeric": - budget, _, _, matched := util.ThinkingFromMetadata(metadata) - if !matched { - t.Errorf("Case #%d: ThinkingFromMetadata did not match", tt.id) - return - } - if budget == nil { - t.Errorf("Case #%d: expected budget in metadata", tt.id) - return - } - // For thinkingBudget/budget_tokens, verify the parsed budget value - if tt.expectedField == "thinkingBudget" || tt.expectedField == "budget_tokens" { - expectedBudget := tt.expectedValue.(int) - if *budget != expectedBudget { - t.Errorf("Case #%d: budget = %d, want %d", tt.id, *budget, expectedBudget) - } - } - // For thinkingLevel (Gemini 3), verify conversion from budget to level - if tt.expectedField == "thinkingLevel" { - level, ok := 
util.ThinkingBudgetToGemini3Level(tt.upstreamModel, *budget) - if !ok { - t.Errorf("Case #%d: ThinkingBudgetToGemini3Level failed", tt.id) - return - } - expectedLevel := tt.expectedValue.(string) - if level != expectedLevel { - t.Errorf("Case #%d: converted level = %q, want %q", tt.id, level, expectedLevel) - } - } - - case "level": - _, _, effort, matched := util.ThinkingFromMetadata(metadata) - if !matched { - t.Errorf("Case #%d: ThinkingFromMetadata did not match", tt.id) - return - } - if effort == nil { - t.Errorf("Case #%d: expected effort in metadata", tt.id) - return - } - if tt.expectedField == "thinkingLevel" || tt.expectedField == "reasoning_effort" { - expectedEffort := tt.expectedValue.(string) - if *effort != expectedEffort { - t.Errorf("Case #%d: effort = %q, want %q", tt.id, *effort, expectedEffort) - } - } - } - - // Step 4: Test Gemini-specific thinkingLevel conversion for Gemini 3 models - if tt.expectedField == "thinkingLevel" && util.IsGemini3Model(tt.upstreamModel) { - body := []byte(`{"request":{"contents":[]}}`) - - // Build metadata simulating real OAuth flow: - // - requestedModel (alias like "gf") is stored in model_mapping_original_model - // - upstreamModel is passed as the model parameter - testMetadata := make(map[string]any) - if tt.isAlias { - // Real flow: applyOAuthModelMapping stores requestedModel (the alias) - testMetadata[util.ModelMappingOriginalModelMetadataKey] = requestedModel - } - // Copy parsed metadata (thinking_budget, reasoning_effort, etc.) - for k, v := range metadata { - testMetadata[k] = v - } - - result := util.ApplyGemini3ThinkingLevelFromMetadataCLI(tt.upstreamModel, testMetadata, body) - levelVal := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel") - - expectedLevel := tt.expectedValue.(string) - if !levelVal.Exists() { - t.Errorf("Case #%d: expected thinkingLevel in result", tt.id) - } else if levelVal.String() != expectedLevel { - t.Errorf("Case #%d: thinkingLevel = %q, want %q", tt.id, levelVal.String(), expectedLevel) - } - } - - // Step 5: Test Gemini 2.5 thinkingBudget application using thinking.ApplyThinking - if tt.expectedField == "thinkingBudget" && util.IsGemini25Model(tt.upstreamModel) { - body := []byte(`{"request":{"contents":[]}}`) - - // Build metadata simulating real OAuth flow: - // - requestedModel (alias like "gp") is stored in model_mapping_original_model - // - upstreamModel is passed as the model parameter - testMetadata := make(map[string]any) - if tt.isAlias { - // Real flow: applyOAuthModelMapping stores requestedModel (the alias) - testMetadata[util.ModelMappingOriginalModelMetadataKey] = requestedModel - } - // Copy parsed metadata (thinking_budget, reasoning_effort, etc.) 
- for k, v := range metadata { - testMetadata[k] = v - } - - // Merge thinking config from metadata into body - body = applyThinkingFromMetadata(body, testMetadata) - - // Use thinking.ApplyThinking for unified thinking config handling - // Note: ApplyThinking now takes model string, not *ModelInfo - result, _ := thinking.ApplyThinking(body, tt.upstreamModel, "gemini-cli") - - budgetVal := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget") - - expectedBudget := tt.expectedValue.(int) - if !budgetVal.Exists() { - t.Errorf("Case #%d: expected thinkingBudget in result", tt.id) - } else if int(budgetVal.Int()) != expectedBudget { - t.Errorf("Case #%d: thinkingBudget = %d, want %d", tt.id, int(budgetVal.Int()), expectedBudget) - } - } - }) - } -} - -// applyThinkingFromMetadata merges thinking configuration from metadata into the payload. -func applyThinkingFromMetadata(payload []byte, metadata map[string]any) []byte { - if len(metadata) == 0 { - return payload - } - - // Merge thinking_budget from metadata if present - if budget, ok := metadata["thinking_budget"]; ok { - if budgetVal, okNum := parseNumberToInt(budget); okNum { - payload, _ = sjson.SetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget", budgetVal) - } - } - - // Merge reasoning_effort from metadata if present - if effort, ok := metadata["reasoning_effort"]; ok { - if effortStr, okStr := effort.(string); okStr && effortStr != "" { - payload, _ = sjson.SetBytes(payload, "request.generationConfig.thinkingConfig.thinkingLevel", effortStr) - } - } - - // Merge thinking_include_thoughts from metadata if present - if include, ok := metadata["thinking_include_thoughts"]; ok { - if includeBool, okBool := include.(bool); okBool { - payload, _ = sjson.SetBytes(payload, "request.generationConfig.thinkingConfig.includeThoughts", includeBool) - } - } - - return payload -} - -// parseNumberToInt safely converts various numeric types to int -func parseNumberToInt(raw any) (int, bool) { - switch v := raw.(type) { - case int: - return v, true - case int32: - return int(v), true - case int64: - return int(v), true - case float64: - return int(v), true - } - return 0, false -} From 22624793651a6342b7069b244b97f14c2331fb71 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 14 Jan 2026 19:11:04 +0800 Subject: [PATCH 13/65] refactor(thinking): remove legacy utilities and simplify model mapping --- internal/util/claude_thinking.go | 49 --- internal/util/gemini_thinking.go | 104 ------ internal/util/thinking.go | 141 -------- internal/util/thinking_deprecation_test.go | 130 ------- internal/util/thinking_suffix.go | 319 ------------------ .../auth/api_key_model_mappings_test.go | 47 +-- sdk/cliproxy/auth/conductor.go | 78 ++--- sdk/cliproxy/auth/model_name_mappings.go | 28 +- sdk/cliproxy/auth/model_name_mappings_test.go | 12 +- 9 files changed, 43 insertions(+), 865 deletions(-) delete mode 100644 internal/util/claude_thinking.go delete mode 100644 internal/util/thinking_deprecation_test.go delete mode 100644 internal/util/thinking_suffix.go diff --git a/internal/util/claude_thinking.go b/internal/util/claude_thinking.go deleted file mode 100644 index 6176f57d..00000000 --- a/internal/util/claude_thinking.go +++ /dev/null @@ -1,49 +0,0 @@ -package util - -import ( - "github.com/tidwall/gjson" - "github.com/tidwall/sjson" -) - -// ApplyClaudeThinkingConfig applies thinking configuration to a Claude API request payload. 
-// It sets the thinking.type to "enabled" and thinking.budget_tokens to the specified budget. -// If budget is nil or the payload already has thinking config, it returns the payload unchanged. -func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte { - if budget == nil { - return body - } - if gjson.GetBytes(body, "thinking").Exists() { - return body - } - if *budget <= 0 { - return body - } - updated := body - updated, _ = sjson.SetBytes(updated, "thinking.type", "enabled") - updated, _ = sjson.SetBytes(updated, "thinking.budget_tokens", *budget) - return updated -} - -// ResolveClaudeThinkingConfig resolves thinking configuration from metadata for Claude models. -// It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget. -// Returns the normalized budget (nil if thinking should not be enabled) and whether it matched. -func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) { - if !ModelSupportsThinking(modelName) { - return nil, false - } - budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata) - if !matched { - return nil, false - } - if include != nil && !*include { - return nil, true - } - if budget == nil { - return nil, true - } - normalized := NormalizeThinkingBudget(modelName, *budget) - if normalized <= 0 { - return nil, true - } - return &normalized, true -} diff --git a/internal/util/gemini_thinking.go b/internal/util/gemini_thinking.go index 8e76f3bc..838def6e 100644 --- a/internal/util/gemini_thinking.go +++ b/internal/util/gemini_thinking.go @@ -8,12 +8,6 @@ import ( "github.com/tidwall/sjson" ) -const ( - GeminiThinkingBudgetMetadataKey = "gemini_thinking_budget" - GeminiIncludeThoughtsMetadataKey = "gemini_include_thoughts" - GeminiOriginalModelMetadataKey = "gemini_original_model" -) - // Gemini model family detection patterns var ( gemini3Pattern = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]`) @@ -297,104 +291,6 @@ func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte { return updated } -// ApplyGemini3ThinkingLevelFromMetadata applies thinkingLevel from metadata for Gemini 3 models. -// For standard Gemini API format (generationConfig.thinkingConfig path). -// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)) -// or numeric budget suffix (e.g., model(1000)) which gets converted to a thinkingLevel. 
-func ApplyGemini3ThinkingLevelFromMetadata(model string, metadata map[string]any, body []byte) []byte { - // Use the alias from metadata if available for model type detection - lookupModel := ResolveOriginalModel(model, metadata) - if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) { - return body - } - - // Determine which model to use for validation - checkModel := model - if IsGemini3Model(lookupModel) { - checkModel = lookupModel - } - - // First try to get effort string from metadata - effort, ok := ReasoningEffortFromMetadata(metadata) - if ok && effort != "" { - if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid { - return ApplyGeminiThinkingLevel(body, level, nil) - } - } - - // Fallback: check for numeric budget and convert to thinkingLevel - budget, _, _, matched := ThinkingFromMetadata(metadata) - if matched && budget != nil { - if level, valid := ThinkingBudgetToGemini3Level(checkModel, *budget); valid { - return ApplyGeminiThinkingLevel(body, level, nil) - } - } - - return body -} - -// ApplyGemini3ThinkingLevelFromMetadataCLI applies thinkingLevel from metadata for Gemini 3 models. -// For Gemini CLI API format (request.generationConfig.thinkingConfig path). -// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)) -// or numeric budget suffix (e.g., model(1000)) which gets converted to a thinkingLevel. -func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string]any, body []byte) []byte { - // Use the alias from metadata if available for model type detection - lookupModel := ResolveOriginalModel(model, metadata) - if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) { - return body - } - - // Determine which model to use for validation - checkModel := model - if IsGemini3Model(lookupModel) { - checkModel = lookupModel - } - - // First try to get effort string from metadata - effort, ok := ReasoningEffortFromMetadata(metadata) - if ok && effort != "" { - if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid { - return ApplyGeminiCLIThinkingLevel(body, level, nil) - } - } - - // Fallback: check for numeric budget and convert to thinkingLevel - budget, _, _, matched := ThinkingFromMetadata(metadata) - if matched && budget != nil { - if level, valid := ThinkingBudgetToGemini3Level(checkModel, *budget); valid { - return ApplyGeminiCLIThinkingLevel(body, level, nil) - } - } - - return body -} - -// ApplyDefaultThinkingIfNeededCLI injects default thinkingConfig for models that require it. -// For Gemini CLI API format (request.generationConfig.thinkingConfig path). -// Returns the modified body if thinkingConfig was added, otherwise returns the original. -// For Gemini 3 models, uses thinkingLevel instead of thinkingBudget per Google's documentation. 
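The two metadata-driven variants deleted above are identical except for the envelope they target: the standard Gemini path (generationConfig.thinkingConfig) versus the Gemini CLI path (request.generationConfig.thinkingConfig). A sketch of a single prefix-parameterized helper; the name applyThinkingLevel is hypothetical:

package main

import (
    "fmt"

    "github.com/tidwall/sjson"
)

// applyThinkingLevel collapses the two variants, which differ only in the path prefix.
func applyThinkingLevel(body []byte, prefix, level string) []byte {
    out, err := sjson.SetBytes(body, prefix+"generationConfig.thinkingConfig.thinkingLevel", level)
    if err != nil {
        return body
    }
    return out
}

func main() {
    fmt.Println(string(applyThinkingLevel([]byte(`{}`), "", "high")))         // standard Gemini envelope
    fmt.Println(string(applyThinkingLevel([]byte(`{}`), "request.", "high"))) // Gemini CLI envelope
}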
-func ApplyDefaultThinkingIfNeededCLI(model string, metadata map[string]any, body []byte) []byte { - // Use the alias from metadata if available for model property lookup - lookupModel := ResolveOriginalModel(model, metadata) - if !ModelHasDefaultThinking(lookupModel) && !ModelHasDefaultThinking(model) { - return body - } - if gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() { - return body - } - // Gemini 3 models use thinkingLevel instead of thinkingBudget - if IsGemini3Model(lookupModel) || IsGemini3Model(model) { - // Don't set a default - let the API use its dynamic default ("high") - // Only set includeThoughts - updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts", true) - return updated - } - // Gemini 2.5 and other models use thinkingBudget - updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget", -1) - updated, _ = sjson.SetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts", true) - return updated -} - // StripThinkingConfigIfUnsupported removes thinkingConfig from the request body // when the target model does not advertise Thinking capability. It cleans both // standard Gemini and Gemini CLI JSON envelopes. This acts as a final safety net diff --git a/internal/util/thinking.go b/internal/util/thinking.go index 3ed4ee42..657a1ff1 100644 --- a/internal/util/thinking.go +++ b/internal/util/thinking.go @@ -91,106 +91,6 @@ func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zero return false, 0, 0, false, false } -// GetModelThinkingLevels returns the discrete reasoning effort levels for the model. -// Returns nil if the model has no thinking support or no levels defined. -// -// Deprecated: Access modelInfo.Thinking.Levels directly. -func GetModelThinkingLevels(model string) []string { - if model == "" { - return nil - } - info := registry.GetGlobalRegistry().GetModelInfo(model) - if info == nil || info.Thinking == nil { - return nil - } - return info.Thinking.Levels -} - -// ModelUsesThinkingLevels reports whether the model uses discrete reasoning -// effort levels instead of numeric budgets. -// -// Deprecated: Check len(modelInfo.Thinking.Levels) > 0. -func ModelUsesThinkingLevels(model string) bool { - levels := GetModelThinkingLevels(model) - return len(levels) > 0 -} - -// NormalizeReasoningEffortLevel validates and normalizes a reasoning effort -// level for the given model. Returns false when the level is not supported. -// -// Deprecated: Use thinking.ValidateConfig for level validation. -func NormalizeReasoningEffortLevel(model, effort string) (string, bool) { - levels := GetModelThinkingLevels(model) - if len(levels) == 0 { - return "", false - } - loweredEffort := strings.ToLower(strings.TrimSpace(effort)) - for _, lvl := range levels { - if strings.ToLower(lvl) == loweredEffort { - return lvl, true - } - } - return "", false -} - -// IsOpenAICompatibilityModel reports whether the model is registered as an OpenAI-compatibility model. -// These models may not advertise Thinking metadata in the registry. -// -// Deprecated: Check modelInfo.Type == "openai-compatibility". 
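The core of the deleted NormalizeReasoningEffortLevel is a case-insensitive match against the model's level list, returning the canonical spelling from the registry. A standalone sketch of that matching loop:

package main

import (
    "fmt"
    "strings"
)

// normalizeLevel mirrors the matching loop of the deleted NormalizeReasoningEffortLevel.
func normalizeLevel(levels []string, effort string) (string, bool) {
    e := strings.ToLower(strings.TrimSpace(effort))
    for _, lvl := range levels {
        if strings.ToLower(lvl) == e {
            return lvl, true
        }
    }
    return "", false
}

func main() {
    fmt.Println(normalizeLevel([]string{"low", "medium", "high"}, " High ")) // high true
    fmt.Println(normalizeLevel([]string{"low", "medium", "high"}, "max"))    // "" false
}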
-func IsOpenAICompatibilityModel(model string) bool { - if model == "" { - return false - } - info := registry.GetGlobalRegistry().GetModelInfo(model) - if info == nil { - return false - } - return strings.EqualFold(strings.TrimSpace(info.Type), "openai-compatibility") -} - -// ThinkingEffortToBudget maps a reasoning effort level to a numeric thinking budget (tokens), -// clamping the result to the model's supported range. -// -// Mappings (values are normalized to model's supported range): -// - "none" -> 0 -// - "auto" -> -1 -// - "minimal" -> 512 -// - "low" -> 1024 -// - "medium" -> 8192 -// - "high" -> 24576 -// - "xhigh" -> 32768 -// -// Returns false when the effort level is empty or unsupported. -// -// Deprecated: Use thinking.ConvertLevelToBudget instead. -func ThinkingEffortToBudget(model, effort string) (int, bool) { - if effort == "" { - return 0, false - } - normalized, ok := NormalizeReasoningEffortLevel(model, effort) - if !ok { - normalized = strings.ToLower(strings.TrimSpace(effort)) - } - switch normalized { - case "none": - return 0, true - case "auto": - return NormalizeThinkingBudget(model, -1), true - case "minimal": - return NormalizeThinkingBudget(model, 512), true - case "low": - return NormalizeThinkingBudget(model, 1024), true - case "medium": - return NormalizeThinkingBudget(model, 8192), true - case "high": - return NormalizeThinkingBudget(model, 24576), true - case "xhigh": - return NormalizeThinkingBudget(model, 32768), true - default: - return 0, false - } -} - // ThinkingLevelToBudget maps a Gemini thinkingLevel to a numeric thinking budget (tokens). // // Mappings: @@ -220,44 +120,3 @@ func ThinkingLevelToBudget(level string) (int, bool) { return 0, false } } - -// ThinkingBudgetToEffort maps a numeric thinking budget (tokens) -// to a reasoning effort level for level-based models. -// -// Mappings: -// - 0 -> "none" (or lowest supported level if model doesn't support "none") -// - -1 -> "auto" -// - 1..1024 -> "low" -// - 1025..8192 -> "medium" -// - 8193..24576 -> "high" -// - 24577.. -> highest supported level for the model (defaults to "xhigh") -// -// Returns false when the budget is unsupported (negative values other than -1). -// -// Deprecated: Use thinking.ConvertBudgetToLevel instead. 
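For reference, the effort-to-budget table from the deleted ThinkingEffortToBudget as a standalone function (values are those listed in its doc comment; the clamping done by NormalizeThinkingBudget is omitted here):

package main

import "fmt"

// effortToBudget reproduces the documented mapping of the deleted ThinkingEffortToBudget.
func effortToBudget(effort string) (int, bool) {
    switch effort {
    case "none":
        return 0, true
    case "auto":
        return -1, true
    case "minimal":
        return 512, true
    case "low":
        return 1024, true
    case "medium":
        return 8192, true
    case "high":
        return 24576, true
    case "xhigh":
        return 32768, true
    }
    return 0, false
}

func main() {
    b, ok := effortToBudget("medium")
    fmt.Println(b, ok) // 8192 true
}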
-func ThinkingBudgetToEffort(model string, budget int) (string, bool) { - switch { - case budget == -1: - return "auto", true - case budget < -1: - return "", false - case budget == 0: - if levels := GetModelThinkingLevels(model); len(levels) > 0 { - return levels[0], true - } - return "none", true - case budget > 0 && budget <= 1024: - return "low", true - case budget <= 8192: - return "medium", true - case budget <= 24576: - return "high", true - case budget > 24576: - if levels := GetModelThinkingLevels(model); len(levels) > 0 { - return levels[len(levels)-1], true - } - return "xhigh", true - default: - return "", false - } -} diff --git a/internal/util/thinking_deprecation_test.go b/internal/util/thinking_deprecation_test.go deleted file mode 100644 index 6e513874..00000000 --- a/internal/util/thinking_deprecation_test.go +++ /dev/null @@ -1,130 +0,0 @@ -package util - -import ( - "go/ast" - "go/parser" - "go/token" - "os" - "path/filepath" - "runtime" - "strings" - "testing" -) - -func TestThinkingUtilDeprecationComments(t *testing.T) { - dir, err := thinkingSourceDir() - if err != nil { - t.Fatalf("resolve thinking source dir: %v", err) - } - - // Test thinking.go deprecation comments - t.Run("thinking.go", func(t *testing.T) { - docs := parseFuncDocs(t, filepath.Join(dir, "thinking.go")) - tests := []struct { - funcName string - want string - }{ - {"ModelSupportsThinking", "Deprecated: Use thinking.ApplyThinking with modelInfo.Thinking check."}, - {"NormalizeThinkingBudget", "Deprecated: Use thinking.ValidateConfig for budget normalization."}, - {"ThinkingEffortToBudget", "Deprecated: Use thinking.ConvertLevelToBudget instead."}, - {"ThinkingBudgetToEffort", "Deprecated: Use thinking.ConvertBudgetToLevel instead."}, - {"GetModelThinkingLevels", "Deprecated: Access modelInfo.Thinking.Levels directly."}, - {"ModelUsesThinkingLevels", "Deprecated: Check len(modelInfo.Thinking.Levels) > 0."}, - {"NormalizeReasoningEffortLevel", "Deprecated: Use thinking.ValidateConfig for level validation."}, - {"IsOpenAICompatibilityModel", "Deprecated: Check modelInfo.Type == \"openai-compatibility\"."}, - {"ThinkingLevelToBudget", "Deprecated: Use thinking.ConvertLevelToBudget instead."}, - } - for _, tt := range tests { - t.Run(tt.funcName, func(t *testing.T) { - doc, ok := docs[tt.funcName] - if !ok { - t.Fatalf("missing function %q in thinking.go", tt.funcName) - } - if !strings.Contains(doc, tt.want) { - t.Fatalf("missing deprecation note for %s: want %q, got %q", tt.funcName, tt.want, doc) - } - }) - } - }) - - // Test thinking_suffix.go deprecation comments - t.Run("thinking_suffix.go", func(t *testing.T) { - docs := parseFuncDocs(t, filepath.Join(dir, "thinking_suffix.go")) - tests := []struct { - funcName string - want string - }{ - {"NormalizeThinkingModel", "Deprecated: Use thinking.ParseSuffix instead."}, - {"ThinkingFromMetadata", "Deprecated: Access ThinkingConfig fields directly."}, - {"ResolveThinkingConfigFromMetadata", "Deprecated: Use thinking.ApplyThinking instead."}, - {"ReasoningEffortFromMetadata", "Deprecated: Use thinking.ConvertBudgetToLevel instead."}, - {"ResolveOriginalModel", "Deprecated: Parse model suffix with thinking.ParseSuffix."}, - } - for _, tt := range tests { - t.Run(tt.funcName, func(t *testing.T) { - doc, ok := docs[tt.funcName] - if !ok { - t.Fatalf("missing function %q in thinking_suffix.go", tt.funcName) - } - if !strings.Contains(doc, tt.want) { - t.Fatalf("missing deprecation note for %s: want %q, got %q", tt.funcName, tt.want, doc) - } - }) - } - }) - - 
// Test thinking_text.go deprecation comments - t.Run("thinking_text.go", func(t *testing.T) { - docs := parseFuncDocs(t, filepath.Join(dir, "thinking_text.go")) - tests := []struct { - funcName string - want string - }{ - {"GetThinkingText", "Deprecated: Use thinking package for thinking text extraction."}, - {"GetThinkingTextFromJSON", "Deprecated: Use thinking package for thinking text extraction."}, - {"SanitizeThinkingPart", "Deprecated: Use thinking package for thinking part sanitization."}, - {"StripCacheControl", "Deprecated: Use thinking package for cache control stripping."}, - } - for _, tt := range tests { - t.Run(tt.funcName, func(t *testing.T) { - doc, ok := docs[tt.funcName] - if !ok { - t.Fatalf("missing function %q in thinking_text.go", tt.funcName) - } - if !strings.Contains(doc, tt.want) { - t.Fatalf("missing deprecation note for %s: want %q, got %q", tt.funcName, tt.want, doc) - } - }) - } - }) -} - -func parseFuncDocs(t *testing.T, path string) map[string]string { - t.Helper() - fset := token.NewFileSet() - file, err := parser.ParseFile(fset, path, nil, parser.ParseComments) - if err != nil { - t.Fatalf("parse %s: %v", path, err) - } - docs := map[string]string{} - for _, decl := range file.Decls { - fn, ok := decl.(*ast.FuncDecl) - if !ok || fn.Recv != nil { - continue - } - if fn.Doc == nil { - docs[fn.Name.Name] = "" - continue - } - docs[fn.Name.Name] = fn.Doc.Text() - } - return docs -} - -func thinkingSourceDir() (string, error) { - _, thisFile, _, ok := runtime.Caller(0) - if !ok { - return "", os.ErrNotExist - } - return filepath.Dir(thisFile), nil -} diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go deleted file mode 100644 index c02cadaa..00000000 --- a/internal/util/thinking_suffix.go +++ /dev/null @@ -1,319 +0,0 @@ -package util - -import ( - "encoding/json" - "strconv" - "strings" -) - -const ( - // Deprecated: No longer used. Thinking configuration is now passed via - // model name suffix and processed by thinking.ApplyThinking(). - ThinkingBudgetMetadataKey = "thinking_budget" - - // Deprecated: No longer used. See ThinkingBudgetMetadataKey. - ThinkingIncludeThoughtsMetadataKey = "thinking_include_thoughts" - - // Deprecated: No longer used. See ThinkingBudgetMetadataKey. - ReasoningEffortMetadataKey = "reasoning_effort" - - // Deprecated: No longer used. The original model name (with suffix) is now - // preserved directly in the model field. Use thinking.ParseSuffix() to - // extract the base model name if needed. - ThinkingOriginalModelMetadataKey = "thinking_original_model" - - // ModelMappingOriginalModelMetadataKey stores the client-requested model alias - // for OAuth model name mappings. This is NOT deprecated. - ModelMappingOriginalModelMetadataKey = "model_mapping_original_model" -) - -// NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns -// the normalized base model with extracted metadata. Supported pattern: -// -// Deprecated: Use thinking.ParseSuffix instead. -// - "()" where value can be: -// - A numeric budget (e.g., "(8192)", "(16384)") -// - A reasoning effort level (e.g., "(high)", "(medium)", "(low)") -// -// Examples: -// - "claude-sonnet-4-5-20250929(16384)" → budget=16384 -// - "gpt-5.1(high)" → reasoning_effort="high" -// - "gemini-2.5-pro(32768)" → budget=32768 -// -// Note: Empty parentheses "()" are not supported and will be ignored. 
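The deleted deprecation test leans on go/parser to read doc comments off every top-level function, as parseFuncDocs above shows. A minimal standalone demonstration of the same scan:

package main

import (
    "fmt"
    "go/ast"
    "go/parser"
    "go/token"
    "strings"
)

func main() {
    src := `package p

// Deprecated: Use New instead.
func Old() {}
`
    fset := token.NewFileSet()
    file, err := parser.ParseFile(fset, "p.go", src, parser.ParseComments)
    if err != nil {
        panic(err)
    }
    for _, decl := range file.Decls {
        fn, ok := decl.(*ast.FuncDecl)
        if !ok || fn.Doc == nil {
            continue
        }
        // Same containment check the test performs against each expected note.
        fmt.Println(fn.Name.Name, strings.Contains(fn.Doc.Text(), "Deprecated:")) // Old true
    }
}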
-func NormalizeThinkingModel(modelName string) (string, map[string]any) { - if modelName == "" { - return modelName, nil - } - - baseModel := modelName - - var ( - budgetOverride *int - reasoningEffort *string - matched bool - ) - - // Match "()" pattern at the end of the model name - if idx := strings.LastIndex(modelName, "("); idx != -1 { - if !strings.HasSuffix(modelName, ")") { - // Incomplete parenthesis, ignore - return baseModel, nil - } - - value := modelName[idx+1 : len(modelName)-1] // Extract content between ( and ) - if value == "" { - // Empty parentheses not supported - return baseModel, nil - } - - candidateBase := modelName[:idx] - - // Auto-detect: pure numeric → budget, string → reasoning effort level - if parsed, ok := parseIntPrefix(value); ok { - // Numeric value: treat as thinking budget - baseModel = candidateBase - budgetOverride = &parsed - matched = true - } else { - // String value: treat as reasoning effort level - baseModel = candidateBase - raw := strings.ToLower(strings.TrimSpace(value)) - if raw != "" { - reasoningEffort = &raw - matched = true - } - } - } - - if !matched { - return baseModel, nil - } - - metadata := map[string]any{ - ThinkingOriginalModelMetadataKey: modelName, - } - if budgetOverride != nil { - metadata[ThinkingBudgetMetadataKey] = *budgetOverride - } - if reasoningEffort != nil { - metadata[ReasoningEffortMetadataKey] = *reasoningEffort - } - return baseModel, metadata -} - -// ThinkingFromMetadata extracts thinking overrides from metadata produced by NormalizeThinkingModel. -// It accepts both the new generic keys and legacy Gemini-specific keys. -// -// Deprecated: Access ThinkingConfig fields directly. -func ThinkingFromMetadata(metadata map[string]any) (*int, *bool, *string, bool) { - if len(metadata) == 0 { - return nil, nil, nil, false - } - - var ( - budgetPtr *int - includePtr *bool - effortPtr *string - matched bool - ) - - readBudget := func(key string) { - if budgetPtr != nil { - return - } - if raw, ok := metadata[key]; ok { - if v, okNumber := parseNumberToInt(raw); okNumber { - budget := v - budgetPtr = &budget - matched = true - } - } - } - - readInclude := func(key string) { - if includePtr != nil { - return - } - if raw, ok := metadata[key]; ok { - switch v := raw.(type) { - case bool: - val := v - includePtr = &val - matched = true - case *bool: - if v != nil { - val := *v - includePtr = &val - matched = true - } - } - } - } - - readEffort := func(key string) { - if effortPtr != nil { - return - } - if raw, ok := metadata[key]; ok { - if val, okStr := raw.(string); okStr && strings.TrimSpace(val) != "" { - normalized := strings.ToLower(strings.TrimSpace(val)) - effortPtr = &normalized - matched = true - } - } - } - - readBudget(ThinkingBudgetMetadataKey) - readBudget(GeminiThinkingBudgetMetadataKey) - readInclude(ThinkingIncludeThoughtsMetadataKey) - readInclude(GeminiIncludeThoughtsMetadataKey) - readEffort(ReasoningEffortMetadataKey) - readEffort("reasoning.effort") - - return budgetPtr, includePtr, effortPtr, matched -} - -// ResolveThinkingConfigFromMetadata derives thinking budget/include overrides, -// converting reasoning effort strings into budgets when possible. -// -// Deprecated: Use thinking.ApplyThinking instead. 
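Condensed, the parenthesis handling in the deleted NormalizeThinkingModel reduces to a split plus a numeric-versus-string classification (this sketch uses strconv.Atoi rather than the prefix-digit parse above; the helper name is hypothetical):

package main

import (
    "fmt"
    "strconv"
    "strings"
)

// splitSuffix: incomplete or empty parentheses are ignored; otherwise the
// value is classified as a numeric budget or an effort level.
func splitSuffix(model string) (base string, budget int, effort string, ok bool) {
    i := strings.LastIndex(model, "(")
    if i == -1 || !strings.HasSuffix(model, ")") {
        return model, 0, "", false
    }
    value := model[i+1 : len(model)-1]
    if value == "" {
        return model, 0, "", false
    }
    if n, err := strconv.Atoi(value); err == nil {
        return model[:i], n, "", true
    }
    return model[:i], 0, strings.ToLower(strings.TrimSpace(value)), true
}

func main() {
    fmt.Println(splitSuffix("claude-sonnet-4-5-20250929(16384)")) // claude-sonnet-4-5-20250929 16384  true
    fmt.Println(splitSuffix("gpt-5.1(high)"))                     // gpt-5.1 0 high true
}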
-func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (*int, *bool, bool) { - budget, include, effort, matched := ThinkingFromMetadata(metadata) - if !matched { - return nil, nil, false - } - // Level-based models (OpenAI-style) do not accept numeric thinking budgets in - // Claude/Gemini-style protocols, so we don't derive budgets for them here. - if ModelUsesThinkingLevels(model) { - return nil, nil, false - } - - if budget == nil && effort != nil { - if derived, ok := ThinkingEffortToBudget(model, *effort); ok { - budget = &derived - } - } - return budget, include, budget != nil || include != nil || effort != nil -} - -// ReasoningEffortFromMetadata resolves a reasoning effort string from metadata, -// inferring "auto" and "none" when budgets request dynamic or disabled thinking. -// -// Deprecated: Use thinking.ConvertBudgetToLevel instead. -func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) { - budget, include, effort, matched := ThinkingFromMetadata(metadata) - if !matched { - return "", false - } - if effort != nil && *effort != "" { - return strings.ToLower(strings.TrimSpace(*effort)), true - } - if budget != nil { - switch *budget { - case -1: - return "auto", true - case 0: - return "none", true - } - } - if include != nil && !*include { - return "none", true - } - return "", true -} - -// ResolveOriginalModel returns the original model name stored in metadata (if present), -// otherwise falls back to the provided model. -// -// Deprecated: Parse model suffix with thinking.ParseSuffix. -func ResolveOriginalModel(model string, metadata map[string]any) string { - normalize := func(name string) string { - if name == "" { - return "" - } - if base, _ := NormalizeThinkingModel(name); base != "" { - return base - } - return strings.TrimSpace(name) - } - - if metadata != nil { - if v, ok := metadata[ModelMappingOriginalModelMetadataKey]; ok { - if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" { - if base := normalize(s); base != "" { - return base - } - } - } - if v, ok := metadata[ThinkingOriginalModelMetadataKey]; ok { - if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" { - if base := normalize(s); base != "" { - return base - } - } - } - if v, ok := metadata[GeminiOriginalModelMetadataKey]; ok { - if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" { - if base := normalize(s); base != "" { - return base - } - } - } - } - // Fallback: try to re-normalize the model name when metadata was dropped. 
- if base := normalize(model); base != "" { - return base - } - return model -} - -func parseIntPrefix(value string) (int, bool) { - if value == "" { - return 0, false - } - digits := strings.TrimLeft(value, "-") - if digits == "" { - return 0, false - } - end := len(digits) - for i := 0; i < len(digits); i++ { - if digits[i] < '0' || digits[i] > '9' { - end = i - break - } - } - if end == 0 { - return 0, false - } - val, err := strconv.Atoi(digits[:end]) - if err != nil { - return 0, false - } - return val, true -} - -func parseNumberToInt(raw any) (int, bool) { - switch v := raw.(type) { - case int: - return v, true - case int32: - return int(v), true - case int64: - return int(v), true - case float64: - return int(v), true - case json.Number: - if val, err := v.Int64(); err == nil { - return int(val), true - } - case string: - if strings.TrimSpace(v) == "" { - return 0, false - } - if parsed, err := strconv.Atoi(strings.TrimSpace(v)); err == nil { - return parsed, true - } - } - return 0, false -} diff --git a/sdk/cliproxy/auth/api_key_model_mappings_test.go b/sdk/cliproxy/auth/api_key_model_mappings_test.go index fb4dbe86..9f3bd7fe 100644 --- a/sdk/cliproxy/auth/api_key_model_mappings_test.go +++ b/sdk/cliproxy/auth/api_key_model_mappings_test.go @@ -149,53 +149,32 @@ func TestApplyAPIKeyModelMapping(t *testing.T) { _, _ = mgr.Register(ctx, apiKeyAuth) tests := []struct { - name string - auth *Auth - inputModel string - wantModel string - wantOriginal string - expectMapping bool + name string + auth *Auth + inputModel string + wantModel string }{ { - name: "api_key auth with alias", - auth: apiKeyAuth, - inputModel: "g25p(8192)", - wantModel: "gemini-2.5-pro-exp-03-25(8192)", - wantOriginal: "g25p(8192)", - expectMapping: true, + name: "api_key auth with alias", + auth: apiKeyAuth, + inputModel: "g25p(8192)", + wantModel: "gemini-2.5-pro-exp-03-25(8192)", }, { - name: "oauth auth passthrough", - auth: oauthAuth, - inputModel: "some-model", - wantModel: "some-model", - expectMapping: false, + name: "oauth auth passthrough", + auth: oauthAuth, + inputModel: "some-model", + wantModel: "some-model", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - metadata := map[string]any{"existing": "value"} - resolvedModel, resultMeta := mgr.applyAPIKeyModelMapping(tt.auth, tt.inputModel, metadata) + resolvedModel := mgr.applyAPIKeyModelMapping(tt.auth, tt.inputModel) if resolvedModel != tt.wantModel { t.Errorf("model = %q, want %q", resolvedModel, tt.wantModel) } - - if resultMeta["existing"] != "value" { - t.Error("existing metadata not preserved") - } - - original, hasOriginal := resultMeta["model_mapping_original_model"].(string) - if tt.expectMapping { - if !hasOriginal || original != tt.wantOriginal { - t.Errorf("original model = %q, want %q", original, tt.wantOriginal) - } - } else { - if hasOriginal { - t.Error("should not set model_mapping_original_model for non-api_key auth") - } - } }) } } diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index 6e035d0f..8b3b41d4 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -752,9 +752,9 @@ func (m *Manager) executeWithProvider(ctx context.Context, provider string, req execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt) } execReq := req - execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth) - execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata) - execReq.Model, execReq.Metadata = 
m.applyAPIKeyModelMapping(auth, execReq.Model, execReq.Metadata) + execReq.Model = rewriteModelForAuth(routeModel, auth) + execReq.Model = m.applyOAuthModelMapping(auth, execReq.Model) + execReq.Model = m.applyAPIKeyModelMapping(auth, execReq.Model) resp, errExec := executor.Execute(execCtx, auth, execReq, opts) result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil} if errExec != nil { @@ -801,9 +801,9 @@ func (m *Manager) executeCountWithProvider(ctx context.Context, provider string, execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt) } execReq := req - execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth) - execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata) - execReq.Model, execReq.Metadata = m.applyAPIKeyModelMapping(auth, execReq.Model, execReq.Metadata) + execReq.Model = rewriteModelForAuth(routeModel, auth) + execReq.Model = m.applyOAuthModelMapping(auth, execReq.Model) + execReq.Model = m.applyAPIKeyModelMapping(auth, execReq.Model) resp, errExec := executor.CountTokens(execCtx, auth, execReq, opts) result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil} if errExec != nil { @@ -850,9 +850,9 @@ func (m *Manager) executeStreamWithProvider(ctx context.Context, provider string execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt) } execReq := req - execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth) - execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata) - execReq.Model, execReq.Metadata = m.applyAPIKeyModelMapping(auth, execReq.Model, execReq.Metadata) + execReq.Model = rewriteModelForAuth(routeModel, auth) + execReq.Model = m.applyOAuthModelMapping(auth, execReq.Model) + execReq.Model = m.applyAPIKeyModelMapping(auth, execReq.Model) chunks, errStream := executor.ExecuteStream(execCtx, auth, execReq, opts) if errStream != nil { rerr := &Error{Message: errStream.Error()} @@ -890,72 +890,39 @@ func (m *Manager) executeStreamWithProvider(ctx context.Context, provider string } } -func rewriteModelForAuth(model string, metadata map[string]any, auth *Auth) (string, map[string]any) { +func rewriteModelForAuth(model string, auth *Auth) string { if auth == nil || model == "" { - return model, metadata + return model } prefix := strings.TrimSpace(auth.Prefix) if prefix == "" { - return model, metadata + return model } needle := prefix + "/" if !strings.HasPrefix(model, needle) { - return model, metadata + return model } - rewritten := strings.TrimPrefix(model, needle) - return rewritten, stripPrefixFromMetadata(metadata, needle) + return strings.TrimPrefix(model, needle) } -func stripPrefixFromMetadata(metadata map[string]any, needle string) map[string]any { - if len(metadata) == 0 || needle == "" { - return metadata - } - keys := []string{ - util.GeminiOriginalModelMetadataKey, - util.ModelMappingOriginalModelMetadataKey, - } - var out map[string]any - for _, key := range keys { - raw, ok := metadata[key] - if !ok { - continue - } - value, okStr := raw.(string) - if !okStr || !strings.HasPrefix(value, needle) { - continue - } - if out == nil { - out = make(map[string]any, len(metadata)) - for k, v := range metadata { - out[k] = v - } - } - out[key] = strings.TrimPrefix(value, needle) - } - if out == nil { - return metadata - } - return out -} - -func (m *Manager) applyAPIKeyModelMapping(auth *Auth, requestedModel string, metadata 
map[string]any) (string, map[string]any) { +func (m *Manager) applyAPIKeyModelMapping(auth *Auth, requestedModel string) string { if m == nil || auth == nil { - return requestedModel, metadata + return requestedModel } kind, _ := auth.AccountInfo() if !strings.EqualFold(strings.TrimSpace(kind), "api_key") { - return requestedModel, metadata + return requestedModel } requestedModel = strings.TrimSpace(requestedModel) if requestedModel == "" { - return requestedModel, metadata + return requestedModel } // Fast path: lookup per-auth mapping table (keyed by auth.ID). if resolved := m.lookupAPIKeyUpstreamModel(auth.ID, requestedModel); resolved != "" { - return applyUpstreamModelOverride(requestedModel, resolved, metadata) + return resolved } // Slow path: scan config for the matching credential entry and resolve alias. @@ -980,8 +947,11 @@ func (m *Manager) applyAPIKeyModelMapping(auth *Auth, requestedModel string, met upstreamModel = resolveUpstreamModelForOpenAICompatAPIKey(cfg, auth, requestedModel) } - // applyUpstreamModelOverride lives in model_name_mappings.go. - return applyUpstreamModelOverride(requestedModel, upstreamModel, metadata) + // Return upstream model if found, otherwise return requested model. + if upstreamModel != "" { + return upstreamModel + } + return requestedModel } // APIKeyConfigEntry is a generic interface for API key configurations. diff --git a/sdk/cliproxy/auth/model_name_mappings.go b/sdk/cliproxy/auth/model_name_mappings.go index 7fac0b5b..24fcf50f 100644 --- a/sdk/cliproxy/auth/model_name_mappings.go +++ b/sdk/cliproxy/auth/model_name_mappings.go @@ -5,7 +5,6 @@ import ( internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" ) type modelMappingEntry interface { @@ -71,31 +70,14 @@ func (m *Manager) SetOAuthModelMappings(mappings map[string][]internalconfig.Mod m.modelNameMappings.Store(table) } -// applyOAuthModelMapping resolves the upstream model from OAuth model mappings -// and returns the resolved model along with updated metadata. If a mapping exists, -// the returned model is the upstream model and metadata contains the original -// requested model for response translation. -func (m *Manager) applyOAuthModelMapping(auth *Auth, requestedModel string, metadata map[string]any) (string, map[string]any) { +// applyOAuthModelMapping resolves the upstream model from OAuth model mappings. +// If a mapping exists, the returned model is the upstream model. +func (m *Manager) applyOAuthModelMapping(auth *Auth, requestedModel string) string { upstreamModel := m.resolveOAuthUpstreamModel(auth, requestedModel) - return applyUpstreamModelOverride(requestedModel, upstreamModel, metadata) -} - -func applyUpstreamModelOverride(requestedModel, upstreamModel string, metadata map[string]any) (string, map[string]any) { if upstreamModel == "" { - return requestedModel, metadata + return requestedModel } - - out := make(map[string]any, 1) - if len(metadata) > 0 { - out = make(map[string]any, len(metadata)+1) - for k, v := range metadata { - out[k] = v - } - } - - // Preserve the original client model string (including any suffix) for downstream. 
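After this refactor each mapping step is a plain string-to-string function, so the conductor's rewrite sequence becomes simple composition. A toy illustration of that shape (the stages here are made up; the alias value comes from the test table above):

package main

import (
    "fmt"
    "strings"
)

// chain applies each string -> string stage in order, mirroring the shape of
// the conductor's rewrite sequence after this patch.
func chain(model string, stages ...func(string) string) string {
    for _, stage := range stages {
        model = stage(model)
    }
    return model
}

func main() {
    stripPrefix := func(m string) string { return strings.TrimPrefix(m, "team/") }
    alias := func(m string) string {
        if m == "g25p(8192)" {
            return "gemini-2.5-pro-exp-03-25(8192)"
        }
        return m
    }
    fmt.Println(chain("team/g25p(8192)", stripPrefix, alias))
    // gemini-2.5-pro-exp-03-25(8192)
}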
- out[util.ModelMappingOriginalModelMetadataKey] = requestedModel - return upstreamModel, out + return upstreamModel } func resolveModelAliasFromConfigModels(requestedModel string, models []modelMappingEntry) string { diff --git a/sdk/cliproxy/auth/model_name_mappings_test.go b/sdk/cliproxy/auth/model_name_mappings_test.go index 121450cc..77f33bd6 100644 --- a/sdk/cliproxy/auth/model_name_mappings_test.go +++ b/sdk/cliproxy/auth/model_name_mappings_test.go @@ -169,19 +169,9 @@ func TestApplyOAuthModelMapping_SuffixPreservation(t *testing.T) { mgr.SetOAuthModelMappings(mappings) auth := &Auth{ID: "test-auth-id", Provider: "gemini-cli"} - metadata := map[string]any{"existing": "value"} - resolvedModel, resultMeta := mgr.applyOAuthModelMapping(auth, "gemini-2.5-pro(8192)", metadata) + resolvedModel := mgr.applyOAuthModelMapping(auth, "gemini-2.5-pro(8192)") if resolvedModel != "gemini-2.5-pro-exp-03-25(8192)" { t.Errorf("applyOAuthModelMapping() model = %q, want %q", resolvedModel, "gemini-2.5-pro-exp-03-25(8192)") } - - originalModel, ok := resultMeta["model_mapping_original_model"].(string) - if !ok || originalModel != "gemini-2.5-pro(8192)" { - t.Errorf("applyOAuthModelMapping() metadata[model_mapping_original_model] = %v, want %q", resultMeta["model_mapping_original_model"], "gemini-2.5-pro(8192)") - } - - if resultMeta["existing"] != "value" { - t.Errorf("applyOAuthModelMapping() metadata[existing] = %v, want %q", resultMeta["existing"], "value") - } } From 6e4a602c6070466aaf464571a08c8365d31ab729 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 14 Jan 2026 22:45:07 +0800 Subject: [PATCH 14/65] fix(thinking): map reasoning_effort to thinkingConfig --- .../runtime/executor/antigravity_executor.go | 6 +- internal/thinking/convert.go | 93 +--- internal/thinking/suffix.go | 24 - internal/thinking/text.go | 41 ++ .../claude/antigravity_claude_request.go | 3 +- .../antigravity_openai_request.go | 32 +- .../gemini-cli_openai_request.go | 23 +- .../chat-completions/gemini_openai_request.go | 35 +- .../gemini_openai-responses_request.go | 26 +- .../openai/claude/openai_claude_request.go | 3 +- internal/util/gemini_thinking.go | 513 ------------------ internal/util/thinking.go | 122 ----- internal/util/thinking_text.go | 95 ---- 13 files changed, 107 insertions(+), 909 deletions(-) create mode 100644 internal/thinking/text.go delete mode 100644 internal/util/gemini_thinking.go delete mode 100644 internal/util/thinking.go delete mode 100644 internal/util/thinking_text.go diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 90ebb53f..234b06cb 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -1452,15 +1452,15 @@ func generateProjectID() string { // This function is called AFTER thinking.ApplyThinking() to apply Claude-specific constraints. 
// // It handles: -// - Stripping thinking config for unsupported models (via util.StripThinkingConfigIfUnsupported) +// - Stripping thinking config for unsupported models // - Normalizing budget to model range (via thinking.ClampBudget) // - For Claude models: ensuring thinking budget < max_tokens // - For Claude models: removing thinkingConfig if budget < minimum allowed func normalizeAntigravityThinking(model string, payload []byte, isClaude bool) []byte { - payload = util.StripThinkingConfigIfUnsupported(model, payload) modelInfo := registry.LookupModelInfo(model) if modelInfo == nil || modelInfo.Thinking == nil { - return payload + // Model doesn't support thinking - strip any thinking config + return thinking.StripThinkingConfig(payload, "antigravity") } budget := gjson.GetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget") if !budget.Exists() { diff --git a/internal/thinking/convert.go b/internal/thinking/convert.go index 92e54120..776ccef6 100644 --- a/internal/thinking/convert.go +++ b/internal/thinking/convert.go @@ -1,7 +1,6 @@ package thinking import ( - "fmt" "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" @@ -111,7 +110,7 @@ const ( // detectModelCapability determines the thinking format capability of a model. // -// This is an internal function used by NormalizeForModel to decide conversion strategy. +// This is an internal function used by validation and conversion helpers. // It analyzes the model's ThinkingSupport configuration to classify the model: // - CapabilityNone: modelInfo.Thinking is nil (model doesn't support thinking) // - CapabilityBudgetOnly: Has Min/Max but no Levels (Claude, Gemini 2.5) @@ -141,93 +140,3 @@ func detectModelCapability(modelInfo *registry.ModelInfo) ModelCapability { return CapabilityNone } } - -// normalizeMixedConfig resolves a thinking configuration when both budget and level -// might be present, applying priority rules. -// -// Priority rules (Level takes precedence over Budget): -// - If level is non-empty: use level (special handling for "auto" and "none") -// - If level is empty and budget is set: use budget -// - If neither is set (budget=0, level=""): return ModeNone -// -// This function is used internally to handle ambiguous input configurations. -func normalizeMixedConfig(budget int, level string) ThinkingConfig { - normalizedLevel := strings.ToLower(strings.TrimSpace(level)) - if normalizedLevel != "" { - switch normalizedLevel { - case string(LevelAuto): - return ThinkingConfig{Mode: ModeAuto, Budget: -1, Level: ThinkingLevel(normalizedLevel)} - case string(LevelNone): - return ThinkingConfig{Mode: ModeNone, Budget: 0, Level: ThinkingLevel(normalizedLevel)} - default: - return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(normalizedLevel)} - } - } - switch budget { - case -1: - return ThinkingConfig{Mode: ModeAuto, Budget: -1} - case 0: - return ThinkingConfig{Mode: ModeNone, Budget: 0} - default: - return ThinkingConfig{Mode: ModeBudget, Budget: budget} - } -} - -// NormalizeForModel normalizes a thinking configuration for a specific model. 
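The priority rules of the deleted normalizeMixedConfig, shown standalone (the mode and config types are local stand-ins for the thinking package's types so the sketch compiles alone):

package main

import "fmt"

type mode string

type config struct {
    Mode   mode
    Budget int
    Level  string
}

// resolve mirrors the deleted normalizeMixedConfig: a non-empty level wins,
// with "auto" and "none" special-cased; otherwise the budget decides.
func resolve(budget int, level string) config {
    switch level {
    case "auto":
        return config{Mode: "auto", Budget: -1, Level: level}
    case "none":
        return config{Mode: "none", Budget: 0, Level: level}
    case "":
        switch budget {
        case -1:
            return config{Mode: "auto", Budget: -1}
        case 0:
            return config{Mode: "none", Budget: 0}
        default:
            return config{Mode: "budget", Budget: budget}
        }
    default:
        return config{Mode: "level", Level: level}
    }
}

func main() {
    fmt.Println(resolve(8192, "high")) // level wins over budget
    fmt.Println(resolve(8192, ""))     // budget mode
}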
-// -// This function converts the configuration format based on model capabilities: -// - Budget-only models (Claude, Gemini 2.5): Level → Budget conversion -// - Level-only models (OpenAI, iFlow): Budget → Level conversion -// - Hybrid models (Gemini 3): preserve the original format -// - No thinking support (Thinking is nil): degrade to ModeNone -// - Unknown model (modelInfo is nil): passthrough (preserve original format) -// -// Parameters: -// - config: The thinking configuration to normalize (must not be nil) -// - modelInfo: Model registry information containing ThinkingSupport properties -// -// Returns: -// - Normalized ThinkingConfig suitable for the model -// - Error if conversion fails (e.g., unsupported level or invalid budget) -func NormalizeForModel(config *ThinkingConfig, modelInfo *registry.ModelInfo) (*ThinkingConfig, error) { - if config == nil { - return nil, fmt.Errorf("thinking config is nil") - } - - normalized := *config - capability := detectModelCapability(modelInfo) - - // If model doesn't support thinking, degrade to ModeNone - if capability == CapabilityNone && config.Mode != ModeNone && config.Mode != ModeAuto { - return &ThinkingConfig{Mode: ModeNone, Budget: 0}, nil - } - - switch config.Mode { - case ModeAuto, ModeNone: - return &normalized, nil - case ModeBudget: - if capability == CapabilityLevelOnly { - level, ok := ConvertBudgetToLevel(config.Budget) - if !ok { - return nil, fmt.Errorf("invalid budget: %d", config.Budget) - } - normalized.Mode = ModeLevel - normalized.Level = ThinkingLevel(level) - normalized.Budget = 0 - } - return &normalized, nil - case ModeLevel: - if capability == CapabilityBudgetOnly { - budget, ok := ConvertLevelToBudget(string(config.Level)) - if !ok { - return nil, fmt.Errorf("unknown level: %s", config.Level) - } - normalized.Mode = ModeBudget - normalized.Budget = budget - normalized.Level = "" - } - return &normalized, nil - default: - return &normalized, nil - } -} diff --git a/internal/thinking/suffix.go b/internal/thinking/suffix.go index e3b4087e..275c0856 100644 --- a/internal/thinking/suffix.go +++ b/internal/thinking/suffix.go @@ -5,7 +5,6 @@ package thinking import ( - "fmt" "strconv" "strings" ) @@ -44,29 +43,6 @@ func ParseSuffix(model string) SuffixResult { } } -// ParseSuffixWithError extracts thinking suffix and returns an error on invalid format. -// -// Invalid format cases: -// - Contains "(" but does not end with ")" -// - Contains ")" without any "(" -// -// The error message includes the original input for debugging context. -func ParseSuffixWithError(model string) (SuffixResult, error) { - lastOpen := strings.LastIndex(model, "(") - if lastOpen == -1 { - if strings.Contains(model, ")") { - return SuffixResult{ModelName: model, HasSuffix: false}, NewThinkingError(ErrInvalidSuffix, fmt.Sprintf("invalid suffix format: %s", model)) - } - return SuffixResult{ModelName: model, HasSuffix: false}, nil - } - - if !strings.HasSuffix(model, ")") { - return SuffixResult{ModelName: model, HasSuffix: false}, NewThinkingError(ErrInvalidSuffix, fmt.Sprintf("invalid suffix format: %s", model)) - } - - return ParseSuffix(model), nil -} - // ParseNumericSuffix attempts to parse a raw suffix as a numeric budget value. // // This function parses the raw suffix content (from ParseSuffix.RawSuffix) as an integer. 
diff --git a/internal/thinking/text.go b/internal/thinking/text.go new file mode 100644 index 00000000..eed1ba28 --- /dev/null +++ b/internal/thinking/text.go @@ -0,0 +1,41 @@ +package thinking + +import ( + "github.com/tidwall/gjson" +) + +// GetThinkingText extracts the thinking text from a content part. +// Handles various formats: +// - Simple string: { "thinking": "text" } or { "text": "text" } +// - Wrapped object: { "thinking": { "text": "text", "cache_control": {...} } } +// - Gemini-style: { "thought": true, "text": "text" } +// Returns the extracted text string. +func GetThinkingText(part gjson.Result) string { + // Try direct text field first (Gemini-style) + if text := part.Get("text"); text.Exists() && text.Type == gjson.String { + return text.String() + } + + // Try thinking field + thinkingField := part.Get("thinking") + if !thinkingField.Exists() { + return "" + } + + // thinking is a string + if thinkingField.Type == gjson.String { + return thinkingField.String() + } + + // thinking is an object with inner text/thinking + if thinkingField.IsObject() { + if inner := thinkingField.Get("text"); inner.Exists() && inner.Type == gjson.String { + return inner.String() + } + if inner := thinkingField.Get("thinking"); inner.Exists() && inner.Type == gjson.String { + return inner.String() + } + } + + return "" +} diff --git a/internal/translator/antigravity/claude/antigravity_claude_request.go b/internal/translator/antigravity/claude/antigravity_claude_request.go index c3e4c63f..593ae8f6 100644 --- a/internal/translator/antigravity/claude/antigravity_claude_request.go +++ b/internal/translator/antigravity/claude/antigravity_claude_request.go @@ -13,6 +13,7 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/internal/cache" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" @@ -123,7 +124,7 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _ contentTypeResult := contentResult.Get("type") if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "thinking" { // Use GetThinkingText to handle wrapped thinking objects - thinkingText := util.GetThinkingText(contentResult) + thinkingText := thinking.GetThinkingText(contentResult) signatureResult := contentResult.Get("signature") clientSignature := "" if signatureResult.Exists() && signatureResult.String() != "" { diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go index 87782a5a..7cfaa6e9 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go @@ -36,33 +36,27 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ // Model out, _ = sjson.SetBytes(out, "model", modelName) - // Reasoning effort -> thinkingBudget/include_thoughts - // Note: OpenAI official fields take precedence over extra_body.google.thinking_config + // Apply thinking configuration: convert OpenAI reasoning_effort to Gemini CLI thinkingConfig. + // Inline translation-only mapping; capability checks happen later in ApplyThinking. 
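A quick check of the new GetThinkingText against its three documented input shapes. Since internal/thinking is not importable from outside the module, the function is inlined here (slightly condensed) so the example runs standalone:

package main

import (
    "fmt"

    "github.com/tidwall/gjson"
)

// getThinkingText mirrors the new thinking.GetThinkingText (the inner
// "thinking" fallback is omitted for brevity).
func getThinkingText(part gjson.Result) string {
    if t := part.Get("text"); t.Exists() && t.Type == gjson.String {
        return t.String()
    }
    th := part.Get("thinking")
    if th.Type == gjson.String {
        return th.String()
    }
    if th.IsObject() {
        if inner := th.Get("text"); inner.Type == gjson.String {
            return inner.String()
        }
    }
    return ""
}

func main() {
    for _, s := range []string{
        `{"thinking":"plain"}`,                              // simple string
        `{"thinking":{"text":"wrapped","cache_control":{}}}`, // wrapped object
        `{"thought":true,"text":"gemini"}`,                   // Gemini-style
    } {
        fmt.Println(getThinkingText(gjson.Parse(s)))
    }
}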
+ modelInfo := registry.LookupModelInfo(modelName) re := gjson.GetBytes(rawJSON, "reasoning_effort") - hasOfficialThinking := re.Exists() - modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) - if hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil { + if re.Exists() { effort := strings.ToLower(strings.TrimSpace(re.String())) - if util.IsGemini3Model(modelName) { - switch effort { - case "none": - out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig") - case "auto": - includeThoughts := true - out = util.ApplyGeminiCLIThinkingLevel(out, "", &includeThoughts) - default: - if level, ok := util.ValidateGemini3ThinkingLevel(modelName, effort); ok { - out = util.ApplyGeminiCLIThinkingLevel(out, level, nil) - } + if effort != "" { + thinkingPath := "request.generationConfig.thinkingConfig" + if effort == "auto" { + out, _ = sjson.SetBytes(out, thinkingPath+".thinkingBudget", -1) + out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", true) + } else { + out, _ = sjson.SetBytes(out, thinkingPath+".thinkingLevel", effort) + out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", effort != "none") } - } else if len(modelInfo.Thinking.Levels) == 0 { - out = util.ApplyReasoningEffortToGeminiCLI(out, effort) } } // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) // Only apply for models that use numeric budgets, not discrete levels. - if !hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { + if !re.Exists() && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { var setBudget bool var budget int diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go index 1a6505d0..09d1dea7 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go @@ -36,18 +36,27 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo // Model out, _ = sjson.SetBytes(out, "model", modelName) - // Reasoning effort -> thinkingBudget/include_thoughts - // Note: OpenAI official fields take precedence over extra_body.google.thinking_config + // Apply thinking configuration: convert OpenAI reasoning_effort to Gemini CLI thinkingConfig. + // Inline translation-only mapping; capability checks happen later in ApplyThinking. 
re := gjson.GetBytes(rawJSON, "reasoning_effort") - hasOfficialThinking := re.Exists() - modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) - if hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { - out = util.ApplyReasoningEffortToGeminiCLI(out, re.String()) + modelInfo := registry.LookupModelInfo(modelName) + if re.Exists() { + effort := strings.ToLower(strings.TrimSpace(re.String())) + if effort != "" { + thinkingPath := "request.generationConfig.thinkingConfig" + if effort == "auto" { + out, _ = sjson.SetBytes(out, thinkingPath+".thinkingBudget", -1) + out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", true) + } else { + out, _ = sjson.SetBytes(out, thinkingPath+".thinkingLevel", effort) + out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", effort != "none") + } + } } // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) // Only apply for models that use numeric budgets, not discrete levels. - if !hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { + if !re.Exists() && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { var setBudget bool var budget int diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 2328ad36..6e1a5014 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -36,36 +36,27 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) // Model out, _ = sjson.SetBytes(out, "model", modelName) - // Reasoning effort -> thinkingBudget/include_thoughts - // Note: OpenAI official fields take precedence over extra_body.google.thinking_config - // Only apply numeric budgets for models that use budgets (not discrete levels) to avoid - // incorrectly applying thinkingBudget for level-based models like gpt-5. Gemini 3 models - // use thinkingLevel/includeThoughts instead. + // Apply thinking configuration: convert OpenAI reasoning_effort to Gemini thinkingConfig. + // Inline translation-only mapping; capability checks happen later in ApplyThinking. 
+ modelInfo := registry.LookupModelInfo(modelName) re := gjson.GetBytes(rawJSON, "reasoning_effort") - hasOfficialThinking := re.Exists() - modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) - if hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil { + if re.Exists() { effort := strings.ToLower(strings.TrimSpace(re.String())) - if util.IsGemini3Model(modelName) { - switch effort { - case "none": - out, _ = sjson.DeleteBytes(out, "generationConfig.thinkingConfig") - case "auto": - includeThoughts := true - out = util.ApplyGeminiThinkingLevel(out, "", &includeThoughts) - default: - if level, ok := util.ValidateGemini3ThinkingLevel(modelName, effort); ok { - out = util.ApplyGeminiThinkingLevel(out, level, nil) - } + if effort != "" { + thinkingPath := "generationConfig.thinkingConfig" + if effort == "auto" { + out, _ = sjson.SetBytes(out, thinkingPath+".thinkingBudget", -1) + out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", true) + } else { + out, _ = sjson.SetBytes(out, thinkingPath+".thinkingLevel", effort) + out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", effort != "none") } - } else if len(modelInfo.Thinking.Levels) == 0 { - out = util.ApplyReasoningEffortToGemini(out, effort) } } // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) // Only apply for models that use numeric budgets, not discrete levels. - if !hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { + if !re.Exists() && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { var setBudget bool var budget int diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go index 62e85eef..81bb7d40 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go @@ -6,7 +6,6 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -389,18 +388,27 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte out, _ = sjson.Set(out, "generationConfig.stopSequences", sequences) } - // OpenAI official reasoning fields take precedence - // Only convert for models that use numeric budgets (not discrete levels). - hasOfficialThinking := root.Get("reasoning.effort").Exists() - modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) - if hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { - reasoningEffort := root.Get("reasoning.effort") - out = string(util.ApplyReasoningEffortToGemini([]byte(out), reasoningEffort.String())) + // Apply thinking configuration: convert OpenAI Responses API reasoning.effort to Gemini thinkingConfig. + // Inline translation-only mapping; capability checks happen later in ApplyThinking. 
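Each translator in this patch inlines the same reasoning_effort mapping; only the thinkingConfig path differs per API shape. Factored into a standalone function for clarity (the function name is illustrative, not part of the patch):

package main

import (
    "fmt"

    "github.com/tidwall/sjson"
)

// applyEffort reproduces the inline mapping: "auto" becomes a dynamic budget,
// anything else becomes thinkingLevel, and includeThoughts is false only for "none".
func applyEffort(body []byte, thinkingPath, effort string) []byte {
    if effort == "auto" {
        body, _ = sjson.SetBytes(body, thinkingPath+".thinkingBudget", -1)
        body, _ = sjson.SetBytes(body, thinkingPath+".includeThoughts", true)
        return body
    }
    body, _ = sjson.SetBytes(body, thinkingPath+".thinkingLevel", effort)
    body, _ = sjson.SetBytes(body, thinkingPath+".includeThoughts", effort != "none")
    return body
}

func main() {
    fmt.Println(string(applyEffort([]byte(`{}`), "generationConfig.thinkingConfig", "high")))
    fmt.Println(string(applyEffort([]byte(`{}`), "request.generationConfig.thinkingConfig", "auto")))
}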
+ modelInfo := registry.LookupModelInfo(modelName) + re := root.Get("reasoning.effort") + if re.Exists() { + effort := strings.ToLower(strings.TrimSpace(re.String())) + if effort != "" { + thinkingPath := "generationConfig.thinkingConfig" + if effort == "auto" { + out, _ = sjson.Set(out, thinkingPath+".thinkingBudget", -1) + out, _ = sjson.Set(out, thinkingPath+".includeThoughts", true) + } else { + out, _ = sjson.Set(out, thinkingPath+".thinkingLevel", effort) + out, _ = sjson.Set(out, thinkingPath+".includeThoughts", effort != "none") + } + } } // Cherry Studio extension (applies only when official fields are missing) // Only apply for models that use numeric budgets, not discrete levels. - if !hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { + if !re.Exists() && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { var setBudget bool var budget int diff --git a/internal/translator/openai/claude/openai_claude_request.go b/internal/translator/openai/claude/openai_claude_request.go index 44cb237e..3817b77b 100644 --- a/internal/translator/openai/claude/openai_claude_request.go +++ b/internal/translator/openai/claude/openai_claude_request.go @@ -10,7 +10,6 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -130,7 +129,7 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream case "thinking": // Only map thinking to reasoning_content for assistant messages (security: prevent injection) if role == "assistant" { - thinkingText := util.GetThinkingText(part) + thinkingText := thinking.GetThinkingText(part) // Skip empty or whitespace-only thinking if strings.TrimSpace(thinkingText) != "" { reasoningParts = append(reasoningParts, thinkingText) diff --git a/internal/util/gemini_thinking.go b/internal/util/gemini_thinking.go deleted file mode 100644 index 838def6e..00000000 --- a/internal/util/gemini_thinking.go +++ /dev/null @@ -1,513 +0,0 @@ -package util - -import ( - "regexp" - "strings" - - "github.com/tidwall/gjson" - "github.com/tidwall/sjson" -) - -// Gemini model family detection patterns -var ( - gemini3Pattern = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]`) - gemini3ProPattern = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]pro`) - gemini3FlashPattern = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]flash`) - gemini25Pattern = regexp.MustCompile(`(?i)^gemini[_-]?2\.5[_-]`) -) - -// IsGemini3Model returns true if the model is a Gemini 3 family model. -// Gemini 3 models should use thinkingLevel (string) instead of thinkingBudget (number). -func IsGemini3Model(model string) bool { - return gemini3Pattern.MatchString(model) -} - -// IsGemini3ProModel returns true if the model is a Gemini 3 Pro variant. -// Gemini 3 Pro supports thinkingLevel: "low", "high" (default: "high") -func IsGemini3ProModel(model string) bool { - return gemini3ProPattern.MatchString(model) -} - -// IsGemini3FlashModel returns true if the model is a Gemini 3 Flash variant. -// Gemini 3 Flash supports thinkingLevel: "minimal", "low", "medium", "high" (default: "high") -func IsGemini3FlashModel(model string) bool { - return gemini3FlashPattern.MatchString(model) -} - -// IsGemini25Model returns true if the model is a Gemini 2.5 family model. 
-// Gemini 2.5 models should use thinkingBudget (number). -func IsGemini25Model(model string) bool { - return gemini25Pattern.MatchString(model) -} - -// Gemini3ProThinkingLevels are the valid thinkingLevel values for Gemini 3 Pro models. -var Gemini3ProThinkingLevels = []string{"low", "high"} - -// Gemini3FlashThinkingLevels are the valid thinkingLevel values for Gemini 3 Flash models. -var Gemini3FlashThinkingLevels = []string{"minimal", "low", "medium", "high"} - -func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool) []byte { - if budget == nil && includeThoughts == nil { - return body - } - updated := body - if budget != nil { - valuePath := "generationConfig.thinkingConfig.thinkingBudget" - rewritten, err := sjson.SetBytes(updated, valuePath, *budget) - if err == nil { - updated = rewritten - } - } - // Default to including thoughts when a budget override is present but no explicit include flag is provided. - incl := includeThoughts - if incl == nil && budget != nil && *budget != 0 { - defaultInclude := true - incl = &defaultInclude - } - if incl != nil { - if !gjson.GetBytes(updated, "generationConfig.thinkingConfig.includeThoughts").Exists() && - !gjson.GetBytes(updated, "generationConfig.thinkingConfig.include_thoughts").Exists() { - valuePath := "generationConfig.thinkingConfig.include_thoughts" - rewritten, err := sjson.SetBytes(updated, valuePath, *incl) - if err == nil { - updated = rewritten - } - } - } - return updated -} - -func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *bool) []byte { - if budget == nil && includeThoughts == nil { - return body - } - updated := body - if budget != nil { - valuePath := "request.generationConfig.thinkingConfig.thinkingBudget" - rewritten, err := sjson.SetBytes(updated, valuePath, *budget) - if err == nil { - updated = rewritten - } - } - // Default to including thoughts when a budget override is present but no explicit include flag is provided. - incl := includeThoughts - if incl == nil && budget != nil && *budget != 0 { - defaultInclude := true - incl = &defaultInclude - } - if incl != nil { - if !gjson.GetBytes(updated, "request.generationConfig.thinkingConfig.includeThoughts").Exists() && - !gjson.GetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts").Exists() { - valuePath := "request.generationConfig.thinkingConfig.include_thoughts" - rewritten, err := sjson.SetBytes(updated, valuePath, *incl) - if err == nil { - updated = rewritten - } - } - } - return updated -} - -// ApplyGeminiThinkingLevel applies thinkingLevel config for Gemini 3 models. -// For standard Gemini API format (generationConfig.thinkingConfig path). -// Per Google's documentation, Gemini 3 models should use thinkingLevel instead of thinkingBudget. -func ApplyGeminiThinkingLevel(body []byte, level string, includeThoughts *bool) []byte { - if level == "" && includeThoughts == nil { - return body - } - updated := body - if level != "" { - valuePath := "generationConfig.thinkingConfig.thinkingLevel" - rewritten, err := sjson.SetBytes(updated, valuePath, level) - if err == nil { - updated = rewritten - } - } - // Default to including thoughts when a level is set but no explicit include flag is provided. 
- incl := includeThoughts - if incl == nil && level != "" { - defaultInclude := true - incl = &defaultInclude - } - if incl != nil { - if !gjson.GetBytes(updated, "generationConfig.thinkingConfig.includeThoughts").Exists() && - !gjson.GetBytes(updated, "generationConfig.thinkingConfig.include_thoughts").Exists() { - valuePath := "generationConfig.thinkingConfig.includeThoughts" - rewritten, err := sjson.SetBytes(updated, valuePath, *incl) - if err == nil { - updated = rewritten - } - } - } - if tb := gjson.GetBytes(body, "generationConfig.thinkingConfig.thinkingBudget"); tb.Exists() { - updated, _ = sjson.DeleteBytes(updated, "generationConfig.thinkingConfig.thinkingBudget") - } - return updated -} - -// ApplyGeminiCLIThinkingLevel applies thinkingLevel config for Gemini 3 models. -// For Gemini CLI API format (request.generationConfig.thinkingConfig path). -// Per Google's documentation, Gemini 3 models should use thinkingLevel instead of thinkingBudget. -func ApplyGeminiCLIThinkingLevel(body []byte, level string, includeThoughts *bool) []byte { - if level == "" && includeThoughts == nil { - return body - } - updated := body - if level != "" { - valuePath := "request.generationConfig.thinkingConfig.thinkingLevel" - rewritten, err := sjson.SetBytes(updated, valuePath, level) - if err == nil { - updated = rewritten - } - } - // Default to including thoughts when a level is set but no explicit include flag is provided. - incl := includeThoughts - if incl == nil && level != "" { - defaultInclude := true - incl = &defaultInclude - } - if incl != nil { - if !gjson.GetBytes(updated, "request.generationConfig.thinkingConfig.includeThoughts").Exists() && - !gjson.GetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts").Exists() { - valuePath := "request.generationConfig.thinkingConfig.includeThoughts" - rewritten, err := sjson.SetBytes(updated, valuePath, *incl) - if err == nil { - updated = rewritten - } - } - } - if tb := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget"); tb.Exists() { - updated, _ = sjson.DeleteBytes(updated, "request.generationConfig.thinkingConfig.thinkingBudget") - } - return updated -} - -// ValidateGemini3ThinkingLevel validates that the thinkingLevel is valid for the Gemini 3 model variant. -// Returns the validated level (normalized to lowercase) and true if valid, or empty string and false if invalid. -func ValidateGemini3ThinkingLevel(model, level string) (string, bool) { - if level == "" { - return "", false - } - normalized := strings.ToLower(strings.TrimSpace(level)) - - var validLevels []string - if IsGemini3ProModel(model) { - validLevels = Gemini3ProThinkingLevels - } else if IsGemini3FlashModel(model) { - validLevels = Gemini3FlashThinkingLevels - } else if IsGemini3Model(model) { - // Unknown Gemini 3 variant - allow all levels as fallback - validLevels = Gemini3FlashThinkingLevels - } else { - return "", false - } - - for _, valid := range validLevels { - if normalized == valid { - return normalized, true - } - } - return "", false -} - -// ThinkingBudgetToGemini3Level converts a thinkingBudget to a thinkingLevel for Gemini 3 models. -// This provides backward compatibility when thinkingBudget is provided for Gemini 3 models. -// Returns the appropriate thinkingLevel and true if conversion is possible. 
-func ThinkingBudgetToGemini3Level(model string, budget int) (string, bool) { - if !IsGemini3Model(model) { - return "", false - } - - // Map budget to level based on Google's documentation - // Gemini 3 Pro: "low", "high" (default: "high") - // Gemini 3 Flash: "minimal", "low", "medium", "high" (default: "high") - switch { - case budget == -1: - // Dynamic budget maps to "high" (API default) - return "high", true - case budget == 0: - // Zero budget - Gemini 3 doesn't support disabling thinking - // Map to lowest available level - if IsGemini3FlashModel(model) { - return "minimal", true - } - return "low", true - case budget > 0 && budget <= 512: - if IsGemini3FlashModel(model) { - return "minimal", true - } - return "low", true - case budget <= 1024: - return "low", true - case budget <= 8192: - if IsGemini3FlashModel(model) { - return "medium", true - } - return "low", true // Pro doesn't have medium, use low - default: - return "high", true - } -} - -// modelsWithDefaultThinking lists models that should have thinking enabled by default -// when no explicit thinkingConfig is provided. -// Note: Gemini 3 models are NOT included here because per Google's official documentation: -// - thinkingLevel defaults to "high" (dynamic thinking) -// - includeThoughts defaults to false -// -// We should not override these API defaults; let users explicitly configure if needed. -var modelsWithDefaultThinking = map[string]bool{ - // "gemini-3-pro-preview": true, - // "gemini-3-pro-image-preview": true, - // "gemini-3-flash-preview": true, -} - -// ModelHasDefaultThinking returns true if the model should have thinking enabled by default. -func ModelHasDefaultThinking(model string) bool { - return modelsWithDefaultThinking[model] -} - -// ApplyDefaultThinkingIfNeeded injects default thinkingConfig for models that require it. -// For standard Gemini API format (generationConfig.thinkingConfig path). -// Returns the modified body if thinkingConfig was added, otherwise returns the original. -// For Gemini 3 models, uses thinkingLevel instead of thinkingBudget per Google's documentation. -func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte { - if !ModelHasDefaultThinking(model) { - return body - } - if gjson.GetBytes(body, "generationConfig.thinkingConfig").Exists() { - return body - } - // Gemini 3 models use thinkingLevel instead of thinkingBudget - if IsGemini3Model(model) { - // Don't set a default - let the API use its dynamic default ("high") - // Only set includeThoughts - updated, _ := sjson.SetBytes(body, "generationConfig.thinkingConfig.includeThoughts", true) - return updated - } - // Gemini 2.5 and other models use thinkingBudget - updated, _ := sjson.SetBytes(body, "generationConfig.thinkingConfig.thinkingBudget", -1) - updated, _ = sjson.SetBytes(updated, "generationConfig.thinkingConfig.include_thoughts", true) - return updated -} - -// StripThinkingConfigIfUnsupported removes thinkingConfig from the request body -// when the target model does not advertise Thinking capability. It cleans both -// standard Gemini and Gemini CLI JSON envelopes. This acts as a final safety net -// in case upstream injected thinking for an unsupported model. 
-func StripThinkingConfigIfUnsupported(model string, body []byte) []byte { - if ModelSupportsThinking(model) || len(body) == 0 { - return body - } - updated := body - // Gemini CLI path - updated, _ = sjson.DeleteBytes(updated, "request.generationConfig.thinkingConfig") - // Standard Gemini path - updated, _ = sjson.DeleteBytes(updated, "generationConfig.thinkingConfig") - return updated -} - -// NormalizeGeminiThinkingBudget normalizes the thinkingBudget value in a standard Gemini -// request body (generationConfig.thinkingConfig.thinkingBudget path). -// For Gemini 3 models, converts thinkingBudget to thinkingLevel per Google's documentation, -// unless skipGemini3Check is provided and true. -func NormalizeGeminiThinkingBudget(model string, body []byte, skipGemini3Check ...bool) []byte { - const budgetPath = "generationConfig.thinkingConfig.thinkingBudget" - const levelPath = "generationConfig.thinkingConfig.thinkingLevel" - - budget := gjson.GetBytes(body, budgetPath) - if !budget.Exists() { - return body - } - - // For Gemini 3 models, convert thinkingBudget to thinkingLevel - skipGemini3 := len(skipGemini3Check) > 0 && skipGemini3Check[0] - if IsGemini3Model(model) && !skipGemini3 { - if level, ok := ThinkingBudgetToGemini3Level(model, int(budget.Int())); ok { - updated, _ := sjson.SetBytes(body, levelPath, level) - updated, _ = sjson.DeleteBytes(updated, budgetPath) - return updated - } - // If conversion fails, just remove the budget (let API use default) - updated, _ := sjson.DeleteBytes(body, budgetPath) - return updated - } - - // For Gemini 2.5 and other models, normalize the budget value - normalized := NormalizeThinkingBudget(model, int(budget.Int())) - updated, _ := sjson.SetBytes(body, budgetPath, normalized) - return updated -} - -// NormalizeGeminiCLIThinkingBudget normalizes the thinkingBudget value in a Gemini CLI -// request body (request.generationConfig.thinkingConfig.thinkingBudget path). -// For Gemini 3 models, converts thinkingBudget to thinkingLevel per Google's documentation, -// unless skipGemini3Check is provided and true. -func NormalizeGeminiCLIThinkingBudget(model string, body []byte, skipGemini3Check ...bool) []byte { - const budgetPath = "request.generationConfig.thinkingConfig.thinkingBudget" - const levelPath = "request.generationConfig.thinkingConfig.thinkingLevel" - - budget := gjson.GetBytes(body, budgetPath) - if !budget.Exists() { - return body - } - - // For Gemini 3 models, convert thinkingBudget to thinkingLevel - skipGemini3 := len(skipGemini3Check) > 0 && skipGemini3Check[0] - if IsGemini3Model(model) && !skipGemini3 { - if level, ok := ThinkingBudgetToGemini3Level(model, int(budget.Int())); ok { - updated, _ := sjson.SetBytes(body, levelPath, level) - updated, _ = sjson.DeleteBytes(updated, budgetPath) - return updated - } - // If conversion fails, just remove the budget (let API use default) - updated, _ := sjson.DeleteBytes(body, budgetPath) - return updated - } - - // For Gemini 2.5 and other models, normalize the budget value - normalized := NormalizeThinkingBudget(model, int(budget.Int())) - updated, _ := sjson.SetBytes(body, budgetPath, normalized) - return updated -} - -// ReasoningEffortBudgetMapping defines the thinkingBudget values for each reasoning effort level. 
-var ReasoningEffortBudgetMapping = map[string]int{ - "none": 0, - "auto": -1, - "minimal": 512, - "low": 1024, - "medium": 8192, - "high": 24576, - "xhigh": 32768, -} - -// ApplyReasoningEffortToGemini applies OpenAI reasoning_effort to Gemini thinkingConfig -// for standard Gemini API format (generationConfig.thinkingConfig path). -// Returns the modified body with thinkingBudget and include_thoughts set. -func ApplyReasoningEffortToGemini(body []byte, effort string) []byte { - normalized := strings.ToLower(strings.TrimSpace(effort)) - if normalized == "" { - return body - } - - budgetPath := "generationConfig.thinkingConfig.thinkingBudget" - includePath := "generationConfig.thinkingConfig.include_thoughts" - - if normalized == "none" { - body, _ = sjson.DeleteBytes(body, "generationConfig.thinkingConfig") - return body - } - - budget, ok := ReasoningEffortBudgetMapping[normalized] - if !ok { - return body - } - - body, _ = sjson.SetBytes(body, budgetPath, budget) - body, _ = sjson.SetBytes(body, includePath, true) - return body -} - -// ApplyReasoningEffortToGeminiCLI applies OpenAI reasoning_effort to Gemini CLI thinkingConfig -// for Gemini CLI API format (request.generationConfig.thinkingConfig path). -// Returns the modified body with thinkingBudget and include_thoughts set. -func ApplyReasoningEffortToGeminiCLI(body []byte, effort string) []byte { - normalized := strings.ToLower(strings.TrimSpace(effort)) - if normalized == "" { - return body - } - - budgetPath := "request.generationConfig.thinkingConfig.thinkingBudget" - includePath := "request.generationConfig.thinkingConfig.include_thoughts" - - if normalized == "none" { - body, _ = sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig") - return body - } - - budget, ok := ReasoningEffortBudgetMapping[normalized] - if !ok { - return body - } - - body, _ = sjson.SetBytes(body, budgetPath, budget) - body, _ = sjson.SetBytes(body, includePath, true) - return body -} - -// ConvertThinkingLevelToBudget checks for "generationConfig.thinkingConfig.thinkingLevel" -// and converts it to "thinkingBudget" for Gemini 2.5 models. -// For Gemini 3 models, preserves thinkingLevel unless skipGemini3Check is provided and true. -// Mappings for Gemini 2.5: -// - "high" -> 32768 -// - "medium" -> 8192 -// - "low" -> 1024 -// - "minimal" -> 512 -// -// It removes "thinkingLevel" after conversion (for Gemini 2.5 only). -func ConvertThinkingLevelToBudget(body []byte, model string, skipGemini3Check ...bool) []byte { - levelPath := "generationConfig.thinkingConfig.thinkingLevel" - res := gjson.GetBytes(body, levelPath) - if !res.Exists() { - return body - } - - // For Gemini 3 models, preserve thinkingLevel unless explicitly skipped - skipGemini3 := len(skipGemini3Check) > 0 && skipGemini3Check[0] - if IsGemini3Model(model) && !skipGemini3 { - return body - } - - budget, ok := ThinkingLevelToBudget(res.String()) - if !ok { - updated, _ := sjson.DeleteBytes(body, levelPath) - return updated - } - - budgetPath := "generationConfig.thinkingConfig.thinkingBudget" - updated, err := sjson.SetBytes(body, budgetPath, budget) - if err != nil { - return body - } - - updated, err = sjson.DeleteBytes(updated, levelPath) - if err != nil { - return body - } - return updated -} - -// ConvertThinkingLevelToBudgetCLI checks for "request.generationConfig.thinkingConfig.thinkingLevel" -// and converts it to "thinkingBudget" for Gemini 2.5 models. -// For Gemini 3 models, preserves thinkingLevel as-is (does not convert). 
-func ConvertThinkingLevelToBudgetCLI(body []byte, model string) []byte { - levelPath := "request.generationConfig.thinkingConfig.thinkingLevel" - res := gjson.GetBytes(body, levelPath) - if !res.Exists() { - return body - } - - // For Gemini 3 models, preserve thinkingLevel - don't convert to budget - if IsGemini3Model(model) { - return body - } - - budget, ok := ThinkingLevelToBudget(res.String()) - if !ok { - updated, _ := sjson.DeleteBytes(body, levelPath) - return updated - } - - budgetPath := "request.generationConfig.thinkingConfig.thinkingBudget" - updated, err := sjson.SetBytes(body, budgetPath, budget) - if err != nil { - return body - } - - updated, err = sjson.DeleteBytes(updated, levelPath) - if err != nil { - return body - } - return updated -} diff --git a/internal/util/thinking.go b/internal/util/thinking.go deleted file mode 100644 index 657a1ff1..00000000 --- a/internal/util/thinking.go +++ /dev/null @@ -1,122 +0,0 @@ -package util - -import ( - "strings" - - "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" -) - -// ModelSupportsThinking reports whether the given model has Thinking capability -// according to the model registry metadata (provider-agnostic). -// -// Deprecated: Use thinking.ApplyThinking with modelInfo.Thinking check. -func ModelSupportsThinking(model string) bool { - if model == "" { - return false - } - // First check the global dynamic registry - if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil { - return info.Thinking != nil - } - // Fallback: check static model definitions - if info := registry.LookupStaticModelInfo(model); info != nil { - return info.Thinking != nil - } - // Fallback: check Antigravity static config - if cfg := registry.GetAntigravityModelConfig()[model]; cfg != nil { - return cfg.Thinking != nil - } - return false -} - -// NormalizeThinkingBudget clamps the requested thinking budget to the -// supported range for the specified model using registry metadata only. -// If the model is unknown or has no Thinking metadata, returns the original budget. -// For dynamic (-1), returns -1 if DynamicAllowed; otherwise approximates mid-range -// or min (0 if zero is allowed and mid <= 0). -// -// Deprecated: Use thinking.ValidateConfig for budget normalization. -func NormalizeThinkingBudget(model string, budget int) int { - if budget == -1 { // dynamic - if found, minBudget, maxBudget, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found { - if dynamicAllowed { - return -1 - } - mid := (minBudget + maxBudget) / 2 - if mid <= 0 && zeroAllowed { - return 0 - } - if mid <= 0 { - return minBudget - } - return mid - } - return -1 - } - if found, minBudget, maxBudget, zeroAllowed, _ := thinkingRangeFromRegistry(model); found { - if budget == 0 { - if zeroAllowed { - return 0 - } - return minBudget - } - if budget < minBudget { - return minBudget - } - if budget > maxBudget { - return maxBudget - } - return budget - } - return budget -} - -// thinkingRangeFromRegistry attempts to read thinking ranges from the model registry. 
-func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zeroAllowed bool, dynamicAllowed bool) { - if model == "" { - return false, 0, 0, false, false - } - // First check global dynamic registry - if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil && info.Thinking != nil { - return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed - } - // Fallback: check static model definitions - if info := registry.LookupStaticModelInfo(model); info != nil && info.Thinking != nil { - return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed - } - // Fallback: check Antigravity static config - if cfg := registry.GetAntigravityModelConfig()[model]; cfg != nil && cfg.Thinking != nil { - return true, cfg.Thinking.Min, cfg.Thinking.Max, cfg.Thinking.ZeroAllowed, cfg.Thinking.DynamicAllowed - } - return false, 0, 0, false, false -} - -// ThinkingLevelToBudget maps a Gemini thinkingLevel to a numeric thinking budget (tokens). -// -// Mappings: -// - "minimal" -> 512 -// - "low" -> 1024 -// - "medium" -> 8192 -// - "high" -> 32768 -// -// Returns false when the level is empty or unsupported. -// -// Deprecated: Use thinking.ConvertLevelToBudget instead. -func ThinkingLevelToBudget(level string) (int, bool) { - if level == "" { - return 0, false - } - normalized := strings.ToLower(strings.TrimSpace(level)) - switch normalized { - case "minimal": - return 512, true - case "low": - return 1024, true - case "medium": - return 8192, true - case "high": - return 32768, true - default: - return 0, false - } -} diff --git a/internal/util/thinking_text.go b/internal/util/thinking_text.go deleted file mode 100644 index 7ebb76fc..00000000 --- a/internal/util/thinking_text.go +++ /dev/null @@ -1,95 +0,0 @@ -package util - -import ( - "github.com/tidwall/gjson" - "github.com/tidwall/sjson" -) - -// GetThinkingText extracts the thinking text from a content part. -// Handles various formats: -// - Simple string: { "thinking": "text" } or { "text": "text" } -// - Wrapped object: { "thinking": { "text": "text", "cache_control": {...} } } -// - Gemini-style: { "thought": true, "text": "text" } -// Returns the extracted text string. -// -// Deprecated: Use thinking package for thinking text extraction. -func GetThinkingText(part gjson.Result) string { - // Try direct text field first (Gemini-style) - if text := part.Get("text"); text.Exists() && text.Type == gjson.String { - return text.String() - } - - // Try thinking field - thinkingField := part.Get("thinking") - if !thinkingField.Exists() { - return "" - } - - // thinking is a string - if thinkingField.Type == gjson.String { - return thinkingField.String() - } - - // thinking is an object with inner text/thinking - if thinkingField.IsObject() { - if inner := thinkingField.Get("text"); inner.Exists() && inner.Type == gjson.String { - return inner.String() - } - if inner := thinkingField.Get("thinking"); inner.Exists() && inner.Type == gjson.String { - return inner.String() - } - } - - return "" -} - -// GetThinkingTextFromJSON extracts thinking text from a raw JSON string. -// -// Deprecated: Use thinking package for thinking text extraction. -func GetThinkingTextFromJSON(jsonStr string) string { - return GetThinkingText(gjson.Parse(jsonStr)) -} - -// SanitizeThinkingPart normalizes a thinking part to a canonical form. -// Strips cache_control and other non-essential fields. -// Returns the sanitized part as JSON string. 
-// -// Deprecated: Use thinking package for thinking part sanitization. -func SanitizeThinkingPart(part gjson.Result) string { - // Gemini-style: { thought: true, text, thoughtSignature } - if part.Get("thought").Bool() { - result := `{"thought":true}` - if text := GetThinkingText(part); text != "" { - result, _ = sjson.Set(result, "text", text) - } - if sig := part.Get("thoughtSignature"); sig.Exists() && sig.Type == gjson.String { - result, _ = sjson.Set(result, "thoughtSignature", sig.String()) - } - return result - } - - // Anthropic-style: { type: "thinking", thinking, signature } - if part.Get("type").String() == "thinking" || part.Get("thinking").Exists() { - result := `{"type":"thinking"}` - if text := GetThinkingText(part); text != "" { - result, _ = sjson.Set(result, "thinking", text) - } - if sig := part.Get("signature"); sig.Exists() && sig.Type == gjson.String { - result, _ = sjson.Set(result, "signature", sig.String()) - } - return result - } - - // Not a thinking part, return as-is but strip cache_control - return StripCacheControl(part.Raw) -} - -// StripCacheControl removes cache_control and providerOptions from a JSON object. -// -// Deprecated: Use thinking package for cache control stripping. -func StripCacheControl(jsonStr string) string { - result := jsonStr - result, _ = sjson.Delete(result, "cache_control") - result, _ = sjson.Delete(result, "providerOptions") - return result -} From ed8b0f25eee6754fd08ac890fa8f55b11faf1e19 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 14 Jan 2026 23:02:27 +0800 Subject: [PATCH 15/65] fix(thinking): use LookupModelInfo for model data --- internal/runtime/executor/claude_executor.go | 2 +- .../translator/antigravity/claude/antigravity_claude_request.go | 2 +- internal/translator/claude/gemini/claude_gemini_request.go | 2 +- .../claude/openai/chat-completions/claude_openai_request.go | 2 +- .../claude/openai/responses/claude_openai-responses_request.go | 2 +- internal/translator/codex/claude/codex_claude_request.go | 2 +- internal/translator/codex/gemini/codex_gemini_request.go | 2 +- .../translator/gemini-cli/claude/gemini-cli_claude_request.go | 2 +- internal/translator/gemini/claude/gemini_claude_request.go | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 636fefad..b4cbd450 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -560,7 +560,7 @@ func ensureMaxTokensForThinking(modelName string, body []byte) []byte { // Look up the model's max completion tokens from the registry maxCompletionTokens := 0 - if modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName); modelInfo != nil { + if modelInfo := registry.LookupModelInfo(modelName); modelInfo != nil { maxCompletionTokens = modelInfo.MaxCompletionTokens } diff --git a/internal/translator/antigravity/claude/antigravity_claude_request.go b/internal/translator/antigravity/claude/antigravity_claude_request.go index 593ae8f6..2611b5c6 100644 --- a/internal/translator/antigravity/claude/antigravity_claude_request.go +++ b/internal/translator/antigravity/claude/antigravity_claude_request.go @@ -388,7 +388,7 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _ // Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() { - modelInfo 
:= registry.GetGlobalRegistry().GetModelInfo(modelName) + modelInfo := registry.LookupModelInfo(modelName) if modelInfo != nil && modelInfo.Thinking != nil { if t.Get("type").String() == "enabled" { if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { diff --git a/internal/translator/claude/gemini/claude_gemini_request.go b/internal/translator/claude/gemini/claude_gemini_request.go index c987c4b0..8c5b1095 100644 --- a/internal/translator/claude/gemini/claude_gemini_request.go +++ b/internal/translator/claude/gemini/claude_gemini_request.go @@ -118,7 +118,7 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream // Include thoughts configuration for reasoning process visibility // Only apply for models that support thinking and use numeric budgets, not discrete levels. if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() { - modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + modelInfo := registry.LookupModelInfo(modelName) if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { // Check for thinkingBudget first - if present, enable thinking with budget if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() && thinkingBudget.Int() > 0 { diff --git a/internal/translator/claude/openai/chat-completions/claude_openai_request.go b/internal/translator/claude/openai/chat-completions/claude_openai_request.go index 1ae1f274..3a165a3d 100644 --- a/internal/translator/claude/openai/chat-completions/claude_openai_request.go +++ b/internal/translator/claude/openai/chat-completions/claude_openai_request.go @@ -67,7 +67,7 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream root := gjson.ParseBytes(rawJSON) if v := root.Get("reasoning_effort"); v.Exists() { - modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + modelInfo := registry.LookupModelInfo(modelName) if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { effort := strings.ToLower(strings.TrimSpace(v.String())) if effort != "" { diff --git a/internal/translator/claude/openai/responses/claude_openai-responses_request.go b/internal/translator/claude/openai/responses/claude_openai-responses_request.go index 3717afa4..479fb78f 100644 --- a/internal/translator/claude/openai/responses/claude_openai-responses_request.go +++ b/internal/translator/claude/openai/responses/claude_openai-responses_request.go @@ -55,7 +55,7 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte root := gjson.ParseBytes(rawJSON) if v := root.Get("reasoning.effort"); v.Exists() { - modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + modelInfo := registry.LookupModelInfo(modelName) if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { effort := strings.ToLower(strings.TrimSpace(v.String())) if effort != "" { diff --git a/internal/translator/codex/claude/codex_claude_request.go b/internal/translator/codex/claude/codex_claude_request.go index e31671b3..a3157833 100644 --- a/internal/translator/codex/claude/codex_claude_request.go +++ b/internal/translator/codex/claude/codex_claude_request.go @@ -221,7 +221,7 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool) // Convert thinking.budget_tokens to reasoning.effort for level-based models reasoningEffort := "medium" // default if thinkingConfig := 
rootResult.Get("thinking"); thinkingConfig.Exists() && thinkingConfig.IsObject() { - modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + modelInfo := registry.LookupModelInfo(modelName) switch thinkingConfig.Get("type").String() { case "enabled": if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) > 0 { diff --git a/internal/translator/codex/gemini/codex_gemini_request.go b/internal/translator/codex/gemini/codex_gemini_request.go index f6b258ef..fe5c0a5f 100644 --- a/internal/translator/codex/gemini/codex_gemini_request.go +++ b/internal/translator/codex/gemini/codex_gemini_request.go @@ -253,7 +253,7 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool) reasoningEffort := "medium" // default if genConfig := root.Get("generationConfig"); genConfig.Exists() { if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() { - modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + modelInfo := registry.LookupModelInfo(modelName) if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) > 0 { if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() { budget := int(thinkingBudget.Int()) diff --git a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go index f522df81..f1bed88b 100644 --- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go +++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go @@ -161,7 +161,7 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) [] // Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() { - modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + modelInfo := registry.LookupModelInfo(modelName) if modelInfo != nil && modelInfo.Thinking != nil { if t.Get("type").String() == "enabled" { if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { diff --git a/internal/translator/gemini/claude/gemini_claude_request.go b/internal/translator/gemini/claude/gemini_claude_request.go index a10d00e9..f0dbd513 100644 --- a/internal/translator/gemini/claude/gemini_claude_request.go +++ b/internal/translator/gemini/claude/gemini_claude_request.go @@ -155,7 +155,7 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool) // Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when enabled // Only apply for models that use numeric budgets, not discrete levels. 
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() { - modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + modelInfo := registry.LookupModelInfo(modelName) if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { if t.Get("type").String() == "enabled" { if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { From f6a2d072e66b4efe6435c434f99a8d934787458c Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 15 Jan 2026 09:51:30 +0800 Subject: [PATCH 16/65] refactor(thinking): refine configuration logging --- internal/logging/global_logger.go | 25 +++++++++++++++++++++++-- internal/thinking/apply.go | 27 ++++++++++++++++----------- internal/thinking/types.go | 16 ++++++++++++++++ sdk/cliproxy/auth/conductor.go | 12 ++++++------ 4 files changed, 61 insertions(+), 19 deletions(-) diff --git a/internal/logging/global_logger.go b/internal/logging/global_logger.go index f87b10e4..f78477d4 100644 --- a/internal/logging/global_logger.go +++ b/internal/logging/global_logger.go @@ -6,6 +6,7 @@ import ( "io" "os" "path/filepath" + "sort" "strings" "sync" @@ -52,11 +53,31 @@ func (m *LogFormatter) Format(entry *log.Entry) ([]byte, error) { } levelStr := fmt.Sprintf("%-5s", level) + // Build fields string (excluding request_id which is already shown) + var fieldsStr string + if len(entry.Data) > 0 { + var keys []string + for k := range entry.Data { + if k == "request_id" { + continue + } + keys = append(keys, k) + } + sort.Strings(keys) + var fields []string + for _, k := range keys { + fields = append(fields, fmt.Sprintf("%s=%v", k, entry.Data[k])) + } + if len(fields) > 0 { + fieldsStr = " " + strings.Join(fields, " ") + } + } + var formatted string if entry.Caller != nil { - formatted = fmt.Sprintf("[%s] [%s] [%s] [%s:%d] %s\n", timestamp, reqID, levelStr, filepath.Base(entry.Caller.File), entry.Caller.Line, message) + formatted = fmt.Sprintf("[%s] [%s] [%s] [%s:%d] %s%s\n", timestamp, reqID, levelStr, filepath.Base(entry.Caller.File), entry.Caller.Line, message, fieldsStr) } else { - formatted = fmt.Sprintf("[%s] [%s] [%s] %s\n", timestamp, reqID, levelStr, message) + formatted = fmt.Sprintf("[%s] [%s] [%s] %s%s\n", timestamp, reqID, levelStr, message, fieldsStr) } buffer.WriteString(formatted) diff --git a/internal/thinking/apply.go b/internal/thinking/apply.go index 415b721c..45898bd0 100644 --- a/internal/thinking/apply.go +++ b/internal/thinking/apply.go @@ -117,18 +117,21 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) { if suffixResult.HasSuffix { config = parseSuffixToConfig(suffixResult.RawSuffix) log.WithFields(log.Fields{ - "provider": provider, - "model": model, - "raw_suffix": suffixResult.RawSuffix, - "config": config, - }).Debug("thinking: using suffix config (priority)") + "provider": provider, + "model": model, + "mode": config.Mode, + "budget": config.Budget, + "level": config.Level, + }).Debug("thinking: config from model suffix") } else { config = extractThinkingConfig(body, provider) log.WithFields(log.Fields{ "provider": provider, "model": modelInfo.ID, - "config": config, - }).Debug("thinking: extracted config from request body") + "mode": config.Mode, + "budget": config.Budget, + "level": config.Level, + }).Debug("thinking: original config from request") } if !hasThinkingConfig(config) { @@ -163,10 +166,12 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) { } log.WithFields(log.Fields{ - 
"provider": provider, - "model": modelInfo.ID, - "validated": *validated, - }).Debug("thinking: applying validated config") + "provider": provider, + "model": modelInfo.ID, + "mode": validated.Mode, + "budget": validated.Budget, + "level": validated.Level, + }).Debug("thinking: processed config to apply") // 6. Apply configuration using provider-specific applier return applier.Apply(body, *validated, modelInfo) diff --git a/internal/thinking/types.go b/internal/thinking/types.go index 7197fa6e..6ae1e088 100644 --- a/internal/thinking/types.go +++ b/internal/thinking/types.go @@ -20,6 +20,22 @@ const ( ModeAuto ) +// String returns the string representation of ThinkingMode. +func (m ThinkingMode) String() string { + switch m { + case ModeBudget: + return "budget" + case ModeLevel: + return "level" + case ModeNone: + return "none" + case ModeAuto: + return "auto" + default: + return "unknown" + } +} + // ThinkingLevel represents a discrete thinking level. type ThinkingLevel string diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index 8b3b41d4..d6d8e229 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -591,8 +591,8 @@ func (m *Manager) executeMixedOnce(ctx context.Context, providers []string, req execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt) } execReq := req - execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth) - execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata) + execReq.Model = rewriteModelForAuth(routeModel, auth) + execReq.Model = m.applyOAuthModelMapping(auth, execReq.Model) resp, errExec := executor.Execute(execCtx, auth, execReq, opts) result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil} if errExec != nil { @@ -639,8 +639,8 @@ func (m *Manager) executeCountMixedOnce(ctx context.Context, providers []string, execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt) } execReq := req - execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth) - execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata) + execReq.Model = rewriteModelForAuth(routeModel, auth) + execReq.Model = m.applyOAuthModelMapping(auth, execReq.Model) resp, errExec := executor.CountTokens(execCtx, auth, execReq, opts) result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil} if errExec != nil { @@ -687,8 +687,8 @@ func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt) } execReq := req - execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth) - execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata) + execReq.Model = rewriteModelForAuth(routeModel, auth) + execReq.Model = m.applyOAuthModelMapping(auth, execReq.Model) chunks, errStream := executor.ExecuteStream(execCtx, auth, execReq, opts) if errStream != nil { rerr := &Error{Message: errStream.Error()} From 847be0e99db618da078e0c311cae9772efc090ed Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 15 Jan 2026 10:18:22 +0800 Subject: [PATCH 17/65] fix(auth): use base model name for auth matching by stripping suffix --- sdk/cliproxy/auth/conductor.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sdk/cliproxy/auth/conductor.go 
b/sdk/cliproxy/auth/conductor.go index d6d8e229..709f640a 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -1743,6 +1743,13 @@ func (m *Manager) pickNextMixed(ctx context.Context, providers []string, model s m.mu.RLock() candidates := make([]*Auth, 0, len(m.auths)) modelKey := strings.TrimSpace(model) + // Always use base model name (without thinking suffix) for auth matching. + if modelKey != "" { + parsed := thinking.ParseSuffix(modelKey) + if parsed.ModelName != "" { + modelKey = strings.TrimSpace(parsed.ModelName) + } + } registryRef := registry.GetGlobalRegistry() for _, candidate := range m.auths { if candidate == nil || candidate.Disabled { From 1fbbba6f595862afc803d0d5a3492514f44f8eaa Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 15 Jan 2026 10:44:54 +0800 Subject: [PATCH 18/65] feat(logging): order log fields for improved readability --- internal/logging/global_logger.go | 24 ++++++++++++++---------- internal/thinking/apply.go | 2 +- internal/thinking/validate.go | 14 ++++++-------- 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/internal/logging/global_logger.go b/internal/logging/global_logger.go index f78477d4..3b034dc6 100644 --- a/internal/logging/global_logger.go +++ b/internal/logging/global_logger.go @@ -6,7 +6,6 @@ import ( "io" "os" "path/filepath" - "sort" "strings" "sync" @@ -30,6 +29,9 @@ var ( // Format: [2025-12-23 20:14:04] [debug] [manager.go:524] | a1b2c3d4 | Use API key sk-9...0RHO for model gpt-5.2 type LogFormatter struct{} +// logFieldOrder defines the display order for common log fields. +var logFieldOrder = []string{"provider", "model", "mode", "budget", "level", "original_value", "min", "max", "clamped_to", "error"} + // Format renders a single log entry with custom formatting. func (m *LogFormatter) Format(entry *log.Entry) ([]byte, error) { var buffer *bytes.Buffer @@ -56,17 +58,19 @@ func (m *LogFormatter) Format(entry *log.Entry) ([]byte, error) { // Build fields string (excluding request_id which is already shown) var fieldsStr string if len(entry.Data) > 0 { - var keys []string - for k := range entry.Data { - if k == "request_id" { + seen := make(map[string]bool) + var fields []string + for _, k := range logFieldOrder { + if v, ok := entry.Data[k]; ok { + fields = append(fields, fmt.Sprintf("%s=%v", k, v)) + seen[k] = true + } + } + for k, v := range entry.Data { + if k == "request_id" || seen[k] { continue } - keys = append(keys, k) - } - sort.Strings(keys) - var fields []string - for _, k := range keys { - fields = append(fields, fmt.Sprintf("%s=%v", k, entry.Data[k])) + fields = append(fields, fmt.Sprintf("%s=%v", k, v)) } if len(fields) > 0 { fieldsStr = " " + strings.Join(fields, " ") diff --git a/internal/thinking/apply.go b/internal/thinking/apply.go index 45898bd0..0b26ca0b 100644 --- a/internal/thinking/apply.go +++ b/internal/thinking/apply.go @@ -149,7 +149,7 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) { "provider": provider, "model": modelInfo.ID, "error": err.Error(), - }).Warn("thinking: validation failed, returning original body") + }).Warn("thinking: validation failed") // Return original body on validation failure (defensive programming). // This ensures callers who ignore the error won't receive nil body. // The upstream service will decide how to handle the unmodified request. 
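The validate.go refactor below folds the zero-allowed check of ClampBudgetWithZeroCheck into ClampBudget, which now takes the full registry.ModelInfo so clamp logs carry provider/model context. A minimal sketch of the resulting behavior, assuming a hypothetical budget-only registry entry (field names follow the registry types used elsewhere in this series; the internal packages are importable only from within this module):

package main

import (
	"fmt"

	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
)

func main() {
	// Hypothetical model entry: numeric budgets 1024..32768, zero disallowed.
	info := &registry.ModelInfo{
		ID:       "gemini-2.5-pro",
		Thinking: &registry.ThinkingSupport{Min: 1024, Max: 32768},
	}
	fmt.Println(thinking.ClampBudget(-1, info, "gemini"))    // -1: auto passes through unclamped
	fmt.Println(thinking.ClampBudget(0, info, "gemini"))     // 1024: zero not allowed, warned and raised to Min
	fmt.Println(thinking.ClampBudget(512, info, "gemini"))   // 1024: below Min, clamped up (debug log)
	fmt.Println(thinking.ClampBudget(50000, info, "gemini")) // 32768: above Max, clamped down
}
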
diff --git a/internal/thinking/validate.go b/internal/thinking/validate.go index 66f8160c..886f3161 100644 --- a/internal/thinking/validate.go +++ b/internal/thinking/validate.go @@ -76,12 +76,10 @@ func ClampBudgetWithZeroCheck(value, min, max int, zeroAllowed bool) int { return 0 } log.WithFields(log.Fields{ - "original_value": value, - "clamped_to": min, - "min": min, - "max": max, - "reason": "zero_not_allowed", - }).Warn("budget clamped: zero not allowed") + "clamped_to": min, + "min": min, + "max": max, + }).Warn("thinking: budget zero not allowed") return min } @@ -253,8 +251,8 @@ func convertAutoToMidRange(config ThinkingConfig, support *registry.ThinkingSupp func logClamp(original, clampedTo, min, max int) { log.WithFields(log.Fields{ "original_value": original, - "clamped_to": clampedTo, "min": min, "max": max, - }).Debug("budget clamped: value outside model range") + "clamped_to": clampedTo, + }).Debug("thinking: budget clamped") } From 5a77b7728e4d76222ad5c9dcde6b0e17c1160542 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 15 Jan 2026 11:29:53 +0800 Subject: [PATCH 19/65] refactor(thinking): improve budget clamping and logging with provider/model context --- .../runtime/executor/antigravity_executor.go | 2 +- internal/thinking/apply.go | 40 +++-- internal/thinking/validate.go | 143 ++++++++---------- .../claude/gemini/claude_gemini_request.go | 4 +- 4 files changed, 95 insertions(+), 94 deletions(-) diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 234b06cb..46b2d4ea 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -1467,7 +1467,7 @@ func normalizeAntigravityThinking(model string, payload []byte, isClaude bool) [ return payload } raw := int(budget.Int()) - normalized := thinking.ClampBudget(raw, modelInfo.Thinking.Min, modelInfo.Thinking.Max) + normalized := thinking.ClampBudget(raw, modelInfo, "antigravity") if isClaude { effectiveMax, setDefaultMax := antigravityEffectiveMaxTokens(model, payload) diff --git a/internal/thinking/apply.go b/internal/thinking/apply.go index 0b26ca0b..ce210dfb 100644 --- a/internal/thinking/apply.go +++ b/internal/thinking/apply.go @@ -84,7 +84,10 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) { // 1. Route check: Get provider applier applier := GetProviderApplier(provider) if applier == nil { - log.WithField("provider", provider).Debug("thinking: unknown provider, passthrough") + log.WithFields(log.Fields{ + "provider": provider, + "model": model, + }).Debug("thinking: unknown provider, passthrough") return body, nil } @@ -108,14 +111,17 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) { }).Debug("thinking: model does not support thinking, stripping config") return StripThinkingConfig(body, provider), nil } - log.WithField("model", baseModel).Debug("thinking: model does not support thinking, passthrough") + log.WithFields(log.Fields{ + "provider": provider, + "model": baseModel, + }).Debug("thinking: model does not support thinking, passthrough") return body, nil } // 4. 
Get config: suffix priority over body var config ThinkingConfig if suffixResult.HasSuffix { - config = parseSuffixToConfig(suffixResult.RawSuffix) + config = parseSuffixToConfig(suffixResult.RawSuffix, provider, model) log.WithFields(log.Fields{ "provider": provider, "model": model, @@ -125,13 +131,15 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) { }).Debug("thinking: config from model suffix") } else { config = extractThinkingConfig(body, provider) - log.WithFields(log.Fields{ - "provider": provider, - "model": modelInfo.ID, - "mode": config.Mode, - "budget": config.Budget, - "level": config.Level, - }).Debug("thinking: original config from request") + if hasThinkingConfig(config) { + log.WithFields(log.Fields{ + "provider": provider, + "model": modelInfo.ID, + "mode": config.Mode, + "budget": config.Budget, + "level": config.Level, + }).Debug("thinking: original config from request") + } } if !hasThinkingConfig(config) { @@ -143,7 +151,7 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) { } // 5. Validate and normalize configuration - validated, err := ValidateConfig(config, modelInfo.Thinking) + validated, err := ValidateConfig(config, modelInfo, provider) if err != nil { log.WithFields(log.Fields{ "provider": provider, @@ -185,7 +193,7 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) { // 3. Numeric values: positive integers → ModeBudget, 0 → ModeNone // // If none of the above match, returns empty ThinkingConfig (treated as no config). -func parseSuffixToConfig(rawSuffix string) ThinkingConfig { +func parseSuffixToConfig(rawSuffix, provider, model string) ThinkingConfig { // 1. Try special values first (none, auto, -1) if mode, ok := ParseSpecialSuffix(rawSuffix); ok { switch mode { @@ -210,7 +218,11 @@ func parseSuffixToConfig(rawSuffix string) ThinkingConfig { } // Unknown suffix format - return empty config - log.WithField("raw_suffix", rawSuffix).Debug("thinking: unknown suffix format, treating as no config") + log.WithFields(log.Fields{ + "provider": provider, + "model": model, + "raw_suffix": rawSuffix, + }).Debug("thinking: unknown suffix format, treating as no config") return ThinkingConfig{} } @@ -228,7 +240,7 @@ func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, provider // Get config: suffix priority over body var config ThinkingConfig if suffixResult.HasSuffix { - config = parseSuffixToConfig(suffixResult.RawSuffix) + config = parseSuffixToConfig(suffixResult.RawSuffix, provider, modelID) } else { config = extractThinkingConfig(body, provider) } diff --git a/internal/thinking/validate.go b/internal/thinking/validate.go index 886f3161..3a92ec12 100644 --- a/internal/thinking/validate.go +++ b/internal/thinking/validate.go @@ -9,81 +9,59 @@ import ( log "github.com/sirupsen/logrus" ) -// ClampBudget clamps a budget value to the specified range [min, max]. -// -// This function ensures budget values stay within model-supported bounds. -// When clamping occurs, a Debug-level log is recorded. -// -// Special handling: -// - Auto value (-1) passes through without clamping -// - Values below min are clamped to min -// - Values above max are clamped to max -// -// Parameters: -// - value: The budget value to clamp -// - min: Minimum allowed budget (inclusive) -// - max: Maximum allowed budget (inclusive) -// -// Returns: -// - The clamped budget value (min ≤ result ≤ max, or -1 for auto) +// ClampBudget clamps a budget value to the model's supported range. 
// // Logging: -// - Debug level when value is clamped (either to min or max) -// - Fields: original_value, clamped_to, min, max -func ClampBudget(value, min, max int) int { - // Auto value (-1) passes through without clamping +// - Warn when value=0 but ZeroAllowed=false +// - Debug when value is clamped to min/max +// +// Fields: provider, model, original_value, clamped_to, min, max +func ClampBudget(value int, modelInfo *registry.ModelInfo, provider string) int { + model := "unknown" + support := (*registry.ThinkingSupport)(nil) + if modelInfo != nil { + if modelInfo.ID != "" { + model = modelInfo.ID + } + support = modelInfo.Thinking + } + if support == nil { + return value + } + + // Auto value (-1) passes through without clamping. if value == -1 { return value } - // Clamp to min if below - if value < min { - logClamp(value, min, min, max) - return min - } - - // Clamp to max if above - if value > max { - logClamp(value, max, min, max) - return max - } - - // Within range, return original - return value -} - -// ClampBudgetWithZeroCheck clamps a budget value to the specified range [min, max] -// while honoring the ZeroAllowed constraint. -// -// This function extends ClampBudget with ZeroAllowed boundary handling. -// When zeroAllowed is false and value is 0, the value is clamped to min and logged. -// -// Parameters: -// - value: The budget value to clamp -// - min: Minimum allowed budget (inclusive) -// - max: Maximum allowed budget (inclusive) -// - zeroAllowed: Whether 0 (thinking disabled) is allowed -// -// Returns: -// - The clamped budget value (min ≤ result ≤ max, or -1 for auto) -// -// Logging: -// - Warn level when zeroAllowed=false and value=0 (zero not allowed for model) -// - Fields: original_value, clamped_to, reason -func ClampBudgetWithZeroCheck(value, min, max int, zeroAllowed bool) int { - if value == 0 { - if zeroAllowed { - return 0 - } + min := support.Min + max := support.Max + if value == 0 && !support.ZeroAllowed { log.WithFields(log.Fields{ - "clamped_to": min, - "min": min, - "max": max, + "provider": provider, + "model": model, + "original_value": value, + "clamped_to": min, + "min": min, + "max": max, }).Warn("thinking: budget zero not allowed") return min } - return ClampBudget(value, min, max) + // Some models are level-only and do not define numeric budget ranges. + if min == 0 && max == 0 { + return value + } + + if value < min { + logClamp(provider, model, value, min, min, max) + return min + } + if value > max { + logClamp(provider, model, value, max, min, max) + return max + } + return value } // ValidateConfig validates a thinking configuration against model capabilities. 
@@ -106,16 +84,26 @@ func ClampBudgetWithZeroCheck(value, min, max int, zeroAllowed bool) int { // - Budget-only model + Level config → Level converted to Budget // - Level-only model + Budget config → Budget converted to Level // - Hybrid model → preserve original format -func ValidateConfig(config ThinkingConfig, support *registry.ThinkingSupport) (*ThinkingConfig, error) { +func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, provider string) (*ThinkingConfig, error) { normalized := config + + model := "unknown" + support := (*registry.ThinkingSupport)(nil) + if modelInfo != nil { + if modelInfo.ID != "" { + model = modelInfo.ID + } + support = modelInfo.Thinking + } + if support == nil { if config.Mode != ModeNone { - return nil, NewThinkingErrorWithModel(ErrThinkingNotSupported, "thinking not supported for this model", "unknown") + return nil, NewThinkingErrorWithModel(ErrThinkingNotSupported, "thinking not supported for this model", model) } return &normalized, nil } - capability := detectModelCapability(®istry.ModelInfo{Thinking: support}) + capability := detectModelCapability(modelInfo) switch capability { case CapabilityBudgetOnly: if normalized.Mode == ModeLevel { @@ -168,13 +156,12 @@ func ValidateConfig(config ThinkingConfig, support *registry.ThinkingSupport) (* // Convert ModeAuto to mid-range if dynamic not allowed if normalized.Mode == ModeAuto && !support.DynamicAllowed { - normalized = convertAutoToMidRange(normalized, support) + normalized = convertAutoToMidRange(normalized, support, provider, model) } switch normalized.Mode { case ModeBudget, ModeAuto, ModeNone: - clamped := ClampBudgetWithZeroCheck(normalized.Budget, support.Min, support.Max, support.ZeroAllowed) - normalized.Budget = clamped + normalized.Budget = ClampBudget(normalized.Budget, modelInfo, provider) } // ModeNone with clamped Budget > 0: set Level to lowest for Level-only/Hybrid models @@ -213,17 +200,18 @@ func normalizeLevels(levels []string) []string { // Logging: // - Debug level when conversion occurs // - Fields: original_mode, clamped_to, reason -func convertAutoToMidRange(config ThinkingConfig, support *registry.ThinkingSupport) ThinkingConfig { +func convertAutoToMidRange(config ThinkingConfig, support *registry.ThinkingSupport, provider, model string) ThinkingConfig { // For level-only models (has Levels but no Min/Max range), use ModeLevel with medium if len(support.Levels) > 0 && support.Min == 0 && support.Max == 0 { config.Mode = ModeLevel config.Level = LevelMedium config.Budget = 0 log.WithFields(log.Fields{ + "provider": provider, + "model": model, "original_mode": "auto", "clamped_to": string(LevelMedium), - "reason": "dynamic_not_allowed_level_only", - }).Debug("thinking mode converted: dynamic not allowed, using medium level") + }).Debug("thinking: mode converted: dynamic not allowed, using medium level") return config } @@ -240,16 +228,19 @@ func convertAutoToMidRange(config ThinkingConfig, support *registry.ThinkingSupp config.Budget = mid } log.WithFields(log.Fields{ + "provider": provider, + "model": model, "original_mode": "auto", "clamped_to": config.Budget, - "reason": "dynamic_not_allowed", - }).Debug("thinking mode converted: dynamic not allowed") + }).Debug("thinking: mode converted: dynamic not allowed") return config } // logClamp logs a debug message when budget clamping occurs. 
-func logClamp(original, clampedTo, min, max int) { +func logClamp(provider, model string, original, clampedTo, min, max int) { log.WithFields(log.Fields{ + "provider": provider, + "model": model, "original_value": original, "min": min, "max": max, diff --git a/internal/translator/claude/gemini/claude_gemini_request.go b/internal/translator/claude/gemini/claude_gemini_request.go index 8c5b1095..89857693 100644 --- a/internal/translator/claude/gemini/claude_gemini_request.go +++ b/internal/translator/claude/gemini/claude_gemini_request.go @@ -16,7 +16,6 @@ import ( "github.com/google/uuid" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" - "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" @@ -123,8 +122,7 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream // Check for thinkingBudget first - if present, enable thinking with budget if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() && thinkingBudget.Int() > 0 { out, _ = sjson.Set(out, "thinking.type", "enabled") - normalizedBudget := thinking.ClampBudget(int(thinkingBudget.Int()), modelInfo.Thinking.Min, modelInfo.Thinking.Max) - out, _ = sjson.Set(out, "thinking.budget_tokens", normalizedBudget) + out, _ = sjson.Set(out, "thinking.budget_tokens", thinkingBudget.Int()) } else if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True { // Fallback to include_thoughts if no budget specified out, _ = sjson.Set(out, "thinking.type", "enabled") From bcd4d9595f0caa3167303181155716596a444f6e Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 15 Jan 2026 11:53:54 +0800 Subject: [PATCH 20/65] fix(thinking): refine ModeNone handling based on provider capabilities --- internal/thinking/validate.go | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/internal/thinking/validate.go b/internal/thinking/validate.go index 3a92ec12..799c8005 100644 --- a/internal/thinking/validate.go +++ b/internal/thinking/validate.go @@ -159,15 +159,27 @@ func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, provid normalized = convertAutoToMidRange(normalized, support, provider, model) } - switch normalized.Mode { - case ModeBudget, ModeAuto, ModeNone: - normalized.Budget = ClampBudget(normalized.Budget, modelInfo, provider) + // ModeNone handling varies by provider/model capability: + // - Claude can always disable thinking via thinking.type="disabled". + // - Some models/providers cannot be fully disabled; for those we force minimal thinking + // while hiding thoughts (ModeNone + Budget>0 and/or Level set). 
+ if normalized.Mode == ModeNone { + if provider == "claude" { + normalized.Budget = 0 + normalized.Level = "" + } else if !support.ZeroAllowed { + if support.Min > 0 { + normalized.Budget = support.Min + } + if normalized.Level == "" && normalized.Budget > 0 && len(support.Levels) > 0 { + normalized.Level = ThinkingLevel(support.Levels[0]) + } + } } - // ModeNone with clamped Budget > 0: set Level to lowest for Level-only/Hybrid models - // This ensures Apply layer doesn't need to access support.Levels - if normalized.Mode == ModeNone && normalized.Budget > 0 && len(support.Levels) > 0 { - normalized.Level = ThinkingLevel(support.Levels[0]) + switch normalized.Mode { + case ModeBudget, ModeAuto: + normalized.Budget = ClampBudget(normalized.Budget, modelInfo, provider) } return &normalized, nil From 8bc6df329f20edbe51dfe99ae9373dd4ff89e0b0 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 15 Jan 2026 12:34:05 +0800 Subject: [PATCH 21/65] fix(auth): apply API key model mapping to request model --- sdk/cliproxy/auth/conductor.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index 709f640a..f7605d87 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -593,6 +593,7 @@ func (m *Manager) executeMixedOnce(ctx context.Context, providers []string, req execReq := req execReq.Model = rewriteModelForAuth(routeModel, auth) execReq.Model = m.applyOAuthModelMapping(auth, execReq.Model) + execReq.Model = m.applyAPIKeyModelMapping(auth, execReq.Model) resp, errExec := executor.Execute(execCtx, auth, execReq, opts) result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil} if errExec != nil { @@ -641,6 +642,7 @@ func (m *Manager) executeCountMixedOnce(ctx context.Context, providers []string, execReq := req execReq.Model = rewriteModelForAuth(routeModel, auth) execReq.Model = m.applyOAuthModelMapping(auth, execReq.Model) + execReq.Model = m.applyAPIKeyModelMapping(auth, execReq.Model) resp, errExec := executor.CountTokens(execCtx, auth, execReq, opts) result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil} if errExec != nil { @@ -689,6 +691,7 @@ func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string execReq := req execReq.Model = rewriteModelForAuth(routeModel, auth) execReq.Model = m.applyOAuthModelMapping(auth, execReq.Model) + execReq.Model = m.applyAPIKeyModelMapping(auth, execReq.Model) chunks, errStream := executor.ExecuteStream(execCtx, auth, execReq, opts) if errStream != nil { rerr := &Error{Message: errStream.Error()} From ee2976cca06ba15924e6860c6bf379ef8d980439 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 15 Jan 2026 13:06:26 +0800 Subject: [PATCH 22/65] refactor(thinking): improve logging for user-defined models --- internal/thinking/apply.go | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/internal/thinking/apply.go b/internal/thinking/apply.go index ce210dfb..a2919cea 100644 --- a/internal/thinking/apply.go +++ b/internal/thinking/apply.go @@ -247,31 +247,27 @@ func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, provider if !hasThinkingConfig(config) { log.WithFields(log.Fields{ - "model": modelID, - "provider": provider, - "user_defined": true, - "passthrough": true, - }).Debug("thinking: user-defined model, no config, 
passthrough") + "model": modelID, + "provider": provider, + }).Debug("thinking: user-defined model, passthrough (no config)") return body, nil } applier := GetProviderApplier(provider) if applier == nil { log.WithFields(log.Fields{ - "model": modelID, - "provider": provider, - "user_defined": true, - "passthrough": true, - }).Debug("thinking: user-defined model, unknown provider, passthrough") + "model": modelID, + "provider": provider, + }).Debug("thinking: user-defined model, passthrough (unknown provider)") return body, nil } log.WithFields(log.Fields{ - "model": modelID, - "provider": provider, - "user_defined": true, - "passthrough": false, - "config": config, + "provider": provider, + "model": modelID, + "mode": config.Mode, + "budget": config.Budget, + "level": config.Level, }).Debug("thinking: applying config for user-defined model (skip validation)") return applier.Apply(body, config, modelInfo) From 086eb3df7adf7e45f0ae5fc1af73e8c0394cddb4 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Thu, 15 Jan 2026 13:22:46 +0800 Subject: [PATCH 23/65] refactor(auth): simplify file handling logic and remove redundant comparison functions feat(auth): fetch and update Antigravity project ID from metadata during filestore operations - Added support to retrieve and update `project_id` using the access token if missing in metadata. - Integrated HTTP client to fetch project ID dynamically. - Enhanced metadata persistence logic. --- .../runtime/executor/antigravity_executor.go | 42 +++++- sdk/auth/filestore.go | 140 +++++------------- 2 files changed, 80 insertions(+), 102 deletions(-) diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 8d1ef23d..8dfccef9 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -25,6 +25,7 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + sdkAuth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" @@ -1104,12 +1105,49 @@ func (e *AntigravityExecutor) refreshToken(ctx context.Context, auth *cliproxyau auth.Metadata["refresh_token"] = tokenResp.RefreshToken } auth.Metadata["expires_in"] = tokenResp.ExpiresIn - auth.Metadata["timestamp"] = time.Now().UnixMilli() - auth.Metadata["expired"] = time.Now().Add(time.Duration(tokenResp.ExpiresIn) * time.Second).Format(time.RFC3339) + now := time.Now() + auth.Metadata["timestamp"] = now.UnixMilli() + auth.Metadata["expired"] = now.Add(time.Duration(tokenResp.ExpiresIn) * time.Second).Format(time.RFC3339) auth.Metadata["type"] = antigravityAuthType + if errProject := e.ensureAntigravityProjectID(ctx, auth, tokenResp.AccessToken); errProject != nil { + log.Warnf("antigravity executor: ensure project id failed: %v", errProject) + } return auth, nil } +func (e *AntigravityExecutor) ensureAntigravityProjectID(ctx context.Context, auth *cliproxyauth.Auth, accessToken string) error { + if auth == nil { + return nil + } + + if auth.Metadata["project_id"] != nil { + return nil + } + + token := strings.TrimSpace(accessToken) + if token == "" { + token = metaStringValue(auth.Metadata, "access_token") + } + if token == "" { + return nil + } + + 
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + projectID, errFetch := sdkAuth.FetchAntigravityProjectID(ctx, token, httpClient) + if errFetch != nil { + return errFetch + } + if strings.TrimSpace(projectID) == "" { + return nil + } + if auth.Metadata == nil { + auth.Metadata = make(map[string]any) + } + auth.Metadata["project_id"] = strings.TrimSpace(projectID) + + return nil +} + func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyauth.Auth, token, modelName string, payload []byte, stream bool, alt, baseURL string) (*http.Request, error) { if token == "" { return nil, statusErr{code: http.StatusUnauthorized, msg: "missing access token"} diff --git a/sdk/auth/filestore.go b/sdk/auth/filestore.go index 84092d37..9634fa37 100644 --- a/sdk/auth/filestore.go +++ b/sdk/auth/filestore.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "io/fs" + "net/http" "os" "path/filepath" "strings" @@ -71,21 +72,26 @@ func (s *FileTokenStore) Save(ctx context.Context, auth *cliproxyauth.Auth) (str if errMarshal != nil { return "", fmt.Errorf("auth filestore: marshal metadata failed: %w", errMarshal) } - if existing, errRead := os.ReadFile(path); errRead == nil { + if _, errRead := os.ReadFile(path); errRead == nil { // Use metadataEqualIgnoringTimestamps to skip writes when only timestamp fields change. // This prevents the token refresh loop caused by timestamp/expired/expires_in changes. - if metadataEqualIgnoringTimestamps(existing, raw) { - return path, nil + file, errOpen := os.OpenFile(path, os.O_WRONLY|os.O_TRUNC, 0o600) + if errOpen != nil { + return "", fmt.Errorf("auth filestore: open existing failed: %w", errOpen) } - } else if errRead != nil && !os.IsNotExist(errRead) { + if _, errWrite := file.Write(raw); errWrite != nil { + _ = file.Close() + return "", fmt.Errorf("auth filestore: write existing failed: %w", errWrite) + } + if errClose := file.Close(); errClose != nil { + return "", fmt.Errorf("auth filestore: close existing failed: %w", errClose) + } + return path, nil + } else if !os.IsNotExist(errRead) { return "", fmt.Errorf("auth filestore: read existing failed: %w", errRead) } - tmp := path + ".tmp" - if errWrite := os.WriteFile(tmp, raw, 0o600); errWrite != nil { - return "", fmt.Errorf("auth filestore: write temp failed: %w", errWrite) - } - if errRename := os.Rename(tmp, path); errRename != nil { - return "", fmt.Errorf("auth filestore: rename failed: %w", errRename) + if errWrite := os.WriteFile(path, raw, 0o600); errWrite != nil { + return "", fmt.Errorf("auth filestore: write file failed: %w", errWrite) } default: return "", fmt.Errorf("auth filestore: nothing to persist for %s", auth.ID) @@ -178,6 +184,30 @@ func (s *FileTokenStore) readAuthFile(path, baseDir string) (*cliproxyauth.Auth, if provider == "" { provider = "unknown" } + if provider == "antigravity" { + projectID := "" + if pid, ok := metadata["project_id"].(string); ok { + projectID = strings.TrimSpace(pid) + } + if projectID == "" { + accessToken := "" + if token, ok := metadata["access_token"].(string); ok { + accessToken = strings.TrimSpace(token) + } + if accessToken != "" { + fetchedProjectID, errFetch := FetchAntigravityProjectID(context.Background(), accessToken, http.DefaultClient) + if errFetch == nil && strings.TrimSpace(fetchedProjectID) != "" { + metadata["project_id"] = strings.TrimSpace(fetchedProjectID) + if raw, errMarshal := json.Marshal(metadata); errMarshal == nil { + if file, errOpen := os.OpenFile(path, os.O_WRONLY|os.O_TRUNC, 0o600); errOpen == nil { + _, _ = 
file.Write(raw) + _ = file.Close() + } + } + } + } + } + } info, err := os.Stat(path) if err != nil { return nil, fmt.Errorf("stat file: %w", err) @@ -265,93 +295,3 @@ func (s *FileTokenStore) baseDirSnapshot() string { defer s.dirLock.RUnlock() return s.baseDir } - -// DEPRECATED: Use metadataEqualIgnoringTimestamps for comparing auth metadata. -// This function is kept for backward compatibility but can cause refresh loops. -func jsonEqual(a, b []byte) bool { - var objA any - var objB any - if err := json.Unmarshal(a, &objA); err != nil { - return false - } - if err := json.Unmarshal(b, &objB); err != nil { - return false - } - return deepEqualJSON(objA, objB) -} - -// metadataEqualIgnoringTimestamps compares two metadata JSON blobs, -// ignoring fields that change on every refresh but don't affect functionality. -// This prevents unnecessary file writes that would trigger watcher events and -// create refresh loops. -func metadataEqualIgnoringTimestamps(a, b []byte) bool { - var objA, objB map[string]any - if err := json.Unmarshal(a, &objA); err != nil { - return false - } - if err := json.Unmarshal(b, &objB); err != nil { - return false - } - - // Fields to ignore: these change on every refresh but don't affect authentication logic. - // - timestamp, expired, expires_in, last_refresh: time-related fields that change on refresh - // - access_token: Google OAuth returns a new access_token on each refresh, this is expected - // and shouldn't trigger file writes (the new token will be fetched again when needed) - ignoredFields := []string{"timestamp", "expired", "expires_in", "last_refresh", "access_token"} - for _, field := range ignoredFields { - delete(objA, field) - delete(objB, field) - } - - return deepEqualJSON(objA, objB) -} - -func deepEqualJSON(a, b any) bool { - switch valA := a.(type) { - case map[string]any: - valB, ok := b.(map[string]any) - if !ok || len(valA) != len(valB) { - return false - } - for key, subA := range valA { - subB, ok1 := valB[key] - if !ok1 || !deepEqualJSON(subA, subB) { - return false - } - } - return true - case []any: - sliceB, ok := b.([]any) - if !ok || len(valA) != len(sliceB) { - return false - } - for i := range valA { - if !deepEqualJSON(valA[i], sliceB[i]) { - return false - } - } - return true - case float64: - valB, ok := b.(float64) - if !ok { - return false - } - return valA == valB - case string: - valB, ok := b.(string) - if !ok { - return false - } - return valA == valB - case bool: - valB, ok := b.(bool) - if !ok { - return false - } - return valA == valB - case nil: - return b == nil - default: - return false - } -} From 5c40a2db215b17a1f0f7d383bfb29c18f647eb2b Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 15 Jan 2026 14:03:08 +0800 Subject: [PATCH 24/65] refactor(thinking): simplify ModeNone and budget validation logic --- internal/registry/model_definitions.go | 4 ++-- internal/thinking/validate.go | 33 +++++++++++--------------- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index b8ca8757..13fc8177 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -777,8 +777,8 @@ func GetAntigravityModelConfig() map[string]*AntigravityModelConfig { "gemini-3-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-high"}, "gemini-3-pro-image": 
{Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image"}, "gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash"}, - "claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, MaxCompletionTokens: 64000}, - "claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, MaxCompletionTokens: 64000}, + "claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000}, + "claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000}, } } diff --git a/internal/thinking/validate.go b/internal/thinking/validate.go index 799c8005..897a64a3 100644 --- a/internal/thinking/validate.go +++ b/internal/thinking/validate.go @@ -159,27 +159,22 @@ func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, provid normalized = convertAutoToMidRange(normalized, support, provider, model) } - // ModeNone handling varies by provider/model capability: - // - Claude can always disable thinking via thinking.type="disabled". - // - Some models/providers cannot be fully disabled; for those we force minimal thinking - // while hiding thoughts (ModeNone + Budget>0 and/or Level set). - if normalized.Mode == ModeNone { - if provider == "claude" { - normalized.Budget = 0 - normalized.Level = "" - } else if !support.ZeroAllowed { - if support.Min > 0 { - normalized.Budget = support.Min - } - if normalized.Level == "" && normalized.Budget > 0 && len(support.Levels) > 0 { - normalized.Level = ThinkingLevel(support.Levels[0]) - } + if normalized.Mode == ModeNone && provider == "claude" { + // Claude supports explicit disable via thinking.type="disabled". + // Keep Budget=0 so applier can omit budget_tokens. + normalized.Budget = 0 + normalized.Level = "" + } else { + switch normalized.Mode { + case ModeBudget, ModeAuto, ModeNone: + normalized.Budget = ClampBudget(normalized.Budget, modelInfo, provider) } - } - switch normalized.Mode { - case ModeBudget, ModeAuto: - normalized.Budget = ClampBudget(normalized.Budget, modelInfo, provider) + // ModeNone with clamped Budget > 0: set Level to lowest for Level-only/Hybrid models + // This ensures Apply layer doesn't need to access support.Levels + if normalized.Mode == ModeNone && normalized.Budget > 0 && len(support.Levels) > 0 { + normalized.Level = ThinkingLevel(support.Levels[0]) + } } return &normalized, nil From 7248f65c36b5c2dedc336a75057925245113c979 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Thu, 15 Jan 2026 14:05:23 +0800 Subject: [PATCH 25/65] feat(auth): prevent filestore writes on unchanged metadata - Added `metadataEqualIgnoringTimestamps` to compare metadata while ignoring volatile fields. - Prevented redundant writes caused by changes in timestamp-related fields. - Improved efficiency in filestore operations by skipping unnecessary updates. 
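A minimal sketch of the intended semantics (hypothetical test, not part of
this patch; the helper and field names are the ones added below):

    func TestMetadataEqualIgnoresVolatileFields(t *testing.T) {
        // The blobs differ only in access_token and timestamp, both of which
        // are stripped before comparison, so no file write should be triggered.
        a := []byte(`{"refresh_token":"r","access_token":"old","timestamp":1}`)
        b := []byte(`{"refresh_token":"r","access_token":"new","timestamp":2}`)
        if !metadataEqualIgnoringTimestamps(a, b) {
            t.Fatal("metadata differing only in volatile fields should compare equal")
        }
    }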
--- sdk/auth/filestore.go | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/sdk/auth/filestore.go b/sdk/auth/filestore.go index 9634fa37..db9f7148 100644 --- a/sdk/auth/filestore.go +++ b/sdk/auth/filestore.go @@ -8,6 +8,7 @@ import ( "net/http" "os" "path/filepath" + "reflect" "strings" "sync" "time" @@ -72,9 +73,12 @@ func (s *FileTokenStore) Save(ctx context.Context, auth *cliproxyauth.Auth) (str if errMarshal != nil { return "", fmt.Errorf("auth filestore: marshal metadata failed: %w", errMarshal) } - if _, errRead := os.ReadFile(path); errRead == nil { + if existing, errRead := os.ReadFile(path); errRead == nil { // Use metadataEqualIgnoringTimestamps to skip writes when only timestamp fields change. // This prevents the token refresh loop caused by timestamp/expired/expires_in changes. + if metadataEqualIgnoringTimestamps(existing, raw) { + return path, nil + } file, errOpen := os.OpenFile(path, os.O_WRONLY|os.O_TRUNC, 0o600) if errOpen != nil { return "", fmt.Errorf("auth filestore: open existing failed: %w", errOpen) @@ -295,3 +299,29 @@ func (s *FileTokenStore) baseDirSnapshot() string { defer s.dirLock.RUnlock() return s.baseDir } + +// metadataEqualIgnoringTimestamps compares two metadata JSON blobs, ignoring volatile fields that +// change on every refresh but don't affect authentication logic. +func metadataEqualIgnoringTimestamps(a, b []byte) bool { + var objA map[string]any + var objB map[string]any + if errUnmarshalA := json.Unmarshal(a, &objA); errUnmarshalA != nil { + return false + } + if errUnmarshalB := json.Unmarshal(b, &objB); errUnmarshalB != nil { + return false + } + stripVolatileMetadataFields(objA) + stripVolatileMetadataFields(objB) + return reflect.DeepEqual(objA, objB) +} + +func stripVolatileMetadataFields(metadata map[string]any) { + if metadata == nil { + return + } + // These fields change on refresh and would otherwise trigger watcher reload loops. 
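+	// access_token is included because Google OAuth returns a fresh one on each refresh.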
+ for _, field := range []string{"timestamp", "expired", "expires_in", "last_refresh", "access_token"} { + delete(metadata, field) + } +} From ff4ff6bc2f8d90c250e07b58701dba4d3413c1a4 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 15 Jan 2026 15:41:10 +0800 Subject: [PATCH 26/65] feat(thinking): support zero as a valid thinking budget for capable models --- internal/thinking/validate.go | 3 + test/thinking_conversion_test.go | 175 ++++++++++++++++++++++++++++++- 2 files changed, 175 insertions(+), 3 deletions(-) diff --git a/internal/thinking/validate.go b/internal/thinking/validate.go index 897a64a3..f16a18f0 100644 --- a/internal/thinking/validate.go +++ b/internal/thinking/validate.go @@ -54,6 +54,9 @@ func ClampBudget(value int, modelInfo *registry.ModelInfo, provider string) int } if value < min { + if value == 0 && support.ZeroAllowed { + return 0 + } logClamp(provider, model, value, min, min, max) return min } diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go index d0e88c78..7e35c389 100644 --- a/test/thinking_conversion_test.go +++ b/test/thinking_conversion_test.go @@ -86,6 +86,20 @@ func TestThinkingE2EMatrix(t *testing.T) { DynamicAllowed: false, }, }, + { + ID: "antigravity-budget-model", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "gemini-cli", + DisplayName: "Antigravity Budget Model", + Thinking: ®istry.ThinkingSupport{ + Min: 128, + Max: 20000, + ZeroAllowed: true, + DynamicAllowed: true, + }, + }, { ID: "no-thinking-model", Object: "model", @@ -618,6 +632,114 @@ func TestThinkingE2EMatrix(t *testing.T) { expectErr: false, }, + // antigravity-budget-model (Min=128, Max=20000, ZeroAllowed=true, DynamicAllowed=true) + { + name: "46", + from: "gemini", + to: "antigravity", + modelSuffix: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + { + name: "47", + from: "gemini", + to: "antigravity", + modelSuffix: "antigravity-budget-model(medium)", + inputJSON: `{"model":"antigravity-budget-model(medium)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + { + name: "48", + from: "gemini", + to: "antigravity", + modelSuffix: "antigravity-budget-model(xhigh)", + inputJSON: `{"model":"antigravity-budget-model(xhigh)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + { + name: "49", + from: "gemini", + to: "antigravity", + modelSuffix: "antigravity-budget-model(none)", + inputJSON: `{"model":"antigravity-budget-model(none)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "0", + includeThoughts: "false", + expectErr: false, + }, + { + name: "50", + from: "gemini", + to: "antigravity", + modelSuffix: "antigravity-budget-model(auto)", + inputJSON: `{"model":"antigravity-budget-model(auto)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + { + name: "51", + from: "claude", + to: "antigravity", + modelSuffix: 
"antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + { + name: "52", + from: "claude", + to: "antigravity", + modelSuffix: "antigravity-budget-model(8192)", + inputJSON: `{"model":"antigravity-budget-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + { + name: "53", + from: "claude", + to: "antigravity", + modelSuffix: "antigravity-budget-model(64000)", + inputJSON: `{"model":"antigravity-budget-model(64000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + { + name: "54", + from: "claude", + to: "antigravity", + modelSuffix: "antigravity-budget-model(0)", + inputJSON: `{"model":"antigravity-budget-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "0", + includeThoughts: "false", + expectErr: false, + }, + { + name: "55", + from: "claude", + to: "antigravity", + modelSuffix: "antigravity-budget-model(-1)", + inputJSON: `{"model":"antigravity-budget-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + // no-thinking-model (Thinking=nil) { name: "46", @@ -801,6 +923,49 @@ func TestThinkingE2EMatrix(t *testing.T) { expectValue: "auto", expectErr: false, }, + // openai/codex → gemini/claude for user-defined-model + { + name: "64", + from: "openai", + to: "gemini", + modelSuffix: "user-defined-model(8192)", + inputJSON: `{"model":"user-defined-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + { + name: "65", + from: "openai", + to: "claude", + modelSuffix: "user-defined-model(8192)", + inputJSON: `{"model":"user-defined-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, + { + name: "66", + from: "codex", + to: "gemini", + modelSuffix: "user-defined-model(8192)", + inputJSON: `{"model":"user-defined-model(8192)","input":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + { + name: "67", + from: "codex", + to: "claude", + modelSuffix: "user-defined-model(8192)", + inputJSON: `{"model":"user-defined-model(8192)","input":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, } for _, tc := range cases { @@ -868,9 +1033,13 @@ func TestThinkingE2EMatrix(t *testing.T) { t.Fatalf("field %s: expected %q, got %q, body=%s", tc.expectField, tc.expectValue, actualValue, string(body)) } - // Check includeThoughts for Gemini - if tc.includeThoughts != "" && tc.to == "gemini" { - itVal := gjson.GetBytes(body, "generationConfig.thinkingConfig.includeThoughts") + // Check includeThoughts for Gemini/Antigravity + if tc.includeThoughts != "" && (tc.to == "gemini" || tc.to == "antigravity") { + path := 
"generationConfig.thinkingConfig.includeThoughts" + if tc.to == "antigravity" { + path = "request.generationConfig.thinkingConfig.includeThoughts" + } + itVal := gjson.GetBytes(body, path) if !itVal.Exists() { t.Fatalf("expected includeThoughts field not found, body=%s", string(body)) } From e0ffec885c29a8f02a50e89b4f72766fc3d59ef6 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 15 Jan 2026 16:06:46 +0800 Subject: [PATCH 27/65] fix(aistudio): remove levels from model definitions --- internal/registry/model_definitions.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index 13fc8177..2ef3381b 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -432,7 +432,7 @@ func GetAIStudioModels() []*ModelInfo { InputTokenLimit: 1048576, OutputTokenLimit: 65536, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, + Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, }, { ID: "gemini-3-flash-preview", @@ -447,7 +447,7 @@ func GetAIStudioModels() []*ModelInfo { InputTokenLimit: 1048576, OutputTokenLimit: 65536, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, + Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, }, { ID: "gemini-pro-latest", From fe5b3c80cb3a01527573d342b92bbe91c4b13874 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 15 Jan 2026 18:03:26 +0800 Subject: [PATCH 28/65] refactor(config): rename oauth-model-mappings to oauth-model-alias --- config.example.yaml | 8 +- .../api/handlers/management/config_lists.go | 70 ++--- internal/api/server.go | 8 +- internal/config/config.go | 49 ++-- .../config/oauth_model_alias_migration.go | 258 ++++++++++++++++++ .../oauth_model_alias_migration_test.go | 225 +++++++++++++++ internal/config/oauth_model_alias_test.go | 56 ++++ internal/config/oauth_model_mappings_test.go | 56 ---- internal/watcher/config_reload.go | 2 +- internal/watcher/diff/config_diff.go | 2 +- ...model_mappings.go => oauth_model_alias.go} | 44 +-- ...gs_test.go => api_key_model_alias_test.go} | 14 +- sdk/cliproxy/auth/conductor.go | 78 +++--- ..._name_mappings.go => oauth_model_alias.go} | 66 ++--- ...ings_test.go => oauth_model_alias_test.go} | 46 ++-- sdk/cliproxy/builder.go | 2 +- sdk/cliproxy/service.go | 26 +- ...t.go => service_oauth_model_alias_test.go} | 18 +- sdk/config/config.go | 2 +- 19 files changed, 761 insertions(+), 269 deletions(-) create mode 100644 internal/config/oauth_model_alias_migration.go create mode 100644 internal/config/oauth_model_alias_migration_test.go create mode 100644 internal/config/oauth_model_alias_test.go delete mode 100644 internal/config/oauth_model_mappings_test.go rename internal/watcher/diff/{oauth_model_mappings.go => oauth_model_alias.go} (51%) rename sdk/cliproxy/auth/{api_key_model_mappings_test.go => api_key_model_alias_test.go} (94%) rename sdk/cliproxy/auth/{model_name_mappings.go => oauth_model_alias.go} (73%) 
rename sdk/cliproxy/auth/{model_name_mappings_test.go => oauth_model_alias_test.go} (77%) rename sdk/cliproxy/{service_oauth_model_mappings_test.go => service_oauth_model_alias_test.go} (77%) diff --git a/config.example.yaml b/config.example.yaml index 353b4f1b..b397be07 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -201,12 +201,12 @@ nonstream-keepalive-interval: 0 # - from: "claude-haiku-4-5-20251001" # to: "gemini-2.5-flash" -# Global OAuth model name mappings (per channel) -# These mappings rename model IDs for both model listing and request routing. +# Global OAuth model name aliases (per channel) +# These aliases rename model IDs for both model listing and request routing. # Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow. -# NOTE: Mappings do not apply to gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, or ampcode. +# NOTE: Aliases do not apply to gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, or ampcode. # You can repeat the same name with different aliases to expose multiple client model names. -oauth-model-mappings: +oauth-model-alias: antigravity: - name: "rev19-uic3-1p" alias: "gemini-2.5-computer-use-preview-10-2025" diff --git a/internal/api/handlers/management/config_lists.go b/internal/api/handlers/management/config_lists.go index edb7a677..4e0e0284 100644 --- a/internal/api/handlers/management/config_lists.go +++ b/internal/api/handlers/management/config_lists.go @@ -703,21 +703,21 @@ func (h *Handler) DeleteOAuthExcludedModels(c *gin.Context) { h.persist(c) } -// oauth-model-mappings: map[string][]ModelNameMapping -func (h *Handler) GetOAuthModelMappings(c *gin.Context) { - c.JSON(200, gin.H{"oauth-model-mappings": sanitizedOAuthModelMappings(h.cfg.OAuthModelMappings)}) +// oauth-model-alias: map[string][]OAuthModelAlias +func (h *Handler) GetOAuthModelAlias(c *gin.Context) { + c.JSON(200, gin.H{"oauth-model-alias": sanitizedOAuthModelAlias(h.cfg.OAuthModelAlias)}) } -func (h *Handler) PutOAuthModelMappings(c *gin.Context) { +func (h *Handler) PutOAuthModelAlias(c *gin.Context) { data, err := c.GetRawData() if err != nil { c.JSON(400, gin.H{"error": "failed to read body"}) return } - var entries map[string][]config.ModelNameMapping + var entries map[string][]config.OAuthModelAlias if err = json.Unmarshal(data, &entries); err != nil { var wrapper struct { - Items map[string][]config.ModelNameMapping `json:"items"` + Items map[string][]config.OAuthModelAlias `json:"items"` } if err2 := json.Unmarshal(data, &wrapper); err2 != nil { c.JSON(400, gin.H{"error": "invalid body"}) @@ -725,15 +725,15 @@ func (h *Handler) PutOAuthModelMappings(c *gin.Context) { } entries = wrapper.Items } - h.cfg.OAuthModelMappings = sanitizedOAuthModelMappings(entries) + h.cfg.OAuthModelAlias = sanitizedOAuthModelAlias(entries) h.persist(c) } -func (h *Handler) PatchOAuthModelMappings(c *gin.Context) { +func (h *Handler) PatchOAuthModelAlias(c *gin.Context) { var body struct { - Provider *string `json:"provider"` - Channel *string `json:"channel"` - Mappings []config.ModelNameMapping `json:"mappings"` + Provider *string `json:"provider"` + Channel *string `json:"channel"` + Aliases []config.OAuthModelAlias `json:"aliases"` } if errBindJSON := c.ShouldBindJSON(&body); errBindJSON != nil { c.JSON(400, gin.H{"error": "invalid body"}) @@ -751,32 +751,32 @@ func (h *Handler) PatchOAuthModelMappings(c *gin.Context) { return } - normalizedMap := 
sanitizedOAuthModelMappings(map[string][]config.ModelNameMapping{channel: body.Mappings}) + normalizedMap := sanitizedOAuthModelAlias(map[string][]config.OAuthModelAlias{channel: body.Aliases}) normalized := normalizedMap[channel] if len(normalized) == 0 { - if h.cfg.OAuthModelMappings == nil { + if h.cfg.OAuthModelAlias == nil { c.JSON(404, gin.H{"error": "channel not found"}) return } - if _, ok := h.cfg.OAuthModelMappings[channel]; !ok { + if _, ok := h.cfg.OAuthModelAlias[channel]; !ok { c.JSON(404, gin.H{"error": "channel not found"}) return } - delete(h.cfg.OAuthModelMappings, channel) - if len(h.cfg.OAuthModelMappings) == 0 { - h.cfg.OAuthModelMappings = nil + delete(h.cfg.OAuthModelAlias, channel) + if len(h.cfg.OAuthModelAlias) == 0 { + h.cfg.OAuthModelAlias = nil } h.persist(c) return } - if h.cfg.OAuthModelMappings == nil { - h.cfg.OAuthModelMappings = make(map[string][]config.ModelNameMapping) + if h.cfg.OAuthModelAlias == nil { + h.cfg.OAuthModelAlias = make(map[string][]config.OAuthModelAlias) } - h.cfg.OAuthModelMappings[channel] = normalized + h.cfg.OAuthModelAlias[channel] = normalized h.persist(c) } -func (h *Handler) DeleteOAuthModelMappings(c *gin.Context) { +func (h *Handler) DeleteOAuthModelAlias(c *gin.Context) { channel := strings.ToLower(strings.TrimSpace(c.Query("channel"))) if channel == "" { channel = strings.ToLower(strings.TrimSpace(c.Query("provider"))) @@ -785,17 +785,17 @@ func (h *Handler) DeleteOAuthModelMappings(c *gin.Context) { c.JSON(400, gin.H{"error": "missing channel"}) return } - if h.cfg.OAuthModelMappings == nil { + if h.cfg.OAuthModelAlias == nil { c.JSON(404, gin.H{"error": "channel not found"}) return } - if _, ok := h.cfg.OAuthModelMappings[channel]; !ok { + if _, ok := h.cfg.OAuthModelAlias[channel]; !ok { c.JSON(404, gin.H{"error": "channel not found"}) return } - delete(h.cfg.OAuthModelMappings, channel) - if len(h.cfg.OAuthModelMappings) == 0 { - h.cfg.OAuthModelMappings = nil + delete(h.cfg.OAuthModelAlias, channel) + if len(h.cfg.OAuthModelAlias) == 0 { + h.cfg.OAuthModelAlias = nil } h.persist(c) } @@ -1042,26 +1042,26 @@ func normalizeVertexCompatKey(entry *config.VertexCompatKey) { entry.Models = normalized } -func sanitizedOAuthModelMappings(entries map[string][]config.ModelNameMapping) map[string][]config.ModelNameMapping { +func sanitizedOAuthModelAlias(entries map[string][]config.OAuthModelAlias) map[string][]config.OAuthModelAlias { if len(entries) == 0 { return nil } - copied := make(map[string][]config.ModelNameMapping, len(entries)) - for channel, mappings := range entries { - if len(mappings) == 0 { + copied := make(map[string][]config.OAuthModelAlias, len(entries)) + for channel, aliases := range entries { + if len(aliases) == 0 { continue } - copied[channel] = append([]config.ModelNameMapping(nil), mappings...) + copied[channel] = append([]config.OAuthModelAlias(nil), aliases...) } if len(copied) == 0 { return nil } - cfg := config.Config{OAuthModelMappings: copied} - cfg.SanitizeOAuthModelMappings() - if len(cfg.OAuthModelMappings) == 0 { + cfg := config.Config{OAuthModelAlias: copied} + cfg.SanitizeOAuthModelAlias() + if len(cfg.OAuthModelAlias) == 0 { return nil } - return cfg.OAuthModelMappings + return cfg.OAuthModelAlias } // GetAmpCode returns the complete ampcode configuration. 
diff --git a/internal/api/server.go b/internal/api/server.go index 05bb2fee..5b425e7c 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -601,10 +601,10 @@ func (s *Server) registerManagementRoutes() { mgmt.PATCH("/oauth-excluded-models", s.mgmt.PatchOAuthExcludedModels) mgmt.DELETE("/oauth-excluded-models", s.mgmt.DeleteOAuthExcludedModels) - mgmt.GET("/oauth-model-mappings", s.mgmt.GetOAuthModelMappings) - mgmt.PUT("/oauth-model-mappings", s.mgmt.PutOAuthModelMappings) - mgmt.PATCH("/oauth-model-mappings", s.mgmt.PatchOAuthModelMappings) - mgmt.DELETE("/oauth-model-mappings", s.mgmt.DeleteOAuthModelMappings) + mgmt.GET("/oauth-model-alias", s.mgmt.GetOAuthModelAlias) + mgmt.PUT("/oauth-model-alias", s.mgmt.PutOAuthModelAlias) + mgmt.PATCH("/oauth-model-alias", s.mgmt.PatchOAuthModelAlias) + mgmt.DELETE("/oauth-model-alias", s.mgmt.DeleteOAuthModelAlias) mgmt.GET("/auth-files", s.mgmt.ListAuthFiles) mgmt.GET("/auth-files/models", s.mgmt.GetAuthFileModels) diff --git a/internal/config/config.go b/internal/config/config.go index effb44f5..c66229a8 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -91,13 +91,13 @@ type Config struct { // OAuthExcludedModels defines per-provider global model exclusions applied to OAuth/file-backed auth entries. OAuthExcludedModels map[string][]string `yaml:"oauth-excluded-models,omitempty" json:"oauth-excluded-models,omitempty"` - // OAuthModelMappings defines global model name mappings for OAuth/file-backed auth channels. - // These mappings affect both model listing and model routing for supported channels: + // OAuthModelAlias defines global model name aliases for OAuth/file-backed auth channels. + // These aliases affect both model listing and model routing for supported channels: // gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow. // // NOTE: This does not apply to existing per-credential model alias features under: // gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, and ampcode. - OAuthModelMappings map[string][]ModelNameMapping `yaml:"oauth-model-mappings,omitempty" json:"oauth-model-mappings,omitempty"` + OAuthModelAlias map[string][]OAuthModelAlias `yaml:"oauth-model-alias,omitempty" json:"oauth-model-alias,omitempty"` // Payload defines default and override rules for provider payload parameters. Payload PayloadConfig `yaml:"payload" json:"payload"` @@ -145,11 +145,11 @@ type RoutingConfig struct { Strategy string `yaml:"strategy,omitempty" json:"strategy,omitempty"` } -// ModelNameMapping defines a model ID mapping for a specific channel. +// OAuthModelAlias defines a model ID alias for a specific channel. // It maps the upstream model name (Name) to the client-visible alias (Alias). // When Fork is true, the alias is added as an additional model in listings while // keeping the original model ID available. -type ModelNameMapping struct { +type OAuthModelAlias struct { Name string `yaml:"name" json:"name"` Alias string `yaml:"alias" json:"alias"` Fork bool `yaml:"fork,omitempty" json:"fork,omitempty"` @@ -436,6 +436,15 @@ func LoadConfig(configFile string) (*Config, error) { // If optional is true and the file is missing, it returns an empty Config. // If optional is true and the file is empty or invalid, it returns an empty Config. func LoadConfigOptional(configFile string, optional bool) (*Config, error) { + // Perform oauth-model-alias migration before loading config. + // This migrates oauth-model-mappings to oauth-model-alias if needed. 
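+	// Running the migration first means the YAML parsed below already uses the new key.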
+ if migrated, err := MigrateOAuthModelAlias(configFile); err != nil { + // Log warning but don't fail - config loading should still work + fmt.Printf("Warning: oauth-model-alias migration failed: %v\n", err) + } else if migrated { + fmt.Println("Migrated oauth-model-mappings to oauth-model-alias") + } + // Read the entire configuration file into memory. data, err := os.ReadFile(configFile) if err != nil { @@ -528,8 +537,8 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) { // Normalize OAuth provider model exclusion map. cfg.OAuthExcludedModels = NormalizeOAuthExcludedModels(cfg.OAuthExcludedModels) - // Normalize global OAuth model name mappings. - cfg.SanitizeOAuthModelMappings() + // Normalize global OAuth model name aliases. + cfg.SanitizeOAuthModelAlias() if cfg.legacyMigrationPending { fmt.Println("Detected legacy configuration keys, attempting to persist the normalized config...") @@ -547,24 +556,24 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) { return &cfg, nil } -// SanitizeOAuthModelMappings normalizes and deduplicates global OAuth model name mappings. +// SanitizeOAuthModelAlias normalizes and deduplicates global OAuth model name aliases. // It trims whitespace, normalizes channel keys to lower-case, drops empty entries, // allows multiple aliases per upstream name, and ensures aliases are unique within each channel. -func (cfg *Config) SanitizeOAuthModelMappings() { - if cfg == nil || len(cfg.OAuthModelMappings) == 0 { +func (cfg *Config) SanitizeOAuthModelAlias() { + if cfg == nil || len(cfg.OAuthModelAlias) == 0 { return } - out := make(map[string][]ModelNameMapping, len(cfg.OAuthModelMappings)) - for rawChannel, mappings := range cfg.OAuthModelMappings { + out := make(map[string][]OAuthModelAlias, len(cfg.OAuthModelAlias)) + for rawChannel, aliases := range cfg.OAuthModelAlias { channel := strings.ToLower(strings.TrimSpace(rawChannel)) - if channel == "" || len(mappings) == 0 { + if channel == "" || len(aliases) == 0 { continue } - seenAlias := make(map[string]struct{}, len(mappings)) - clean := make([]ModelNameMapping, 0, len(mappings)) - for _, mapping := range mappings { - name := strings.TrimSpace(mapping.Name) - alias := strings.TrimSpace(mapping.Alias) + seenAlias := make(map[string]struct{}, len(aliases)) + clean := make([]OAuthModelAlias, 0, len(aliases)) + for _, entry := range aliases { + name := strings.TrimSpace(entry.Name) + alias := strings.TrimSpace(entry.Alias) if name == "" || alias == "" { continue } @@ -576,13 +585,13 @@ func (cfg *Config) SanitizeOAuthModelMappings() { continue } seenAlias[aliasKey] = struct{}{} - clean = append(clean, ModelNameMapping{Name: name, Alias: alias, Fork: mapping.Fork}) + clean = append(clean, OAuthModelAlias{Name: name, Alias: alias, Fork: entry.Fork}) } if len(clean) > 0 { out[channel] = clean } } - cfg.OAuthModelMappings = out + cfg.OAuthModelAlias = out } // SanitizeOpenAICompatibility removes OpenAI-compatibility provider entries that are diff --git a/internal/config/oauth_model_alias_migration.go b/internal/config/oauth_model_alias_migration.go new file mode 100644 index 00000000..0e3b2156 --- /dev/null +++ b/internal/config/oauth_model_alias_migration.go @@ -0,0 +1,258 @@ +package config + +import ( + "os" + "strings" + + "gopkg.in/yaml.v3" +) + +// antigravityModelConversionTable maps old built-in aliases to actual model names +// for the antigravity channel during migration. 
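+// Keys are the client-visible alias IDs; values are the upstream model IDs they resolve to.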
+var antigravityModelConversionTable = map[string]string{ + "gemini-2.5-computer-use-preview-10-2025": "rev19-uic3-1p", + "gemini-3-pro-image-preview": "gemini-3-pro-image", + "gemini-3-pro-preview": "gemini-3-pro-high", + "gemini-3-flash-preview": "gemini-3-flash", + "gemini-claude-sonnet-4-5": "claude-sonnet-4-5", + "gemini-claude-sonnet-4-5-thinking": "claude-sonnet-4-5-thinking", + "gemini-claude-opus-4-5-thinking": "claude-opus-4-5-thinking", +} + +// defaultAntigravityAliases returns the default oauth-model-alias configuration +// for the antigravity channel when neither field exists. +func defaultAntigravityAliases() []OAuthModelAlias { + return []OAuthModelAlias{ + {Name: "rev19-uic3-1p", Alias: "gemini-2.5-computer-use-preview-10-2025"}, + {Name: "gemini-3-pro-image", Alias: "gemini-3-pro-image-preview"}, + {Name: "gemini-3-pro-high", Alias: "gemini-3-pro-preview"}, + {Name: "gemini-3-flash", Alias: "gemini-3-flash-preview"}, + {Name: "claude-sonnet-4-5", Alias: "gemini-claude-sonnet-4-5"}, + {Name: "claude-sonnet-4-5-thinking", Alias: "gemini-claude-sonnet-4-5-thinking"}, + {Name: "claude-opus-4-5-thinking", Alias: "gemini-claude-opus-4-5-thinking"}, + } +} + +// MigrateOAuthModelAlias checks for and performs migration from oauth-model-mappings +// to oauth-model-alias at startup. Returns true if migration was performed. +// +// Migration flow: +// 1. Check if oauth-model-alias exists -> skip migration +// 2. Check if oauth-model-mappings exists -> convert and migrate +// - For antigravity channel, convert old built-in aliases to actual model names +// +// 3. Neither exists -> add default antigravity config +func MigrateOAuthModelAlias(configFile string) (bool, error) { + data, err := os.ReadFile(configFile) + if err != nil { + if os.IsNotExist(err) { + return false, nil + } + return false, err + } + if len(data) == 0 { + return false, nil + } + + // Parse YAML into node tree to preserve structure + var root yaml.Node + if err := yaml.Unmarshal(data, &root); err != nil { + return false, nil + } + if root.Kind != yaml.DocumentNode || len(root.Content) == 0 { + return false, nil + } + rootMap := root.Content[0] + if rootMap == nil || rootMap.Kind != yaml.MappingNode { + return false, nil + } + + // Check if oauth-model-alias already exists + if findMapKeyIndex(rootMap, "oauth-model-alias") >= 0 { + return false, nil + } + + // Check if oauth-model-mappings exists + oldIdx := findMapKeyIndex(rootMap, "oauth-model-mappings") + if oldIdx >= 0 { + // Migrate from old field + return migrateFromOldField(configFile, &root, rootMap, oldIdx) + } + + // Neither field exists - add default antigravity config + return addDefaultAntigravityConfig(configFile, &root, rootMap) +} + +// migrateFromOldField converts oauth-model-mappings to oauth-model-alias +func migrateFromOldField(configFile string, root *yaml.Node, rootMap *yaml.Node, oldIdx int) (bool, error) { + if oldIdx+1 >= len(rootMap.Content) { + return false, nil + } + oldValue := rootMap.Content[oldIdx+1] + if oldValue == nil || oldValue.Kind != yaml.MappingNode { + return false, nil + } + + // Parse the old aliases + oldAliases := parseOldAliasNode(oldValue) + if len(oldAliases) == 0 { + // Remove the old field and write + removeMapKeyByIndex(rootMap, oldIdx) + return writeYAMLNode(configFile, root) + } + + // Convert model names for antigravity channel + newAliases := make(map[string][]OAuthModelAlias, len(oldAliases)) + for channel, entries := range oldAliases { + converted := make([]OAuthModelAlias, 0, len(entries)) + for _, entry := 
range entries { + newEntry := OAuthModelAlias{ + Name: entry.Name, + Alias: entry.Alias, + Fork: entry.Fork, + } + // Convert model names for antigravity channel + if strings.EqualFold(channel, "antigravity") { + if actual, ok := antigravityModelConversionTable[entry.Name]; ok { + newEntry.Name = actual + } + } + converted = append(converted, newEntry) + } + newAliases[channel] = converted + } + + // Build new node + newNode := buildOAuthModelAliasNode(newAliases) + + // Replace old key with new key and value + rootMap.Content[oldIdx].Value = "oauth-model-alias" + rootMap.Content[oldIdx+1] = newNode + + return writeYAMLNode(configFile, root) +} + +// addDefaultAntigravityConfig adds the default antigravity configuration +func addDefaultAntigravityConfig(configFile string, root *yaml.Node, rootMap *yaml.Node) (bool, error) { + defaults := map[string][]OAuthModelAlias{ + "antigravity": defaultAntigravityAliases(), + } + newNode := buildOAuthModelAliasNode(defaults) + + // Add new key-value pair + keyNode := &yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "oauth-model-alias"} + rootMap.Content = append(rootMap.Content, keyNode, newNode) + + return writeYAMLNode(configFile, root) +} + +// parseOldAliasNode parses the old oauth-model-mappings node structure +func parseOldAliasNode(node *yaml.Node) map[string][]OAuthModelAlias { + if node == nil || node.Kind != yaml.MappingNode { + return nil + } + result := make(map[string][]OAuthModelAlias) + for i := 0; i+1 < len(node.Content); i += 2 { + channelNode := node.Content[i] + entriesNode := node.Content[i+1] + if channelNode == nil || entriesNode == nil { + continue + } + channel := strings.ToLower(strings.TrimSpace(channelNode.Value)) + if channel == "" || entriesNode.Kind != yaml.SequenceNode { + continue + } + entries := make([]OAuthModelAlias, 0, len(entriesNode.Content)) + for _, entryNode := range entriesNode.Content { + if entryNode == nil || entryNode.Kind != yaml.MappingNode { + continue + } + entry := parseAliasEntry(entryNode) + if entry.Name != "" && entry.Alias != "" { + entries = append(entries, entry) + } + } + if len(entries) > 0 { + result[channel] = entries + } + } + return result +} + +// parseAliasEntry parses a single alias entry node +func parseAliasEntry(node *yaml.Node) OAuthModelAlias { + var entry OAuthModelAlias + for i := 0; i+1 < len(node.Content); i += 2 { + keyNode := node.Content[i] + valNode := node.Content[i+1] + if keyNode == nil || valNode == nil { + continue + } + switch strings.ToLower(strings.TrimSpace(keyNode.Value)) { + case "name": + entry.Name = strings.TrimSpace(valNode.Value) + case "alias": + entry.Alias = strings.TrimSpace(valNode.Value) + case "fork": + entry.Fork = strings.ToLower(strings.TrimSpace(valNode.Value)) == "true" + } + } + return entry +} + +// buildOAuthModelAliasNode creates a YAML node for oauth-model-alias +func buildOAuthModelAliasNode(aliases map[string][]OAuthModelAlias) *yaml.Node { + node := &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"} + for channel, entries := range aliases { + channelNode := &yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: channel} + entriesNode := &yaml.Node{Kind: yaml.SequenceNode, Tag: "!!seq"} + for _, entry := range entries { + entryNode := &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"} + entryNode.Content = append(entryNode.Content, + &yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "name"}, + &yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: entry.Name}, + &yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "alias"}, + 
&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: entry.Alias}, + ) + if entry.Fork { + entryNode.Content = append(entryNode.Content, + &yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "fork"}, + &yaml.Node{Kind: yaml.ScalarNode, Tag: "!!bool", Value: "true"}, + ) + } + entriesNode.Content = append(entriesNode.Content, entryNode) + } + node.Content = append(node.Content, channelNode, entriesNode) + } + return node +} + +// removeMapKeyByIndex removes a key-value pair from a mapping node by index +func removeMapKeyByIndex(mapNode *yaml.Node, keyIdx int) { + if mapNode == nil || mapNode.Kind != yaml.MappingNode { + return + } + if keyIdx < 0 || keyIdx+1 >= len(mapNode.Content) { + return + } + mapNode.Content = append(mapNode.Content[:keyIdx], mapNode.Content[keyIdx+2:]...) +} + +// writeYAMLNode writes the YAML node tree back to file +func writeYAMLNode(configFile string, root *yaml.Node) (bool, error) { + f, err := os.Create(configFile) + if err != nil { + return false, err + } + defer f.Close() + + enc := yaml.NewEncoder(f) + enc.SetIndent(2) + if err := enc.Encode(root); err != nil { + return false, err + } + if err := enc.Close(); err != nil { + return false, err + } + return true, nil +} diff --git a/internal/config/oauth_model_alias_migration_test.go b/internal/config/oauth_model_alias_migration_test.go new file mode 100644 index 00000000..ab5a1f49 --- /dev/null +++ b/internal/config/oauth_model_alias_migration_test.go @@ -0,0 +1,225 @@ +package config + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "gopkg.in/yaml.v3" +) + +func TestMigrateOAuthModelAlias_SkipsIfNewFieldExists(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + configFile := filepath.Join(dir, "config.yaml") + + content := `oauth-model-alias: + gemini-cli: + - name: "gemini-2.5-pro" + alias: "g2.5p" +` + if err := os.WriteFile(configFile, []byte(content), 0644); err != nil { + t.Fatal(err) + } + + migrated, err := MigrateOAuthModelAlias(configFile) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if migrated { + t.Fatal("expected no migration when oauth-model-alias already exists") + } + + // Verify file unchanged + data, _ := os.ReadFile(configFile) + if !strings.Contains(string(data), "oauth-model-alias:") { + t.Fatal("file should still contain oauth-model-alias") + } +} + +func TestMigrateOAuthModelAlias_MigratesOldField(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + configFile := filepath.Join(dir, "config.yaml") + + content := `oauth-model-mappings: + gemini-cli: + - name: "gemini-2.5-pro" + alias: "g2.5p" + fork: true +` + if err := os.WriteFile(configFile, []byte(content), 0644); err != nil { + t.Fatal(err) + } + + migrated, err := MigrateOAuthModelAlias(configFile) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !migrated { + t.Fatal("expected migration to occur") + } + + // Verify new field exists and old field removed + data, _ := os.ReadFile(configFile) + if strings.Contains(string(data), "oauth-model-mappings:") { + t.Fatal("old field should be removed") + } + if !strings.Contains(string(data), "oauth-model-alias:") { + t.Fatal("new field should exist") + } + + // Parse and verify structure + var root yaml.Node + if err := yaml.Unmarshal(data, &root); err != nil { + t.Fatal(err) + } +} + +func TestMigrateOAuthModelAlias_ConvertsAntigravityModels(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + configFile := filepath.Join(dir, "config.yaml") + + // Use old model names that should be converted + content := 
`oauth-model-mappings: + antigravity: + - name: "gemini-2.5-computer-use-preview-10-2025" + alias: "computer-use" + - name: "gemini-3-pro-preview" + alias: "g3p" +` + if err := os.WriteFile(configFile, []byte(content), 0644); err != nil { + t.Fatal(err) + } + + migrated, err := MigrateOAuthModelAlias(configFile) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !migrated { + t.Fatal("expected migration to occur") + } + + // Verify model names were converted + data, _ := os.ReadFile(configFile) + content = string(data) + if !strings.Contains(content, "rev19-uic3-1p") { + t.Fatal("expected gemini-2.5-computer-use-preview-10-2025 to be converted to rev19-uic3-1p") + } + if !strings.Contains(content, "gemini-3-pro-high") { + t.Fatal("expected gemini-3-pro-preview to be converted to gemini-3-pro-high") + } +} + +func TestMigrateOAuthModelAlias_AddsDefaultIfNeitherExists(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + configFile := filepath.Join(dir, "config.yaml") + + content := `debug: true +port: 8080 +` + if err := os.WriteFile(configFile, []byte(content), 0644); err != nil { + t.Fatal(err) + } + + migrated, err := MigrateOAuthModelAlias(configFile) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !migrated { + t.Fatal("expected migration to add default config") + } + + // Verify default antigravity config was added + data, _ := os.ReadFile(configFile) + content = string(data) + if !strings.Contains(content, "oauth-model-alias:") { + t.Fatal("expected oauth-model-alias to be added") + } + if !strings.Contains(content, "antigravity:") { + t.Fatal("expected antigravity channel to be added") + } + if !strings.Contains(content, "rev19-uic3-1p") { + t.Fatal("expected default antigravity aliases to include rev19-uic3-1p") + } +} + +func TestMigrateOAuthModelAlias_PreservesOtherConfig(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + configFile := filepath.Join(dir, "config.yaml") + + content := `debug: true +port: 8080 +oauth-model-mappings: + gemini-cli: + - name: "test" + alias: "t" +api-keys: + - "key1" + - "key2" +` + if err := os.WriteFile(configFile, []byte(content), 0644); err != nil { + t.Fatal(err) + } + + migrated, err := MigrateOAuthModelAlias(configFile) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !migrated { + t.Fatal("expected migration to occur") + } + + // Verify other config preserved + data, _ := os.ReadFile(configFile) + content = string(data) + if !strings.Contains(content, "debug: true") { + t.Fatal("expected debug field to be preserved") + } + if !strings.Contains(content, "port: 8080") { + t.Fatal("expected port field to be preserved") + } + if !strings.Contains(content, "api-keys:") { + t.Fatal("expected api-keys field to be preserved") + } +} + +func TestMigrateOAuthModelAlias_NonexistentFile(t *testing.T) { + t.Parallel() + + migrated, err := MigrateOAuthModelAlias("/nonexistent/path/config.yaml") + if err != nil { + t.Fatalf("unexpected error for nonexistent file: %v", err) + } + if migrated { + t.Fatal("expected no migration for nonexistent file") + } +} + +func TestMigrateOAuthModelAlias_EmptyFile(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + configFile := filepath.Join(dir, "config.yaml") + + if err := os.WriteFile(configFile, []byte(""), 0644); err != nil { + t.Fatal(err) + } + + migrated, err := MigrateOAuthModelAlias(configFile) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if migrated { + t.Fatal("expected no migration for empty file") + } +} diff --git 
a/internal/config/oauth_model_alias_test.go b/internal/config/oauth_model_alias_test.go new file mode 100644 index 00000000..a5886474 --- /dev/null +++ b/internal/config/oauth_model_alias_test.go @@ -0,0 +1,56 @@ +package config + +import "testing" + +func TestSanitizeOAuthModelAlias_PreservesForkFlag(t *testing.T) { + cfg := &Config{ + OAuthModelAlias: map[string][]OAuthModelAlias{ + " CoDeX ": { + {Name: " gpt-5 ", Alias: " g5 ", Fork: true}, + {Name: "gpt-6", Alias: "g6"}, + }, + }, + } + + cfg.SanitizeOAuthModelAlias() + + aliases := cfg.OAuthModelAlias["codex"] + if len(aliases) != 2 { + t.Fatalf("expected 2 sanitized aliases, got %d", len(aliases)) + } + if aliases[0].Name != "gpt-5" || aliases[0].Alias != "g5" || !aliases[0].Fork { + t.Fatalf("expected first alias to be gpt-5->g5 fork=true, got name=%q alias=%q fork=%v", aliases[0].Name, aliases[0].Alias, aliases[0].Fork) + } + if aliases[1].Name != "gpt-6" || aliases[1].Alias != "g6" || aliases[1].Fork { + t.Fatalf("expected second alias to be gpt-6->g6 fork=false, got name=%q alias=%q fork=%v", aliases[1].Name, aliases[1].Alias, aliases[1].Fork) + } +} + +func TestSanitizeOAuthModelAlias_AllowsMultipleAliasesForSameName(t *testing.T) { + cfg := &Config{ + OAuthModelAlias: map[string][]OAuthModelAlias{ + "antigravity": { + {Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true}, + {Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true}, + {Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true}, + }, + }, + } + + cfg.SanitizeOAuthModelAlias() + + aliases := cfg.OAuthModelAlias["antigravity"] + expected := []OAuthModelAlias{ + {Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true}, + {Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true}, + {Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true}, + } + if len(aliases) != len(expected) { + t.Fatalf("expected %d sanitized aliases, got %d", len(expected), len(aliases)) + } + for i, exp := range expected { + if aliases[i].Name != exp.Name || aliases[i].Alias != exp.Alias || aliases[i].Fork != exp.Fork { + t.Fatalf("expected alias %d to be name=%q alias=%q fork=%v, got name=%q alias=%q fork=%v", i, exp.Name, exp.Alias, exp.Fork, aliases[i].Name, aliases[i].Alias, aliases[i].Fork) + } + } +} diff --git a/internal/config/oauth_model_mappings_test.go b/internal/config/oauth_model_mappings_test.go deleted file mode 100644 index 10bfe165..00000000 --- a/internal/config/oauth_model_mappings_test.go +++ /dev/null @@ -1,56 +0,0 @@ -package config - -import "testing" - -func TestSanitizeOAuthModelMappings_PreservesForkFlag(t *testing.T) { - cfg := &Config{ - OAuthModelMappings: map[string][]ModelNameMapping{ - " CoDeX ": { - {Name: " gpt-5 ", Alias: " g5 ", Fork: true}, - {Name: "gpt-6", Alias: "g6"}, - }, - }, - } - - cfg.SanitizeOAuthModelMappings() - - mappings := cfg.OAuthModelMappings["codex"] - if len(mappings) != 2 { - t.Fatalf("expected 2 sanitized mappings, got %d", len(mappings)) - } - if mappings[0].Name != "gpt-5" || mappings[0].Alias != "g5" || !mappings[0].Fork { - t.Fatalf("expected first mapping to be gpt-5->g5 fork=true, got name=%q alias=%q fork=%v", mappings[0].Name, mappings[0].Alias, mappings[0].Fork) - } - if mappings[1].Name != "gpt-6" || mappings[1].Alias != "g6" || mappings[1].Fork { - t.Fatalf("expected second mapping to be gpt-6->g6 fork=false, got name=%q alias=%q fork=%v", 
mappings[1].Name, mappings[1].Alias, mappings[1].Fork) - } -} - -func TestSanitizeOAuthModelMappings_AllowsMultipleAliasesForSameName(t *testing.T) { - cfg := &Config{ - OAuthModelMappings: map[string][]ModelNameMapping{ - "antigravity": { - {Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true}, - {Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true}, - {Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true}, - }, - }, - } - - cfg.SanitizeOAuthModelMappings() - - mappings := cfg.OAuthModelMappings["antigravity"] - expected := []ModelNameMapping{ - {Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true}, - {Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true}, - {Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true}, - } - if len(mappings) != len(expected) { - t.Fatalf("expected %d sanitized mappings, got %d", len(expected), len(mappings)) - } - for i, exp := range expected { - if mappings[i].Name != exp.Name || mappings[i].Alias != exp.Alias || mappings[i].Fork != exp.Fork { - t.Fatalf("expected mapping %d to be name=%q alias=%q fork=%v, got name=%q alias=%q fork=%v", i, exp.Name, exp.Alias, exp.Fork, mappings[i].Name, mappings[i].Alias, mappings[i].Fork) - } - } -} diff --git a/internal/watcher/config_reload.go b/internal/watcher/config_reload.go index 370ee4e1..edac3474 100644 --- a/internal/watcher/config_reload.go +++ b/internal/watcher/config_reload.go @@ -127,7 +127,7 @@ func (w *Watcher) reloadConfig() bool { } authDirChanged := oldConfig == nil || oldConfig.AuthDir != newConfig.AuthDir - forceAuthRefresh := oldConfig != nil && (oldConfig.ForceModelPrefix != newConfig.ForceModelPrefix || !reflect.DeepEqual(oldConfig.OAuthModelMappings, newConfig.OAuthModelMappings)) + forceAuthRefresh := oldConfig != nil && (oldConfig.ForceModelPrefix != newConfig.ForceModelPrefix || !reflect.DeepEqual(oldConfig.OAuthModelAlias, newConfig.OAuthModelAlias)) log.Infof("config successfully reloaded, triggering client reload") w.reloadClients(authDirChanged, affectedOAuthProviders, forceAuthRefresh) diff --git a/internal/watcher/diff/config_diff.go b/internal/watcher/diff/config_diff.go index fecbc242..2620f4ee 100644 --- a/internal/watcher/diff/config_diff.go +++ b/internal/watcher/diff/config_diff.go @@ -212,7 +212,7 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string { if entries, _ := DiffOAuthExcludedModelChanges(oldCfg.OAuthExcludedModels, newCfg.OAuthExcludedModels); len(entries) > 0 { changes = append(changes, entries...) } - if entries, _ := DiffOAuthModelMappingChanges(oldCfg.OAuthModelMappings, newCfg.OAuthModelMappings); len(entries) > 0 { + if entries, _ := DiffOAuthModelAliasChanges(oldCfg.OAuthModelAlias, newCfg.OAuthModelAlias); len(entries) > 0 { changes = append(changes, entries...) 
} diff --git a/internal/watcher/diff/oauth_model_mappings.go b/internal/watcher/diff/oauth_model_alias.go similarity index 51% rename from internal/watcher/diff/oauth_model_mappings.go rename to internal/watcher/diff/oauth_model_alias.go index c002855c..c5a17d29 100644 --- a/internal/watcher/diff/oauth_model_mappings.go +++ b/internal/watcher/diff/oauth_model_alias.go @@ -10,23 +10,23 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/internal/config" ) -type OAuthModelMappingsSummary struct { +type OAuthModelAliasSummary struct { hash string count int } -// SummarizeOAuthModelMappings summarizes OAuth model mappings per channel. -func SummarizeOAuthModelMappings(entries map[string][]config.ModelNameMapping) map[string]OAuthModelMappingsSummary { +// SummarizeOAuthModelAlias summarizes OAuth model aliases per channel. +func SummarizeOAuthModelAlias(entries map[string][]config.OAuthModelAlias) map[string]OAuthModelAliasSummary { if len(entries) == 0 { return nil } - out := make(map[string]OAuthModelMappingsSummary, len(entries)) + out := make(map[string]OAuthModelAliasSummary, len(entries)) for k, v := range entries { key := strings.ToLower(strings.TrimSpace(k)) if key == "" { continue } - out[key] = summarizeOAuthModelMappingList(v) + out[key] = summarizeOAuthModelAliasList(v) } if len(out) == 0 { return nil @@ -34,10 +34,10 @@ func SummarizeOAuthModelMappings(entries map[string][]config.ModelNameMapping) m return out } -// DiffOAuthModelMappingChanges compares OAuth model mappings maps. -func DiffOAuthModelMappingChanges(oldMap, newMap map[string][]config.ModelNameMapping) ([]string, []string) { - oldSummary := SummarizeOAuthModelMappings(oldMap) - newSummary := SummarizeOAuthModelMappings(newMap) +// DiffOAuthModelAliasChanges compares per-channel OAuth model alias maps.
+func DiffOAuthModelAliasChanges(oldMap, newMap map[string][]config.OAuthModelAlias) ([]string, []string) { + oldSummary := SummarizeOAuthModelAlias(oldMap) + newSummary := SummarizeOAuthModelAlias(newMap) keys := make(map[string]struct{}, len(oldSummary)+len(newSummary)) for k := range oldSummary { keys[k] = struct{}{} @@ -52,13 +52,13 @@ func DiffOAuthModelMappingChanges(oldMap, newMap map[string][]config.ModelNameMa newInfo, okNew := newSummary[key] switch { case okOld && !okNew: - changes = append(changes, fmt.Sprintf("oauth-model-mappings[%s]: removed", key)) + changes = append(changes, fmt.Sprintf("oauth-model-alias[%s]: removed", key)) affected = append(affected, key) case !okOld && okNew: - changes = append(changes, fmt.Sprintf("oauth-model-mappings[%s]: added (%d entries)", key, newInfo.count)) + changes = append(changes, fmt.Sprintf("oauth-model-alias[%s]: added (%d entries)", key, newInfo.count)) affected = append(affected, key) case okOld && okNew && oldInfo.hash != newInfo.hash: - changes = append(changes, fmt.Sprintf("oauth-model-mappings[%s]: updated (%d -> %d entries)", key, oldInfo.count, newInfo.count)) + changes = append(changes, fmt.Sprintf("oauth-model-alias[%s]: updated (%d -> %d entries)", key, oldInfo.count, newInfo.count)) affected = append(affected, key) } } @@ -67,20 +67,20 @@ func DiffOAuthModelMappingChanges(oldMap, newMap map[string][]config.ModelNameMa return changes, affected } -func summarizeOAuthModelMappingList(list []config.ModelNameMapping) OAuthModelMappingsSummary { +func summarizeOAuthModelAliasList(list []config.OAuthModelAlias) OAuthModelAliasSummary { if len(list) == 0 { - return OAuthModelMappingsSummary{} + return OAuthModelAliasSummary{} } seen := make(map[string]struct{}, len(list)) normalized := make([]string, 0, len(list)) - for _, mapping := range list { - name := strings.ToLower(strings.TrimSpace(mapping.Name)) - alias := strings.ToLower(strings.TrimSpace(mapping.Alias)) - if name == "" || alias == "" { + for _, alias := range list { + name := strings.ToLower(strings.TrimSpace(alias.Name)) + aliasVal := strings.ToLower(strings.TrimSpace(alias.Alias)) + if name == "" || aliasVal == "" { continue } - key := name + "->" + alias - if mapping.Fork { + key := name + "->" + aliasVal + if alias.Fork { key += "|fork" } if _, exists := seen[key]; exists { @@ -90,11 +90,11 @@ func summarizeOAuthModelMappingList(list []config.ModelNameMapping) OAuthModelMa normalized = append(normalized, key) } if len(normalized) == 0 { - return OAuthModelMappingsSummary{} + return OAuthModelAliasSummary{} } sort.Strings(normalized) sum := sha256.Sum256([]byte(strings.Join(normalized, "|"))) - return OAuthModelMappingsSummary{ + return OAuthModelAliasSummary{ hash: hex.EncodeToString(sum[:]), count: len(normalized), } diff --git a/sdk/cliproxy/auth/api_key_model_mappings_test.go b/sdk/cliproxy/auth/api_key_model_alias_test.go similarity index 94% rename from sdk/cliproxy/auth/api_key_model_mappings_test.go rename to sdk/cliproxy/auth/api_key_model_alias_test.go index 9f3bd7fe..70915d9e 100644 --- a/sdk/cliproxy/auth/api_key_model_mappings_test.go +++ b/sdk/cliproxy/auth/api_key_model_alias_test.go @@ -66,7 +66,7 @@ func TestLookupAPIKeyUpstreamModel(t *testing.T) { } } -func TestAPIKeyModelMappings_ConfigHotReload(t *testing.T) { +func TestAPIKeyModelAlias_ConfigHotReload(t *testing.T) { cfg := &internalconfig.Config{ GeminiKey: []internalconfig.GeminiKey{ { @@ -82,12 +82,12 @@ func TestAPIKeyModelMappings_ConfigHotReload(t *testing.T) { ctx := context.Background() _, 
_ = mgr.Register(ctx, &Auth{ID: "a1", Provider: "gemini", Attributes: map[string]string{"api_key": "k"}}) - // Initial mapping + // Initial alias if resolved := mgr.lookupAPIKeyUpstreamModel("a1", "g25p"); resolved != "gemini-2.5-pro-exp-03-25" { t.Fatalf("before reload: got %q, want %q", resolved, "gemini-2.5-pro-exp-03-25") } - // Hot reload with new mapping + // Hot reload with new alias mgr.SetConfig(&internalconfig.Config{ GeminiKey: []internalconfig.GeminiKey{ { @@ -97,13 +97,13 @@ func TestAPIKeyModelMappings_ConfigHotReload(t *testing.T) { }, }) - // New mapping should take effect + // New alias should take effect if resolved := mgr.lookupAPIKeyUpstreamModel("a1", "g25p"); resolved != "gemini-2.5-flash" { t.Fatalf("after reload: got %q, want %q", resolved, "gemini-2.5-flash") } } -func TestAPIKeyModelMappings_MultipleProviders(t *testing.T) { +func TestAPIKeyModelAlias_MultipleProviders(t *testing.T) { cfg := &internalconfig.Config{ GeminiKey: []internalconfig.GeminiKey{{APIKey: "gemini-key", Models: []internalconfig.GeminiModel{{Name: "gemini-2.5-pro", Alias: "gp"}}}}, ClaudeKey: []internalconfig.ClaudeKey{{APIKey: "claude-key", Models: []internalconfig.ClaudeModel{{Name: "claude-sonnet-4", Alias: "cs4"}}}}, @@ -133,7 +133,7 @@ func TestAPIKeyModelMappings_MultipleProviders(t *testing.T) { } } -func TestApplyAPIKeyModelMapping(t *testing.T) { +func TestApplyAPIKeyModelAlias(t *testing.T) { cfg := &internalconfig.Config{ GeminiKey: []internalconfig.GeminiKey{ {APIKey: "k", Models: []internalconfig.GeminiModel{{Name: "gemini-2.5-pro-exp-03-25", Alias: "g25p"}}}, @@ -170,7 +170,7 @@ func TestApplyAPIKeyModelMapping(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - resolvedModel := mgr.applyAPIKeyModelMapping(tt.auth, tt.inputModel) + resolvedModel := mgr.applyAPIKeyModelAlias(tt.auth, tt.inputModel) if resolvedModel != tt.wantModel { t.Errorf("model = %q, want %q", resolvedModel, tt.wantModel) diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index f7605d87..43483672 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -119,17 +119,17 @@ type Manager struct { requestRetry atomic.Int32 maxRetryInterval atomic.Int64 - // modelNameMappings stores global model name alias mappings (alias -> upstream name) keyed by channel. - modelNameMappings atomic.Value + // oauthModelAlias stores global OAuth model alias mappings (alias -> upstream name) keyed by channel. + oauthModelAlias atomic.Value + + // apiKeyModelAlias caches resolved model alias mappings for API-key auths. + // Keyed by auth.ID, value is alias(lower) -> upstream model (including suffix). + apiKeyModelAlias atomic.Value // runtimeConfig stores the latest application config for request-time decisions. // It is initialized in NewManager; never Load() before first Store(). runtimeConfig atomic.Value - // apiKeyModelMappings caches resolved model alias mappings for API-key auths. - // Keyed by auth.ID, value is alias(lower) -> upstream model (including suffix). - apiKeyModelMappings atomic.Value - // Optional HTTP RoundTripper provider injected by host. rtProvider RoundTripperProvider @@ -155,7 +155,7 @@ func NewManager(store Store, selector Selector, hook Hook) *Manager { } // atomic.Value requires non-nil initial value. 
manager.runtimeConfig.Store(&internalconfig.Config{}) - manager.apiKeyModelMappings.Store(apiKeyModelMappingTable(nil)) + manager.apiKeyModelAlias.Store(apiKeyModelAliasTable(nil)) return manager } @@ -195,7 +195,7 @@ func (m *Manager) SetConfig(cfg *internalconfig.Config) { cfg = &internalconfig.Config{} } m.runtimeConfig.Store(cfg) - m.rebuildAPIKeyModelMappingsFromRuntimeConfig() + m.rebuildAPIKeyModelAliasFromRuntimeConfig() } func (m *Manager) lookupAPIKeyUpstreamModel(authID, requestedModel string) string { @@ -210,7 +210,7 @@ func (m *Manager) lookupAPIKeyUpstreamModel(authID, requestedModel string) strin if requestedModel == "" { return "" } - table, _ := m.apiKeyModelMappings.Load().(apiKeyModelMappingTable) + table, _ := m.apiKeyModelAlias.Load().(apiKeyModelAliasTable) if table == nil { return "" } @@ -238,7 +238,7 @@ func (m *Manager) lookupAPIKeyUpstreamModel(authID, requestedModel string) strin } -func (m *Manager) rebuildAPIKeyModelMappingsFromRuntimeConfig() { +func (m *Manager) rebuildAPIKeyModelAliasFromRuntimeConfig() { if m == nil { return } @@ -248,10 +248,10 @@ func (m *Manager) rebuildAPIKeyModelMappingsFromRuntimeConfig() { } m.mu.Lock() defer m.mu.Unlock() - m.rebuildAPIKeyModelMappingsLocked(cfg) + m.rebuildAPIKeyModelAliasLocked(cfg) } -func (m *Manager) rebuildAPIKeyModelMappingsLocked(cfg *internalconfig.Config) { +func (m *Manager) rebuildAPIKeyModelAliasLocked(cfg *internalconfig.Config) { if m == nil { return } @@ -259,7 +259,7 @@ func (m *Manager) rebuildAPIKeyModelMappingsLocked(cfg *internalconfig.Config) { cfg = &internalconfig.Config{} } - out := make(apiKeyModelMappingTable) + out := make(apiKeyModelAliasTable) for _, auth := range m.auths { if auth == nil { continue @@ -277,19 +277,19 @@ func (m *Manager) rebuildAPIKeyModelMappingsLocked(cfg *internalconfig.Config) { switch provider { case "gemini": if entry := resolveGeminiAPIKeyConfig(cfg, auth); entry != nil { - compileAPIKeyModelMappingsForModels(byAlias, entry.Models) + compileAPIKeyModelAliasForModels(byAlias, entry.Models) } case "claude": if entry := resolveClaudeAPIKeyConfig(cfg, auth); entry != nil { - compileAPIKeyModelMappingsForModels(byAlias, entry.Models) + compileAPIKeyModelAliasForModels(byAlias, entry.Models) } case "codex": if entry := resolveCodexAPIKeyConfig(cfg, auth); entry != nil { - compileAPIKeyModelMappingsForModels(byAlias, entry.Models) + compileAPIKeyModelAliasForModels(byAlias, entry.Models) } case "vertex": if entry := resolveVertexAPIKeyConfig(cfg, auth); entry != nil { - compileAPIKeyModelMappingsForModels(byAlias, entry.Models) + compileAPIKeyModelAliasForModels(byAlias, entry.Models) } default: // OpenAI-compat uses config selection from auth.Attributes. 
@@ -301,7 +301,7 @@ func (m *Manager) rebuildAPIKeyModelMappingsLocked(cfg *internalconfig.Config) { } if compatName != "" || strings.EqualFold(strings.TrimSpace(auth.Provider), "openai-compatibility") { if entry := resolveOpenAICompatConfig(cfg, providerKey, compatName, auth.Provider); entry != nil { - compileAPIKeyModelMappingsForModels(byAlias, entry.Models) + compileAPIKeyModelAliasForModels(byAlias, entry.Models) } } } @@ -311,10 +311,10 @@ func (m *Manager) rebuildAPIKeyModelMappingsLocked(cfg *internalconfig.Config) { } } - m.apiKeyModelMappings.Store(out) + m.apiKeyModelAlias.Store(out) } -func compileAPIKeyModelMappingsForModels[T interface { +func compileAPIKeyModelAliasForModels[T interface { GetName() string GetAlias() string }](out map[string]string, models []T) { @@ -408,7 +408,7 @@ func (m *Manager) Register(ctx context.Context, auth *Auth) (*Auth, error) { m.mu.Lock() m.auths[auth.ID] = auth.Clone() m.mu.Unlock() - m.rebuildAPIKeyModelMappingsFromRuntimeConfig() + m.rebuildAPIKeyModelAliasFromRuntimeConfig() _ = m.persist(ctx, auth) m.hook.OnAuthRegistered(ctx, auth.Clone()) return auth.Clone(), nil @@ -427,7 +427,7 @@ func (m *Manager) Update(ctx context.Context, auth *Auth) (*Auth, error) { auth.EnsureIndex() m.auths[auth.ID] = auth.Clone() m.mu.Unlock() - m.rebuildAPIKeyModelMappingsFromRuntimeConfig() + m.rebuildAPIKeyModelAliasFromRuntimeConfig() _ = m.persist(ctx, auth) m.hook.OnAuthUpdated(ctx, auth.Clone()) return auth.Clone(), nil @@ -456,7 +456,7 @@ func (m *Manager) Load(ctx context.Context) error { if cfg == nil { cfg = &internalconfig.Config{} } - m.rebuildAPIKeyModelMappingsLocked(cfg) + m.rebuildAPIKeyModelAliasLocked(cfg) return nil } @@ -592,8 +592,8 @@ func (m *Manager) executeMixedOnce(ctx context.Context, providers []string, req } execReq := req execReq.Model = rewriteModelForAuth(routeModel, auth) - execReq.Model = m.applyOAuthModelMapping(auth, execReq.Model) - execReq.Model = m.applyAPIKeyModelMapping(auth, execReq.Model) + execReq.Model = m.applyOAuthModelAlias(auth, execReq.Model) + execReq.Model = m.applyAPIKeyModelAlias(auth, execReq.Model) resp, errExec := executor.Execute(execCtx, auth, execReq, opts) result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil} if errExec != nil { @@ -641,8 +641,8 @@ func (m *Manager) executeCountMixedOnce(ctx context.Context, providers []string, } execReq := req execReq.Model = rewriteModelForAuth(routeModel, auth) - execReq.Model = m.applyOAuthModelMapping(auth, execReq.Model) - execReq.Model = m.applyAPIKeyModelMapping(auth, execReq.Model) + execReq.Model = m.applyOAuthModelAlias(auth, execReq.Model) + execReq.Model = m.applyAPIKeyModelAlias(auth, execReq.Model) resp, errExec := executor.CountTokens(execCtx, auth, execReq, opts) result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil} if errExec != nil { @@ -690,8 +690,8 @@ func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string } execReq := req execReq.Model = rewriteModelForAuth(routeModel, auth) - execReq.Model = m.applyOAuthModelMapping(auth, execReq.Model) - execReq.Model = m.applyAPIKeyModelMapping(auth, execReq.Model) + execReq.Model = m.applyOAuthModelAlias(auth, execReq.Model) + execReq.Model = m.applyAPIKeyModelAlias(auth, execReq.Model) chunks, errStream := executor.ExecuteStream(execCtx, auth, execReq, opts) if errStream != nil { rerr := &Error{Message: errStream.Error()} @@ -756,8 +756,8 @@ func (m *Manager) executeWithProvider(ctx 
context.Context, provider string, req } execReq := req execReq.Model = rewriteModelForAuth(routeModel, auth) - execReq.Model = m.applyOAuthModelMapping(auth, execReq.Model) - execReq.Model = m.applyAPIKeyModelMapping(auth, execReq.Model) + execReq.Model = m.applyOAuthModelAlias(auth, execReq.Model) + execReq.Model = m.applyAPIKeyModelAlias(auth, execReq.Model) resp, errExec := executor.Execute(execCtx, auth, execReq, opts) result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil} if errExec != nil { @@ -805,8 +805,8 @@ func (m *Manager) executeCountWithProvider(ctx context.Context, provider string, } execReq := req execReq.Model = rewriteModelForAuth(routeModel, auth) - execReq.Model = m.applyOAuthModelMapping(auth, execReq.Model) - execReq.Model = m.applyAPIKeyModelMapping(auth, execReq.Model) + execReq.Model = m.applyOAuthModelAlias(auth, execReq.Model) + execReq.Model = m.applyAPIKeyModelAlias(auth, execReq.Model) resp, errExec := executor.CountTokens(execCtx, auth, execReq, opts) result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil} if errExec != nil { @@ -854,8 +854,8 @@ func (m *Manager) executeStreamWithProvider(ctx context.Context, provider string } execReq := req execReq.Model = rewriteModelForAuth(routeModel, auth) - execReq.Model = m.applyOAuthModelMapping(auth, execReq.Model) - execReq.Model = m.applyAPIKeyModelMapping(auth, execReq.Model) + execReq.Model = m.applyOAuthModelAlias(auth, execReq.Model) + execReq.Model = m.applyAPIKeyModelAlias(auth, execReq.Model) chunks, errStream := executor.ExecuteStream(execCtx, auth, execReq, opts) if errStream != nil { rerr := &Error{Message: errStream.Error()} @@ -908,7 +908,7 @@ func rewriteModelForAuth(model string, auth *Auth) string { return strings.TrimPrefix(model, needle) } -func (m *Manager) applyAPIKeyModelMapping(auth *Auth, requestedModel string) string { +func (m *Manager) applyAPIKeyModelAlias(auth *Auth, requestedModel string) string { if m == nil || auth == nil { return requestedModel } @@ -1079,7 +1079,7 @@ func resolveUpstreamModelForOpenAICompatAPIKey(cfg *internalconfig.Config, auth return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models)) } -type apiKeyModelMappingTable map[string]map[string]string +type apiKeyModelAliasTable map[string]map[string]string func resolveOpenAICompatConfig(cfg *internalconfig.Config, providerKey, compatName, authProvider string) *internalconfig.OpenAICompatibility { if cfg == nil { @@ -1109,11 +1109,11 @@ func resolveOpenAICompatConfig(cfg *internalconfig.Config, providerKey, compatNa func asModelAliasEntries[T interface { GetName() string GetAlias() string -}](models []T) []modelMappingEntry { +}](models []T) []modelAliasEntry { if len(models) == 0 { return nil } - out := make([]modelMappingEntry, 0, len(models)) + out := make([]modelAliasEntry, 0, len(models)) for i := range models { out = append(out, models[i]) } diff --git a/sdk/cliproxy/auth/model_name_mappings.go b/sdk/cliproxy/auth/oauth_model_alias.go similarity index 73% rename from sdk/cliproxy/auth/model_name_mappings.go rename to sdk/cliproxy/auth/oauth_model_alias.go index 24fcf50f..4111663e 100644 --- a/sdk/cliproxy/auth/model_name_mappings.go +++ b/sdk/cliproxy/auth/oauth_model_alias.go @@ -7,24 +7,24 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" ) -type modelMappingEntry interface { +type modelAliasEntry interface { GetName() string GetAlias() string } -type modelNameMappingTable 
struct { +type oauthModelAliasTable struct { // reverse maps channel -> alias (lower) -> original upstream model name. reverse map[string]map[string]string } -func compileModelNameMappingTable(mappings map[string][]internalconfig.ModelNameMapping) *modelNameMappingTable { - if len(mappings) == 0 { - return &modelNameMappingTable{} +func compileOAuthModelAliasTable(aliases map[string][]internalconfig.OAuthModelAlias) *oauthModelAliasTable { + if len(aliases) == 0 { + return &oauthModelAliasTable{} } - out := &modelNameMappingTable{ - reverse: make(map[string]map[string]string, len(mappings)), + out := &oauthModelAliasTable{ + reverse: make(map[string]map[string]string, len(aliases)), } - for rawChannel, entries := range mappings { + for rawChannel, entries := range aliases { channel := strings.ToLower(strings.TrimSpace(rawChannel)) if channel == "" || len(entries) == 0 { continue @@ -55,24 +55,24 @@ func compileModelNameMappingTable(mappings map[string][]internalconfig.ModelName return out } -// SetOAuthModelMappings updates the OAuth model name mapping table used during execution. -// The mapping is applied per-auth channel to resolve the upstream model name while keeping the +// SetOAuthModelAlias updates the OAuth model name alias table used during execution. +// The alias is applied per-auth channel to resolve the upstream model name while keeping the // client-visible model name unchanged for translation/response formatting. -func (m *Manager) SetOAuthModelMappings(mappings map[string][]internalconfig.ModelNameMapping) { +func (m *Manager) SetOAuthModelAlias(aliases map[string][]internalconfig.OAuthModelAlias) { if m == nil { return } - table := compileModelNameMappingTable(mappings) + table := compileOAuthModelAliasTable(aliases) // atomic.Value requires non-nil store values. if table == nil { - table = &modelNameMappingTable{} + table = &oauthModelAliasTable{} } - m.modelNameMappings.Store(table) + m.oauthModelAlias.Store(table) } -// applyOAuthModelMapping resolves the upstream model from OAuth model mappings. -// If a mapping exists, the returned model is the upstream model. -func (m *Manager) applyOAuthModelMapping(auth *Auth, requestedModel string) string { +// applyOAuthModelAlias resolves the upstream model from OAuth model alias. +// If an alias exists, the returned model is the upstream model. +func (m *Manager) applyOAuthModelAlias(auth *Auth, requestedModel string) string { upstreamModel := m.resolveOAuthUpstreamModel(auth, requestedModel) if upstreamModel == "" { return requestedModel @@ -80,7 +80,7 @@ func (m *Manager) applyOAuthModelMapping(auth *Auth, requestedModel string) stri return upstreamModel } -func resolveModelAliasFromConfigModels(requestedModel string, models []modelMappingEntry) string { +func resolveModelAliasFromConfigModels(requestedModel string, models []modelAliasEntry) string { requestedModel = strings.TrimSpace(requestedModel) if requestedModel == "" { return "" @@ -131,18 +131,18 @@ func resolveModelAliasFromConfigModels(requestedModel string, models []modelMapp return "" } -// resolveOAuthUpstreamModel resolves the upstream model name from OAuth model mappings. -// If a mapping exists, returns the original (upstream) model name that corresponds +// resolveOAuthUpstreamModel resolves the upstream model name from OAuth model alias. +// If an alias exists, returns the original (upstream) model name that corresponds // to the requested alias. 
// // If the requested model contains a thinking suffix (e.g., "gemini-2.5-pro(8192)"), -// the suffix is preserved in the returned model name. However, if the mapping's +// the suffix is preserved in the returned model name. However, if the alias's // original name already contains a suffix, the config suffix takes priority. func (m *Manager) resolveOAuthUpstreamModel(auth *Auth, requestedModel string) string { - return resolveUpstreamModelFromMappingTable(m, auth, requestedModel, modelMappingChannel(auth)) + return resolveUpstreamModelFromAliasTable(m, auth, requestedModel, modelAliasChannel(auth)) } -func resolveUpstreamModelFromMappingTable(m *Manager, auth *Auth, requestedModel, channel string) string { +func resolveUpstreamModelFromAliasTable(m *Manager, auth *Auth, requestedModel, channel string) string { if m == nil || auth == nil { return "" } @@ -160,8 +160,8 @@ func resolveUpstreamModelFromMappingTable(m *Manager, auth *Auth, requestedModel candidates = append(candidates, requestedModel) } - raw := m.modelNameMappings.Load() - table, _ := raw.(*modelNameMappingTable) + raw := m.oauthModelAlias.Load() + table, _ := raw.(*oauthModelAliasTable) if table == nil || table.reverse == nil { return "" } @@ -197,10 +197,10 @@ func resolveUpstreamModelFromMappingTable(m *Manager, auth *Auth, requestedModel return "" } -// modelMappingChannel extracts the OAuth model mapping channel from an Auth object. +// modelAliasChannel extracts the OAuth model alias channel from an Auth object. // It determines the provider and auth kind from the Auth's attributes and delegates -// to OAuthModelMappingChannel for the actual channel resolution. -func modelMappingChannel(auth *Auth) string { +// to OAuthModelAliasChannel for the actual channel resolution. +func modelAliasChannel(auth *Auth) string { if auth == nil { return "" } @@ -214,20 +214,20 @@ func modelMappingChannel(auth *Auth) string { authKind = "apikey" } } - return OAuthModelMappingChannel(provider, authKind) + return OAuthModelAliasChannel(provider, authKind) } -// OAuthModelMappingChannel returns the OAuth model mapping channel name for a given provider +// OAuthModelAliasChannel returns the OAuth model alias channel name for a given provider // and auth kind. Returns empty string if the provider/authKind combination doesn't support -// OAuth model mappings (e.g., API key authentication). +// OAuth model alias (e.g., API key authentication). // // Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow. -func OAuthModelMappingChannel(provider, authKind string) string { +func OAuthModelAliasChannel(provider, authKind string) string { provider = strings.ToLower(strings.TrimSpace(provider)) authKind = strings.ToLower(strings.TrimSpace(authKind)) switch provider { case "gemini": - // gemini provider uses gemini-api-key config, not oauth-model-mappings. + // gemini provider uses gemini-api-key config, not oauth-model-alias. // OAuth-based gemini auth is converted to "gemini-cli" by the synthesizer. 
return "" case "vertex": diff --git a/sdk/cliproxy/auth/model_name_mappings_test.go b/sdk/cliproxy/auth/oauth_model_alias_test.go similarity index 77% rename from sdk/cliproxy/auth/model_name_mappings_test.go rename to sdk/cliproxy/auth/oauth_model_alias_test.go index 77f33bd6..6956411c 100644 --- a/sdk/cliproxy/auth/model_name_mappings_test.go +++ b/sdk/cliproxy/auth/oauth_model_alias_test.go @@ -10,15 +10,15 @@ func TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) { t.Parallel() tests := []struct { - name string - mappings map[string][]internalconfig.ModelNameMapping - channel string - input string - want string + name string + aliases map[string][]internalconfig.OAuthModelAlias + channel string + input string + want string }{ { name: "numeric suffix preserved", - mappings: map[string][]internalconfig.ModelNameMapping{ + aliases: map[string][]internalconfig.OAuthModelAlias{ "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, }, channel: "gemini-cli", @@ -27,7 +27,7 @@ func TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) { }, { name: "level suffix preserved", - mappings: map[string][]internalconfig.ModelNameMapping{ + aliases: map[string][]internalconfig.OAuthModelAlias{ "claude": {{Name: "claude-sonnet-4-5-20250514", Alias: "claude-sonnet-4-5"}}, }, channel: "claude", @@ -36,7 +36,7 @@ func TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) { }, { name: "no suffix unchanged", - mappings: map[string][]internalconfig.ModelNameMapping{ + aliases: map[string][]internalconfig.OAuthModelAlias{ "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, }, channel: "gemini-cli", @@ -45,7 +45,7 @@ func TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) { }, { name: "config suffix takes priority", - mappings: map[string][]internalconfig.ModelNameMapping{ + aliases: map[string][]internalconfig.OAuthModelAlias{ "claude": {{Name: "claude-sonnet-4-5-20250514(low)", Alias: "claude-sonnet-4-5"}}, }, channel: "claude", @@ -54,7 +54,7 @@ func TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) { }, { name: "auto suffix preserved", - mappings: map[string][]internalconfig.ModelNameMapping{ + aliases: map[string][]internalconfig.OAuthModelAlias{ "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, }, channel: "gemini-cli", @@ -63,7 +63,7 @@ func TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) { }, { name: "none suffix preserved", - mappings: map[string][]internalconfig.ModelNameMapping{ + aliases: map[string][]internalconfig.OAuthModelAlias{ "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, }, channel: "gemini-cli", @@ -72,7 +72,7 @@ func TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) { }, { name: "case insensitive alias lookup with suffix", - mappings: map[string][]internalconfig.ModelNameMapping{ + aliases: map[string][]internalconfig.OAuthModelAlias{ "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "Gemini-2.5-Pro"}}, }, channel: "gemini-cli", @@ -80,8 +80,8 @@ func TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) { want: "gemini-2.5-pro-exp-03-25(high)", }, { - name: "no mapping returns empty", - mappings: map[string][]internalconfig.ModelNameMapping{ + name: "no alias returns empty", + aliases: map[string][]internalconfig.OAuthModelAlias{ "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, }, channel: "gemini-cli", @@ -90,7 +90,7 @@ func 
TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) { }, { name: "wrong channel returns empty", - mappings: map[string][]internalconfig.ModelNameMapping{ + aliases: map[string][]internalconfig.OAuthModelAlias{ "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, }, channel: "claude", @@ -99,7 +99,7 @@ func TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) { }, { name: "empty suffix filtered out", - mappings: map[string][]internalconfig.ModelNameMapping{ + aliases: map[string][]internalconfig.OAuthModelAlias{ "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, }, channel: "gemini-cli", @@ -108,7 +108,7 @@ func TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) { }, { name: "incomplete suffix treated as no suffix", - mappings: map[string][]internalconfig.ModelNameMapping{ + aliases: map[string][]internalconfig.OAuthModelAlias{ "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro(high"}}, }, channel: "gemini-cli", @@ -123,7 +123,7 @@ func TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) { mgr := NewManager(nil, nil, nil) mgr.SetConfig(&internalconfig.Config{}) - mgr.SetOAuthModelMappings(tt.mappings) + mgr.SetOAuthModelAlias(tt.aliases) auth := createAuthForChannel(tt.channel) got := mgr.resolveOAuthUpstreamModel(auth, tt.input) @@ -157,21 +157,21 @@ func createAuthForChannel(channel string) *Auth { } } -func TestApplyOAuthModelMapping_SuffixPreservation(t *testing.T) { +func TestApplyOAuthModelAlias_SuffixPreservation(t *testing.T) { t.Parallel() - mappings := map[string][]internalconfig.ModelNameMapping{ + aliases := map[string][]internalconfig.OAuthModelAlias{ "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, } mgr := NewManager(nil, nil, nil) mgr.SetConfig(&internalconfig.Config{}) - mgr.SetOAuthModelMappings(mappings) + mgr.SetOAuthModelAlias(aliases) auth := &Auth{ID: "test-auth-id", Provider: "gemini-cli"} - resolvedModel := mgr.applyOAuthModelMapping(auth, "gemini-2.5-pro(8192)") + resolvedModel := mgr.applyOAuthModelAlias(auth, "gemini-2.5-pro(8192)") if resolvedModel != "gemini-2.5-pro-exp-03-25(8192)" { - t.Errorf("applyOAuthModelMapping() model = %q, want %q", resolvedModel, "gemini-2.5-pro-exp-03-25(8192)") + t.Errorf("applyOAuthModelAlias() model = %q, want %q", resolvedModel, "gemini-2.5-pro-exp-03-25(8192)") } } diff --git a/sdk/cliproxy/builder.go b/sdk/cliproxy/builder.go index 2e2427f9..5eba18a0 100644 --- a/sdk/cliproxy/builder.go +++ b/sdk/cliproxy/builder.go @@ -216,7 +216,7 @@ func (b *Builder) Build() (*Service, error) { // Attach a default RoundTripper provider so providers can opt-in per-auth transports. 
coreManager.SetRoundTripperProvider(newDefaultRoundTripperProvider()) coreManager.SetConfig(b.cfg) - coreManager.SetOAuthModelMappings(b.cfg.OAuthModelMappings) + coreManager.SetOAuthModelAlias(b.cfg.OAuthModelAlias) service := &Service{ cfg: b.cfg, diff --git a/sdk/cliproxy/service.go b/sdk/cliproxy/service.go index 71603479..7a06ae78 100644 --- a/sdk/cliproxy/service.go +++ b/sdk/cliproxy/service.go @@ -554,7 +554,7 @@ func (s *Service) Run(ctx context.Context) error { s.cfgMu.Unlock() if s.coreManager != nil { s.coreManager.SetConfig(newCfg) - s.coreManager.SetOAuthModelMappings(newCfg.OAuthModelMappings) + s.coreManager.SetOAuthModelAlias(newCfg.OAuthModelAlias) } s.rebindExecutors() } @@ -849,7 +849,7 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) { } } } - models = applyOAuthModelMappings(s.cfg, provider, authKind, models) + models = applyOAuthModelAlias(s.cfg, provider, authKind, models) if len(models) > 0 { key := provider if key == "" { @@ -1222,28 +1222,28 @@ func rewriteModelInfoName(name, oldID, newID string) string { return name } -func applyOAuthModelMappings(cfg *config.Config, provider, authKind string, models []*ModelInfo) []*ModelInfo { +func applyOAuthModelAlias(cfg *config.Config, provider, authKind string, models []*ModelInfo) []*ModelInfo { if cfg == nil || len(models) == 0 { return models } - channel := coreauth.OAuthModelMappingChannel(provider, authKind) - if channel == "" || len(cfg.OAuthModelMappings) == 0 { + channel := coreauth.OAuthModelAliasChannel(provider, authKind) + if channel == "" || len(cfg.OAuthModelAlias) == 0 { return models } - mappings := cfg.OAuthModelMappings[channel] - if len(mappings) == 0 { + aliases := cfg.OAuthModelAlias[channel] + if len(aliases) == 0 { return models } - type mappingEntry struct { + type aliasEntry struct { alias string fork bool } - forward := make(map[string][]mappingEntry, len(mappings)) - for i := range mappings { - name := strings.TrimSpace(mappings[i].Name) - alias := strings.TrimSpace(mappings[i].Alias) + forward := make(map[string][]aliasEntry, len(aliases)) + for i := range aliases { + name := strings.TrimSpace(aliases[i].Name) + alias := strings.TrimSpace(aliases[i].Alias) if name == "" || alias == "" { continue } @@ -1251,7 +1251,7 @@ func applyOAuthModelMappings(cfg *config.Config, provider, authKind string, mode continue } key := strings.ToLower(name) - forward[key] = append(forward[key], mappingEntry{alias: alias, fork: mappings[i].Fork}) + forward[key] = append(forward[key], aliasEntry{alias: alias, fork: aliases[i].Fork}) } if len(forward) == 0 { return models diff --git a/sdk/cliproxy/service_oauth_model_mappings_test.go b/sdk/cliproxy/service_oauth_model_alias_test.go similarity index 77% rename from sdk/cliproxy/service_oauth_model_mappings_test.go rename to sdk/cliproxy/service_oauth_model_alias_test.go index ca9ff35a..2caf7a17 100644 --- a/sdk/cliproxy/service_oauth_model_mappings_test.go +++ b/sdk/cliproxy/service_oauth_model_alias_test.go @@ -6,9 +6,9 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/sdk/config" ) -func TestApplyOAuthModelMappings_Rename(t *testing.T) { +func TestApplyOAuthModelAlias_Rename(t *testing.T) { cfg := &config.Config{ - OAuthModelMappings: map[string][]config.ModelNameMapping{ + OAuthModelAlias: map[string][]config.OAuthModelAlias{ "codex": { {Name: "gpt-5", Alias: "g5"}, }, @@ -18,7 +18,7 @@ func TestApplyOAuthModelMappings_Rename(t *testing.T) { {ID: "gpt-5", Name: "models/gpt-5"}, } - out := applyOAuthModelMappings(cfg, "codex", "oauth", models) + out 
:= applyOAuthModelAlias(cfg, "codex", "oauth", models) if len(out) != 1 { t.Fatalf("expected 1 model, got %d", len(out)) } @@ -30,9 +30,9 @@ func TestApplyOAuthModelMappings_Rename(t *testing.T) { } } -func TestApplyOAuthModelMappings_ForkAddsAlias(t *testing.T) { +func TestApplyOAuthModelAlias_ForkAddsAlias(t *testing.T) { cfg := &config.Config{ - OAuthModelMappings: map[string][]config.ModelNameMapping{ + OAuthModelAlias: map[string][]config.OAuthModelAlias{ "codex": { {Name: "gpt-5", Alias: "g5", Fork: true}, }, @@ -42,7 +42,7 @@ func TestApplyOAuthModelMappings_ForkAddsAlias(t *testing.T) { {ID: "gpt-5", Name: "models/gpt-5"}, } - out := applyOAuthModelMappings(cfg, "codex", "oauth", models) + out := applyOAuthModelAlias(cfg, "codex", "oauth", models) if len(out) != 2 { t.Fatalf("expected 2 models, got %d", len(out)) } @@ -57,9 +57,9 @@ func TestApplyOAuthModelMappings_ForkAddsAlias(t *testing.T) { } } -func TestApplyOAuthModelMappings_ForkAddsMultipleAliases(t *testing.T) { +func TestApplyOAuthModelAlias_ForkAddsMultipleAliases(t *testing.T) { cfg := &config.Config{ - OAuthModelMappings: map[string][]config.ModelNameMapping{ + OAuthModelAlias: map[string][]config.OAuthModelAlias{ "codex": { {Name: "gpt-5", Alias: "g5", Fork: true}, {Name: "gpt-5", Alias: "g5-2", Fork: true}, @@ -70,7 +70,7 @@ func TestApplyOAuthModelMappings_ForkAddsMultipleAliases(t *testing.T) { {ID: "gpt-5", Name: "models/gpt-5"}, } - out := applyOAuthModelMappings(cfg, "codex", "oauth", models) + out := applyOAuthModelAlias(cfg, "codex", "oauth", models) if len(out) != 3 { t.Fatalf("expected 3 models, got %d", len(out)) } diff --git a/sdk/config/config.go b/sdk/config/config.go index 1ae7ba20..304ccdd8 100644 --- a/sdk/config/config.go +++ b/sdk/config/config.go @@ -16,7 +16,7 @@ type StreamingConfig = internalconfig.StreamingConfig type TLSConfig = internalconfig.TLSConfig type RemoteManagement = internalconfig.RemoteManagement type AmpCode = internalconfig.AmpCode -type ModelNameMapping = internalconfig.ModelNameMapping +type OAuthModelAlias = internalconfig.OAuthModelAlias type PayloadConfig = internalconfig.PayloadConfig type PayloadRule = internalconfig.PayloadRule type PayloadModelRule = internalconfig.PayloadModelRule From 4ad6189487ff682a902c0e4570bed7c565dc3ef2 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 15 Jan 2026 19:08:22 +0800 Subject: [PATCH 29/65] refactor(thinking): extract antigravity logic into a dedicated provider --- internal/logging/global_logger.go | 10 +- .../runtime/executor/antigravity_executor.go | 78 ------- .../runtime/executor/thinking_providers.go | 1 + .../thinking/provider/antigravity/apply.go | 201 ++++++++++++++++++ internal/thinking/provider/geminicli/apply.go | 4 +- test/thinking_conversion_test.go | 1 + 6 files changed, 205 insertions(+), 90 deletions(-) create mode 100644 internal/thinking/provider/antigravity/apply.go diff --git a/internal/logging/global_logger.go b/internal/logging/global_logger.go index 3b034dc6..63c7af46 100644 --- a/internal/logging/global_logger.go +++ b/internal/logging/global_logger.go @@ -55,23 +55,15 @@ func (m *LogFormatter) Format(entry *log.Entry) ([]byte, error) { } levelStr := fmt.Sprintf("%-5s", level) - // Build fields string (excluding request_id which is already shown) + // Build fields string (only print fields in logFieldOrder) var fieldsStr string if len(entry.Data) > 0 { - seen := make(map[string]bool) var fields []string for _, k := range logFieldOrder { if v, ok := entry.Data[k]; ok { 
fields = append(fields, fmt.Sprintf("%s=%v", k, v)) - seen[k] = true } } - for k, v := range entry.Data { - if k == "request_id" || seen[k] { - continue - } - fields = append(fields, fmt.Sprintf("%s=%v", k, v)) - } if len(fields) > 0 { fieldsStr = " " + strings.Join(fields, " ") } diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 46b2d4ea..0c5d511f 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -141,8 +141,6 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au return resp, err } - // Preserve Claude special handling (use baseModel for registry lookups) - translated = normalizeAntigravityThinking(baseModel, translated, isClaude) translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated) baseURLs := antigravityBaseURLFallbackOrder(auth) @@ -262,8 +260,6 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth * return resp, err } - // Preserve Claude special handling (use baseModel for registry lookups) - translated = normalizeAntigravityThinking(baseModel, translated, true) translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated) baseURLs := antigravityBaseURLFallbackOrder(auth) @@ -603,7 +599,6 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya baseModel := thinking.ParseSuffix(req.Model).ModelName ctx = context.WithValue(ctx, "alt", "") - isClaude := strings.Contains(strings.ToLower(baseModel), "claude") token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth) if errToken != nil { @@ -631,8 +626,6 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya return nil, err } - // Preserve Claude special handling (use baseModel for registry lookups) - translated = normalizeAntigravityThinking(baseModel, translated, isClaude) translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated) baseURLs := antigravityBaseURLFallbackOrder(auth) @@ -790,7 +783,6 @@ func (e *AntigravityExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Au // CountTokens counts tokens for the given request using the Antigravity API. func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { baseModel := thinking.ParseSuffix(req.Model).ModelName - isClaude := strings.Contains(strings.ToLower(baseModel), "claude") token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth) if errToken != nil { @@ -815,8 +807,6 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut return cliproxyexecutor.Response{}, err } - // Preserve Claude special handling (use baseModel for registry lookups) - payload = normalizeAntigravityThinking(baseModel, payload, isClaude) payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "model") payload = deleteJSONField(payload, "request.safetySettings") @@ -1447,71 +1437,3 @@ func generateProjectID() string { randomPart := strings.ToLower(uuid.NewString())[:5] return adj + "-" + noun + "-" + randomPart } - -// normalizeAntigravityThinking performs Antigravity-specific thinking config normalization. 
-// This function is called AFTER thinking.ApplyThinking() to apply Claude-specific constraints. -// -// It handles: -// - Stripping thinking config for unsupported models -// - Normalizing budget to model range (via thinking.ClampBudget) -// - For Claude models: ensuring thinking budget < max_tokens -// - For Claude models: removing thinkingConfig if budget < minimum allowed -func normalizeAntigravityThinking(model string, payload []byte, isClaude bool) []byte { - modelInfo := registry.LookupModelInfo(model) - if modelInfo == nil || modelInfo.Thinking == nil { - // Model doesn't support thinking - strip any thinking config - return thinking.StripThinkingConfig(payload, "antigravity") - } - budget := gjson.GetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget") - if !budget.Exists() { - return payload - } - raw := int(budget.Int()) - normalized := thinking.ClampBudget(raw, modelInfo, "antigravity") - - if isClaude { - effectiveMax, setDefaultMax := antigravityEffectiveMaxTokens(model, payload) - if effectiveMax > 0 && normalized >= effectiveMax { - normalized = effectiveMax - 1 - } - minBudget := antigravityMinThinkingBudget(model) - if minBudget > 0 && normalized >= 0 && normalized < minBudget { - // Budget is below minimum, remove thinking config entirely - payload, _ = sjson.DeleteBytes(payload, "request.generationConfig.thinkingConfig") - return payload - } - if setDefaultMax { - if res, errSet := sjson.SetBytes(payload, "request.generationConfig.maxOutputTokens", effectiveMax); errSet == nil { - payload = res - } - } - } - - updated, err := sjson.SetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget", normalized) - if err != nil { - return payload - } - return updated -} - -// antigravityEffectiveMaxTokens returns the max tokens to cap thinking: -// prefer request-provided maxOutputTokens; otherwise fall back to model default. -// The boolean indicates whether the value came from the model default (and thus should be written back). -func antigravityEffectiveMaxTokens(model string, payload []byte) (max int, fromModel bool) { - if maxTok := gjson.GetBytes(payload, "request.generationConfig.maxOutputTokens"); maxTok.Exists() && maxTok.Int() > 0 { - return int(maxTok.Int()), false - } - if modelInfo := registry.LookupModelInfo(model); modelInfo != nil && modelInfo.MaxCompletionTokens > 0 { - return modelInfo.MaxCompletionTokens, true - } - return 0, false -} - -// antigravityMinThinkingBudget returns the minimum thinking budget for a model. -// Falls back to -1 if no model info is found. 
-func antigravityMinThinkingBudget(model string) int { - if modelInfo := registry.LookupModelInfo(model); modelInfo != nil && modelInfo.Thinking != nil { - return modelInfo.Thinking.Min - } - return -1 -} diff --git a/internal/runtime/executor/thinking_providers.go b/internal/runtime/executor/thinking_providers.go index 99ac468d..5a143670 100644 --- a/internal/runtime/executor/thinking_providers.go +++ b/internal/runtime/executor/thinking_providers.go @@ -1,6 +1,7 @@ package executor import ( + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/antigravity" _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/claude" _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/codex" _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini" diff --git a/internal/thinking/provider/antigravity/apply.go b/internal/thinking/provider/antigravity/apply.go new file mode 100644 index 00000000..9c1c79f6 --- /dev/null +++ b/internal/thinking/provider/antigravity/apply.go @@ -0,0 +1,201 @@ +// Package antigravity implements thinking configuration for Antigravity API format. +// +// Antigravity uses request.generationConfig.thinkingConfig.* path (same as gemini-cli) +// but requires additional normalization for Claude models: +// - Ensure thinking budget < max_tokens +// - Remove thinkingConfig if budget < minimum allowed +package antigravity + +import ( + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// Applier applies thinking configuration for Antigravity API format. +type Applier struct{} + +var _ thinking.ProviderApplier = (*Applier)(nil) + +// NewApplier creates a new Antigravity thinking applier. +func NewApplier() *Applier { + return &Applier{} +} + +func init() { + thinking.RegisterProvider("antigravity", NewApplier()) +} + +// Apply applies thinking configuration to Antigravity request body. 
+// +// For Claude models, additional constraints are applied: +// - Ensure thinking budget < max_tokens +// - Remove thinkingConfig if budget < minimum allowed +func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { + if thinking.IsUserDefinedModel(modelInfo) { + return a.applyCompatible(body, config, modelInfo) + } + if modelInfo.Thinking == nil { + return body, nil + } + + if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + isClaude := strings.Contains(strings.ToLower(modelInfo.ID), "claude") + + // ModeAuto: Always use Budget format with thinkingBudget=-1 + if config.Mode == thinking.ModeAuto { + return a.applyBudgetFormat(body, config, modelInfo, isClaude) + } + if config.Mode == thinking.ModeBudget { + return a.applyBudgetFormat(body, config, modelInfo, isClaude) + } + + // For non-auto modes, choose format based on model capabilities + support := modelInfo.Thinking + if len(support.Levels) > 0 { + return a.applyLevelFormat(body, config) + } + return a.applyBudgetFormat(body, config, modelInfo, isClaude) +} + +func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { + if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + isClaude := false + if modelInfo != nil { + isClaude = strings.Contains(strings.ToLower(modelInfo.ID), "claude") + } + + if config.Mode == thinking.ModeAuto { + return a.applyBudgetFormat(body, config, modelInfo, isClaude) + } + + if config.Mode == thinking.ModeLevel || (config.Mode == thinking.ModeNone && config.Level != "") { + return a.applyLevelFormat(body, config) + } + + return a.applyBudgetFormat(body, config, modelInfo, isClaude) +} + +func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + // Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output + result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget") + // Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing. 
+ result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts") + + if config.Mode == thinking.ModeNone { + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", false) + if config.Level != "" { + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", string(config.Level)) + } + return result, nil + } + + // Only handle ModeLevel - budget conversion should be done by upper layer + if config.Mode != thinking.ModeLevel { + return body, nil + } + + level := string(config.Level) + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", level) + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", true) + return result, nil +} + +func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo, isClaude bool) ([]byte, error) { + // Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output + result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingLevel") + // Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing. + result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts") + + budget := config.Budget + includeThoughts := false + switch config.Mode { + case thinking.ModeNone: + includeThoughts = false + case thinking.ModeAuto: + includeThoughts = true + default: + includeThoughts = budget > 0 + } + + // Apply Claude-specific constraints + if isClaude && modelInfo != nil { + budget, result = a.normalizeClaudeBudget(budget, result, modelInfo) + // Check if budget was removed entirely + if budget == -2 { + return result, nil + } + } + + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget", budget) + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", includeThoughts) + return result, nil +} + +// normalizeClaudeBudget applies Claude-specific constraints to thinking budget. +// +// It handles: +// - Ensuring thinking budget < max_tokens +// - Removing thinkingConfig if budget < minimum allowed +// +// Returns the normalized budget and updated payload. +// Returns budget=-2 as a sentinel indicating thinkingConfig was removed entirely. +func (a *Applier) normalizeClaudeBudget(budget int, payload []byte, modelInfo *registry.ModelInfo) (int, []byte) { + if modelInfo == nil { + return budget, payload + } + + // Get effective max tokens + effectiveMax, setDefaultMax := a.effectiveMaxTokens(payload, modelInfo) + if effectiveMax > 0 && budget >= effectiveMax { + budget = effectiveMax - 1 + } + + // Check minimum budget + minBudget := 0 + if modelInfo.Thinking != nil { + minBudget = modelInfo.Thinking.Min + } + if minBudget > 0 && budget >= 0 && budget < minBudget { + // Budget is below minimum, remove thinking config entirely + payload, _ = sjson.DeleteBytes(payload, "request.generationConfig.thinkingConfig") + return -2, payload + } + + // Set default max tokens if needed + if setDefaultMax && effectiveMax > 0 { + payload, _ = sjson.SetBytes(payload, "request.generationConfig.maxOutputTokens", effectiveMax) + } + + return budget, payload +} + +// effectiveMaxTokens returns the max tokens to cap thinking: +// prefer request-provided maxOutputTokens; otherwise fall back to model default. 
+// The boolean indicates whether the value came from the model default (and thus should be written back). +func (a *Applier) effectiveMaxTokens(payload []byte, modelInfo *registry.ModelInfo) (max int, fromModel bool) { + if maxTok := gjson.GetBytes(payload, "request.generationConfig.maxOutputTokens"); maxTok.Exists() && maxTok.Int() > 0 { + return int(maxTok.Int()), false + } + if modelInfo != nil && modelInfo.MaxCompletionTokens > 0 { + return modelInfo.MaxCompletionTokens, true + } + return 0, false +} diff --git a/internal/thinking/provider/geminicli/apply.go b/internal/thinking/provider/geminicli/apply.go index c8887723..75d9242a 100644 --- a/internal/thinking/provider/geminicli/apply.go +++ b/internal/thinking/provider/geminicli/apply.go @@ -22,9 +22,7 @@ func NewApplier() *Applier { } func init() { - applier := NewApplier() - thinking.RegisterProvider("gemini-cli", applier) - thinking.RegisterProvider("antigravity", applier) + thinking.RegisterProvider("gemini-cli", NewApplier()) } // Apply applies thinking configuration to Gemini CLI request body. diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go index 7e35c389..f28aa630 100644 --- a/test/thinking_conversion_test.go +++ b/test/thinking_conversion_test.go @@ -8,6 +8,7 @@ import ( _ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator" // Import provider packages to trigger init() registration of ProviderAppliers + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/antigravity" _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/claude" _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/codex" _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini" From 199cf480b0a961330da537d36fbf503c2a343855 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 15 Jan 2026 19:32:12 +0800 Subject: [PATCH 30/65] refactor(thinking): remove support for non-standard thinking configurations This change removes the translation logic for several non-standard, proprietary extensions used to configure thinking/reasoning. Specifically, support for `extra_body.google.thinking_config` and the Anthropic-style `thinking` object has been dropped from the OpenAI request translators. This simplification streamlines the translators, focusing them on the standard `reasoning_effort` parameter. It also removes the need to look up model information from the registry within these components. BREAKING CHANGE: Support for non-standard thinking configurations via `extra_body.google.thinking_config` and the Anthropic-style `thinking` object has been removed. Clients should now use the standard `reasoning_effort` parameter to control reasoning. 
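For illustration, a minimal sketch of the mapping that remains after this change (the model name, effort value, and target thinkingConfig path are assumptions for the example, not taken verbatim from this patch):

    package main

    import (
    	"fmt"
    	"strings"

    	"github.com/tidwall/gjson"
    	"github.com/tidwall/sjson"
    )

    func main() {
    	// A client request that uses only the standard parameter.
    	in := []byte(`{"model":"gemini-2.5-pro","reasoning_effort":"high"}`)
    	out := []byte(`{}`)
    	if re := gjson.GetBytes(in, "reasoning_effort"); re.Exists() {
    		effort := strings.ToLower(strings.TrimSpace(re.String()))
    		if effort != "" {
    			// Translation-only mapping; capability checks happen later in ApplyThinking.
    			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingLevel", effort)
    		}
    	}
    	fmt.Println(string(out))
    }

Requests carrying the removed extensions are no longer translated; only `reasoning_effort` (and `reasoning.effort` in the Responses API) is honored.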
--- .../antigravity_openai_request.go | 43 ------------------- .../gemini-cli_openai_request.go | 29 ------------- .../chat-completions/gemini_openai_request.go | 29 ------------- .../gemini_openai-responses_request.go | 23 ---------- 4 files changed, 124 deletions(-) diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go index 7cfaa6e9..a4819ae7 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go @@ -8,7 +8,6 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" - "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" log "github.com/sirupsen/logrus" @@ -38,7 +37,6 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ // Apply thinking configuration: convert OpenAI reasoning_effort to Gemini CLI thinkingConfig. // Inline translation-only mapping; capability checks happen later in ApplyThinking. - modelInfo := registry.LookupModelInfo(modelName) re := gjson.GetBytes(rawJSON, "reasoning_effort") if re.Exists() { effort := strings.ToLower(strings.TrimSpace(re.String())) @@ -54,47 +52,6 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ } } - // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) - // Only apply for models that use numeric budgets, not discrete levels. - if !re.Exists() && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { - if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { - var setBudget bool - var budget int - - if v := tc.Get("thinkingBudget"); v.Exists() { - budget = int(v.Int()) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) - setBudget = true - } else if v := tc.Get("thinking_budget"); v.Exists() { - budget = int(v.Int()) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingBudget.thinkingBudget", budget) - setBudget = true - } - - if v := tc.Get("includeThoughts"); v.Exists() { - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool()) - } else if v := tc.Get("include_thoughts"); v.Exists() { - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool()) - } else if setBudget && budget != 0 { - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) - } - } - } - - // Claude/Anthropic API format: thinking.type == "enabled" with budget_tokens - // This allows Claude Code and other Claude API clients to pass thinking configuration - if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelInfo != nil && modelInfo.Thinking != nil { - if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() { - if t.Get("type").String() == "enabled" { - if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { - budget := int(b.Int()) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) - } - } 
- } - } - // Temperature/top_p/top_k/max_tokens if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number { out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num) diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go index 09d1dea7..938a5ae4 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go @@ -8,7 +8,6 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" - "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" log "github.com/sirupsen/logrus" @@ -39,7 +38,6 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo // Apply thinking configuration: convert OpenAI reasoning_effort to Gemini CLI thinkingConfig. // Inline translation-only mapping; capability checks happen later in ApplyThinking. re := gjson.GetBytes(rawJSON, "reasoning_effort") - modelInfo := registry.LookupModelInfo(modelName) if re.Exists() { effort := strings.ToLower(strings.TrimSpace(re.String())) if effort != "" { @@ -54,33 +52,6 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo } } - // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) - // Only apply for models that use numeric budgets, not discrete levels. - if !re.Exists() && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { - if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { - var setBudget bool - var budget int - - if v := tc.Get("thinkingBudget"); v.Exists() { - budget = int(v.Int()) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) - setBudget = true - } else if v := tc.Get("thinking_budget"); v.Exists() { - budget = int(v.Int()) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) - setBudget = true - } - - if v := tc.Get("includeThoughts"); v.Exists() { - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool()) - } else if v := tc.Get("include_thoughts"); v.Exists() { - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool()) - } else if setBudget && budget != 0 { - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) - } - } - } - // Temperature/top_p/top_k if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number { out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num) diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 6e1a5014..fedd8dca 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -8,7 +8,6 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" - "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" 
"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" log "github.com/sirupsen/logrus" @@ -38,7 +37,6 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) // Apply thinking configuration: convert OpenAI reasoning_effort to Gemini thinkingConfig. // Inline translation-only mapping; capability checks happen later in ApplyThinking. - modelInfo := registry.LookupModelInfo(modelName) re := gjson.GetBytes(rawJSON, "reasoning_effort") if re.Exists() { effort := strings.ToLower(strings.TrimSpace(re.String())) @@ -54,33 +52,6 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) } } - // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) - // Only apply for models that use numeric budgets, not discrete levels. - if !re.Exists() && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { - if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { - var setBudget bool - var budget int - - if v := tc.Get("thinkingBudget"); v.Exists() { - budget = int(v.Int()) - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget) - setBudget = true - } else if v := tc.Get("thinking_budget"); v.Exists() { - budget = int(v.Int()) - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget) - setBudget = true - } - - if v := tc.Get("includeThoughts"); v.Exists() { - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool()) - } else if v := tc.Get("include_thoughts"); v.Exists() { - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool()) - } else if setBudget && budget != 0 { - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true) - } - } - } - // Temperature/top_p/top_k if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number { out, _ = sjson.SetBytes(out, "generationConfig.temperature", tr.Num) diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go index 81bb7d40..41279977 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go @@ -4,7 +4,6 @@ import ( "bytes" "strings" - "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" "github.com/tidwall/gjson" "github.com/tidwall/sjson" @@ -390,7 +389,6 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte // Apply thinking configuration: convert OpenAI Responses API reasoning.effort to Gemini thinkingConfig. // Inline translation-only mapping; capability checks happen later in ApplyThinking. - modelInfo := registry.LookupModelInfo(modelName) re := root.Get("reasoning.effort") if re.Exists() { effort := strings.ToLower(strings.TrimSpace(re.String())) @@ -406,27 +404,6 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte } } - // Cherry Studio extension (applies only when official fields are missing) - // Only apply for models that use numeric budgets, not discrete levels. 
- if !re.Exists() && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { - if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { - var setBudget bool - var budget int - if v := tc.Get("thinking_budget"); v.Exists() { - budget = int(v.Int()) - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget) - setBudget = true - } - if v := tc.Get("include_thoughts"); v.Exists() { - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool()) - } else if setBudget { - if budget != 0 { - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) - } - } - } - } - result := []byte(out) result = common.AttachDefaultSafetySettings(result, "safetySettings") return result From 2b387e169bfd8a3af62ab04c398339178cc14cef Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 15 Jan 2026 20:23:55 +0800 Subject: [PATCH 31/65] feat(iflow): add iflow-rome model definition --- internal/registry/model_definitions.go | 1 + internal/thinking/apply.go | 24 ++++++++++++------------ internal/thinking/validate.go | 8 ++++---- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index 2ef3381b..77669e4b 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -742,6 +742,7 @@ func GetIFlowModels() []*ModelInfo { {ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600}, {ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport}, {ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport}, + {ID: "iflow-rome-30ba3b", DisplayName: "iFlow-ROME", Description: "iFlow Rome 30BA3B model", Created: 1736899200}, } models := make([]*ModelInfo, 0, len(entries)) for _, entry := range entries { diff --git a/internal/thinking/apply.go b/internal/thinking/apply.go index a2919cea..003405c0 100644 --- a/internal/thinking/apply.go +++ b/internal/thinking/apply.go @@ -87,7 +87,7 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) { log.WithFields(log.Fields{ "provider": provider, "model": model, - }).Debug("thinking: unknown provider, passthrough") + }).Debug("thinking: unknown provider, passthrough |") return body, nil } @@ -108,13 +108,13 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) { log.WithFields(log.Fields{ "model": baseModel, "provider": provider, - }).Debug("thinking: model does not support thinking, stripping config") + }).Debug("thinking: model does not support thinking, stripping config |") return StripThinkingConfig(body, provider), nil } log.WithFields(log.Fields{ "provider": provider, "model": baseModel, - }).Debug("thinking: model does not support thinking, passthrough") + }).Debug("thinking: model does not support thinking, passthrough |") return body, nil } @@ -128,7 +128,7 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) { "mode": config.Mode, "budget": config.Budget, "level": config.Level, - }).Debug("thinking: config from model suffix") + }).Debug("thinking: config from model suffix |") } else { config = extractThinkingConfig(body, provider) if hasThinkingConfig(config) { @@ -138,7 +138,7 @@ func ApplyThinking(body []byte, model 
string, provider string) ([]byte, error) { "mode": config.Mode, "budget": config.Budget, "level": config.Level, - }).Debug("thinking: original config from request") + }).Debug("thinking: original config from request |") } } @@ -146,7 +146,7 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) { log.WithFields(log.Fields{ "provider": provider, "model": modelInfo.ID, - }).Debug("thinking: no config found, passthrough") + }).Debug("thinking: no config found, passthrough |") return body, nil } @@ -157,7 +157,7 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) { "provider": provider, "model": modelInfo.ID, "error": err.Error(), - }).Warn("thinking: validation failed") + }).Warn("thinking: validation failed |") // Return original body on validation failure (defensive programming). // This ensures callers who ignore the error won't receive nil body. // The upstream service will decide how to handle the unmodified request. @@ -169,7 +169,7 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) { log.WithFields(log.Fields{ "provider": provider, "model": modelInfo.ID, - }).Warn("thinking: ValidateConfig returned nil config without error, passthrough") + }).Warn("thinking: ValidateConfig returned nil config without error, passthrough |") return body, nil } @@ -179,7 +179,7 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) { "mode": validated.Mode, "budget": validated.Budget, "level": validated.Level, - }).Debug("thinking: processed config to apply") + }).Debug("thinking: processed config to apply |") // 6. Apply configuration using provider-specific applier return applier.Apply(body, *validated, modelInfo) @@ -222,7 +222,7 @@ func parseSuffixToConfig(rawSuffix, provider, model string) ThinkingConfig { "provider": provider, "model": model, "raw_suffix": rawSuffix, - }).Debug("thinking: unknown suffix format, treating as no config") + }).Debug("thinking: unknown suffix format, treating as no config |") return ThinkingConfig{} } @@ -249,7 +249,7 @@ func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, provider log.WithFields(log.Fields{ "model": modelID, "provider": provider, - }).Debug("thinking: user-defined model, passthrough (no config)") + }).Debug("thinking: user-defined model, passthrough (no config) |") return body, nil } @@ -258,7 +258,7 @@ func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, provider log.WithFields(log.Fields{ "model": modelID, "provider": provider, - }).Debug("thinking: user-defined model, passthrough (unknown provider)") + }).Debug("thinking: user-defined model, passthrough (unknown provider) |") return body, nil } diff --git a/internal/thinking/validate.go b/internal/thinking/validate.go index f16a18f0..aabe04eb 100644 --- a/internal/thinking/validate.go +++ b/internal/thinking/validate.go @@ -44,7 +44,7 @@ func ClampBudget(value int, modelInfo *registry.ModelInfo, provider string) int "clamped_to": min, "min": min, "max": max, - }).Warn("thinking: budget zero not allowed") + }).Warn("thinking: budget zero not allowed |") return min } @@ -221,7 +221,7 @@ func convertAutoToMidRange(config ThinkingConfig, support *registry.ThinkingSupp "model": model, "original_mode": "auto", "clamped_to": string(LevelMedium), - }).Debug("thinking: mode converted: dynamic not allowed, using medium level") + }).Debug("thinking: mode converted, dynamic not allowed, using medium level |") return config } @@ -242,7 +242,7 @@ func 
convertAutoToMidRange(config ThinkingConfig, support *registry.ThinkingSupp "model": model, "original_mode": "auto", "clamped_to": config.Budget, - }).Debug("thinking: mode converted: dynamic not allowed") + }).Debug("thinking: mode converted, dynamic not allowed |") return config } @@ -255,5 +255,5 @@ func logClamp(provider, model string, original, clampedTo, min, max int) { "min": min, "max": max, "clamped_to": clampedTo, - }).Debug("thinking: budget clamped") + }).Debug("thinking: budget clamped |") } From f571b1deb0cbaa8877a5b62ddf7e971c3fae83cf Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Fri, 16 Jan 2026 08:15:28 +0800 Subject: [PATCH 32/65] feat(config): add support for raw JSON payload rules - Introduced `default-raw` and `override-raw` rules to handle raw JSON values. - Enhanced `PayloadConfig` to validate and sanitize raw JSON payload rules. - Updated executor logic to apply `default-raw` and `override-raw` rules. - Extended example YAML to include usage of raw JSON rules. --- config.example.yaml | 12 ++++ internal/config/config.go | 65 ++++++++++++++++++ internal/runtime/executor/payload_helpers.go | 72 +++++++++++++++++++- 3 files changed, 148 insertions(+), 1 deletion(-) diff --git a/config.example.yaml b/config.example.yaml index b397be07..09307c33 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -275,9 +275,21 @@ oauth-model-alias: # protocol: "gemini" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex # params: # JSON path (gjson/sjson syntax) -> value # "generationConfig.thinkingConfig.thinkingBudget": 32768 +# default-raw: # Default raw rules set parameters using raw JSON when missing (must be valid JSON). +# - models: +# - name: "gemini-2.5-pro" # Supports wildcards (e.g., "gemini-*") +# protocol: "gemini" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex +# params: # JSON path (gjson/sjson syntax) -> raw JSON value (strings are used as-is, must be valid JSON) +# "generationConfig.responseJsonSchema": "{\"type\":\"object\",\"properties\":{\"answer\":{\"type\":\"string\"}}}" # override: # Override rules always set parameters, overwriting any existing values. # - models: # - name: "gpt-*" # Supports wildcards (e.g., "gpt-*") # protocol: "codex" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex # params: # JSON path (gjson/sjson syntax) -> value # "reasoning.effort": "high" +# override-raw: # Override raw rules always set parameters using raw JSON (must be valid JSON). +# - models: +# - name: "gpt-*" # Supports wildcards (e.g., "gpt-*") +# protocol: "codex" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex +# params: # JSON path (gjson/sjson syntax) -> raw JSON value (strings are used as-is, must be valid JSON) +# "response_format": "{\"type\":\"json_schema\",\"json_schema\":{\"name\":\"answer\",\"schema\":{\"type\":\"object\"}}}" diff --git a/internal/config/config.go b/internal/config/config.go index c66229a8..0405cfa7 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -6,12 +6,14 @@ package config import ( "bytes" + "encoding/json" "errors" "fmt" "os" "strings" "syscall" + log "github.com/sirupsen/logrus" "golang.org/x/crypto/bcrypt" "gopkg.in/yaml.v3" ) @@ -216,8 +218,12 @@ type AmpUpstreamAPIKeyEntry struct { type PayloadConfig struct { // Default defines rules that only set parameters when they are missing in the payload. 
Default []PayloadRule `yaml:"default" json:"default"` + // DefaultRaw defines rules that set raw JSON values only when they are missing. + DefaultRaw []PayloadRule `yaml:"default-raw" json:"default-raw"` // Override defines rules that always set parameters, overwriting any existing values. Override []PayloadRule `yaml:"override" json:"override"` + // OverrideRaw defines rules that always set raw JSON values, overwriting any existing values. + OverrideRaw []PayloadRule `yaml:"override-raw" json:"override-raw"` } // PayloadRule describes a single rule targeting a list of models with parameter updates. @@ -225,6 +231,7 @@ type PayloadRule struct { // Models lists model entries with name pattern and protocol constraint. Models []PayloadModelRule `yaml:"models" json:"models"` // Params maps JSON paths (gjson/sjson syntax) to values written into the payload. + // For *-raw rules, values are treated as raw JSON fragments (strings are used as-is). Params map[string]any `yaml:"params" json:"params"` } @@ -540,6 +547,9 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) { // Normalize global OAuth model name aliases. cfg.SanitizeOAuthModelAlias() + // Validate raw payload rules and drop invalid entries. + cfg.SanitizePayloadRules() + if cfg.legacyMigrationPending { fmt.Println("Detected legacy configuration keys, attempting to persist the normalized config...") if !optional && configFile != "" { @@ -556,6 +566,61 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) { return &cfg, nil } +// SanitizePayloadRules validates raw JSON payload rule params and drops invalid rules. +func (cfg *Config) SanitizePayloadRules() { + if cfg == nil { + return + } + cfg.Payload.DefaultRaw = sanitizePayloadRawRules(cfg.Payload.DefaultRaw, "default-raw") + cfg.Payload.OverrideRaw = sanitizePayloadRawRules(cfg.Payload.OverrideRaw, "override-raw") +} + +func sanitizePayloadRawRules(rules []PayloadRule, section string) []PayloadRule { + if len(rules) == 0 { + return rules + } + out := make([]PayloadRule, 0, len(rules)) + for i := range rules { + rule := rules[i] + if len(rule.Params) == 0 { + continue + } + invalid := false + for path, value := range rule.Params { + raw, ok := payloadRawString(value) + if !ok { + continue + } + trimmed := bytes.TrimSpace(raw) + if len(trimmed) == 0 || !json.Valid(trimmed) { + log.WithFields(log.Fields{ + "section": section, + "rule_index": i + 1, + "param": path, + }).Warn("payload rule dropped: invalid raw JSON") + invalid = true + break + } + } + if invalid { + continue + } + out = append(out, rule) + } + return out +} + +func payloadRawString(value any) ([]byte, bool) { + switch typed := value.(type) { + case string: + return []byte(typed), true + case []byte: + return typed, true + default: + return nil, false + } +} + // SanitizeOAuthModelAlias normalizes and deduplicates global OAuth model name aliases. // It trims whitespace, normalizes channel keys to lower-case, drops empty entries, // allows multiple aliases per upstream name, and ensures aliases are unique within each channel. 
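An aside on the mechanism the `*-raw` rules rely on (a minimal sketch, not part of the diff; the path and schema below are illustrative): `sjson.SetRawBytes` splices the configured fragment in as structured JSON, whereas the plain setter used by `default`/`override` would store the same string escaped and quoted.

    package main

    import (
    	"fmt"

    	"github.com/tidwall/sjson"
    )

    func main() {
    	payload := []byte(`{}`)
    	raw := `{"type":"object","properties":{"answer":{"type":"string"}}}`

    	// default/override rule: value stored as a quoted JSON string.
    	quoted, _ := sjson.SetBytes(payload, "generationConfig.responseJsonSchema", raw)
    	// default-raw/override-raw rule: fragment spliced in as a JSON object.
    	spliced, _ := sjson.SetRawBytes(payload, "generationConfig.responseJsonSchema", []byte(raw))

    	fmt.Println(string(quoted))
    	fmt.Println(string(spliced))
    }

This is also why SanitizePayloadRules validates raw string params with json.Valid up front: an invalid fragment would otherwise corrupt the payload at request time.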
diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index 9014af87..b4e03c40 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -1,6 +1,7 @@ package executor import ( + "encoding/json" "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" @@ -17,7 +18,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string return payload } rules := cfg.Payload - if len(rules.Default) == 0 && len(rules.Override) == 0 { + if len(rules.Default) == 0 && len(rules.DefaultRaw) == 0 && len(rules.Override) == 0 && len(rules.OverrideRaw) == 0 { return payload } model = strings.TrimSpace(model) @@ -55,6 +56,35 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string appliedDefaults[fullPath] = struct{}{} } } + // Apply default raw rules: first write wins per field across all matching rules. + for i := range rules.DefaultRaw { + rule := &rules.DefaultRaw[i] + if !payloadRuleMatchesModel(rule, model, protocol) { + continue + } + for path, value := range rule.Params { + fullPath := buildPayloadPath(root, path) + if fullPath == "" { + continue + } + if gjson.GetBytes(source, fullPath).Exists() { + continue + } + if _, ok := appliedDefaults[fullPath]; ok { + continue + } + rawValue, ok := payloadRawValue(value) + if !ok { + continue + } + updated, errSet := sjson.SetRawBytes(out, fullPath, rawValue) + if errSet != nil { + continue + } + out = updated + appliedDefaults[fullPath] = struct{}{} + } + } // Apply override rules: last write wins per field across all matching rules. for i := range rules.Override { rule := &rules.Override[i] @@ -73,6 +103,28 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string out = updated } } + // Apply override raw rules: last write wins per field across all matching rules. + for i := range rules.OverrideRaw { + rule := &rules.OverrideRaw[i] + if !payloadRuleMatchesModel(rule, model, protocol) { + continue + } + for path, value := range rule.Params { + fullPath := buildPayloadPath(root, path) + if fullPath == "" { + continue + } + rawValue, ok := payloadRawValue(value) + if !ok { + continue + } + updated, errSet := sjson.SetRawBytes(out, fullPath, rawValue) + if errSet != nil { + continue + } + out = updated + } + } return out } @@ -116,6 +168,24 @@ func buildPayloadPath(root, path string) string { return r + "." + p } +func payloadRawValue(value any) ([]byte, bool) { + if value == nil { + return nil, false + } + switch typed := value.(type) { + case string: + return []byte(typed), true + case []byte: + return typed, true + default: + raw, errMarshal := json.Marshal(typed) + if errMarshal != nil { + return nil, false + } + return raw, true + } +} + // matchModelPattern performs simple wildcard matching where '*' matches zero or more characters. 
// Examples: // From 526dd866ba2409d91265c8da545e1f7a718116ce Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Fri, 16 Jan 2026 10:39:16 +0800 Subject: [PATCH 33/65] refactor(gemini): replace static model handling with dynamic model registry lookup --- sdk/api/handlers/gemini/gemini_handlers.go | 115 +++++---------------- 1 file changed, 28 insertions(+), 87 deletions(-) diff --git a/sdk/api/handlers/gemini/gemini_handlers.go b/sdk/api/handlers/gemini/gemini_handlers.go index f2bdb058..c4a04c85 100644 --- a/sdk/api/handlers/gemini/gemini_handlers.go +++ b/sdk/api/handlers/gemini/gemini_handlers.go @@ -85,94 +85,35 @@ func (h *GeminiAPIHandler) GeminiGetHandler(c *gin.Context) { return } action := strings.TrimPrefix(request.Action, "/") - switch action { - case "gemini-3-pro-preview": - c.JSON(http.StatusOK, gin.H{ - "name": "models/gemini-3-pro-preview", - "version": "3", - "displayName": "Gemini 3 Pro Preview", - "description": "Gemini 3 Pro Preview", - "inputTokenLimit": 1048576, - "outputTokenLimit": 65536, - "supportedGenerationMethods": []string{ - "generateContent", - "countTokens", - "createCachedContent", - "batchGenerateContent", - }, - "temperature": 1, - "topP": 0.95, - "topK": 64, - "maxTemperature": 2, - "thinking": true, - }, - ) - case "gemini-2.5-pro": - c.JSON(http.StatusOK, gin.H{ - "name": "models/gemini-2.5-pro", - "version": "2.5", - "displayName": "Gemini 2.5 Pro", - "description": "Stable release (June 17th, 2025) of Gemini 2.5 Pro", - "inputTokenLimit": 1048576, - "outputTokenLimit": 65536, - "supportedGenerationMethods": []string{ - "generateContent", - "countTokens", - "createCachedContent", - "batchGenerateContent", - }, - "temperature": 1, - "topP": 0.95, - "topK": 64, - "maxTemperature": 2, - "thinking": true, - }, - ) - case "gemini-2.5-flash": - c.JSON(http.StatusOK, gin.H{ - "name": "models/gemini-2.5-flash", - "version": "001", - "displayName": "Gemini 2.5 Flash", - "description": "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.", - "inputTokenLimit": 1048576, - "outputTokenLimit": 65536, - "supportedGenerationMethods": []string{ - "generateContent", - "countTokens", - "createCachedContent", - "batchGenerateContent", - }, - "temperature": 1, - "topP": 0.95, - "topK": 64, - "maxTemperature": 2, - "thinking": true, - }) - case "gpt-5": - c.JSON(http.StatusOK, gin.H{ - "name": "gpt-5", - "version": "001", - "displayName": "GPT 5", - "description": "Stable version of GPT 5, The best model for coding and agentic tasks across domains.", - "inputTokenLimit": 400000, - "outputTokenLimit": 128000, - "supportedGenerationMethods": []string{ - "generateContent", - }, - "temperature": 1, - "topP": 0.95, - "topK": 64, - "maxTemperature": 2, - "thinking": true, - }) - default: - c.JSON(http.StatusNotFound, handlers.ErrorResponse{ - Error: handlers.ErrorDetail{ - Message: "Not Found", - Type: "not_found", - }, - }) + + // Get dynamic models from the global registry and find the matching one + availableModels := h.Models() + var targetModel map[string]any + + for _, model := range availableModels { + name, _ := model["name"].(string) + // Match name with or without 'models/' prefix + if name == action || name == "models/"+action { + targetModel = model + break + } } + + if targetModel != nil { + // Ensure the name has 'models/' prefix in the output if it's a Gemini model + if name, ok := targetModel["name"].(string); ok && name != "" && !strings.HasPrefix(name, "models/") { + 
targetModel["name"] = "models/" + name + } + c.JSON(http.StatusOK, targetModel) + return + } + + c.JSON(http.StatusNotFound, handlers.ErrorResponse{ + Error: handlers.ErrorDetail{ + Message: "Not Found", + Type: "not_found", + }, + }) } // GeminiHandler handles POST requests for Gemini API operations. From cec4e251bd36191c27313f86f6cc17ad2a4afdb8 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Fri, 16 Jan 2026 11:35:34 +0800 Subject: [PATCH 34/65] feat(translator): preserve `text` field in serialized output during chat completions processing --- .../openai/chat-completions/antigravity_openai_request.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go index a4819ae7..d52b1a53 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go @@ -209,6 +209,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ for _, item := range content.Array() { switch item.Get("type").String() { case "text": + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String()) p++ case "image_url": // If the assistant returned an inline data URL, preserve it for history fidelity. From 48cba39a12cca0321d98515ef7ef7f132fb1a224 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Fri, 16 Jan 2026 12:30:12 +0800 Subject: [PATCH 35/65] feat(codex): add config toggle for codex instructions injection --- config.example.yaml | 4 ++++ internal/api/server.go | 12 ++++++++++++ internal/config/config.go | 5 +++++ internal/misc/codex_instructions.go | 19 +++++++++++++++++++ 4 files changed, 40 insertions(+) diff --git a/config.example.yaml b/config.example.yaml index 09307c33..a8bc3e85 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -85,6 +85,10 @@ nonstream-keepalive-interval: 0 # keepalive-seconds: 15 # Default: 0 (disabled). <= 0 disables keep-alives. # bootstrap-retries: 1 # Default: 0 (disabled). Retries before first byte is sent. +# When true, enable custom Codex instructions injection for Codex API requests. +# When false (default), CodexInstructionsForModel returns immediately without modification. 
+codex-instructions-enabled: false + # Gemini API keys # gemini-api-key: # - api-key: "AIzaSy...01" diff --git a/internal/api/server.go b/internal/api/server.go index 5b425e7c..831bf003 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -26,6 +26,7 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/logging" "github.com/router-for-me/CLIProxyAPI/v6/internal/managementasset" + "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" "github.com/router-for-me/CLIProxyAPI/v6/internal/usage" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" sdkaccess "github.com/router-for-me/CLIProxyAPI/v6/sdk/access" @@ -254,6 +255,7 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk } managementasset.SetCurrentConfig(cfg) auth.SetQuotaCooldownDisabled(cfg.DisableCooling) + misc.SetCodexInstructionsEnabled(cfg.CodexInstructionsEnabled) // Initialize management handler s.mgmt = managementHandlers.NewHandler(cfg, configFilePath, authManager) if optionState.localPassword != "" { @@ -912,6 +914,16 @@ func (s *Server) UpdateClients(cfg *config.Config) { log.Debugf("disable_cooling toggled to %t", cfg.DisableCooling) } } + + if oldCfg == nil || oldCfg.CodexInstructionsEnabled != cfg.CodexInstructionsEnabled { + misc.SetCodexInstructionsEnabled(cfg.CodexInstructionsEnabled) + if oldCfg != nil { + log.Debugf("codex_instructions_enabled updated from %t to %t", oldCfg.CodexInstructionsEnabled, cfg.CodexInstructionsEnabled) + } else { + log.Debugf("codex_instructions_enabled toggled to %t", cfg.CodexInstructionsEnabled) + } + } + if s.handlers != nil && s.handlers.AuthManager != nil { s.handlers.AuthManager.SetRetryConfig(cfg.RequestRetry, time.Duration(cfg.MaxRetryInterval)*time.Second) } diff --git a/internal/config/config.go b/internal/config/config.go index 0405cfa7..7ba20a1a 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -71,6 +71,11 @@ type Config struct { // WebsocketAuth enables or disables authentication for the WebSocket API. WebsocketAuth bool `yaml:"ws-auth" json:"ws-auth"` + // CodexInstructionsEnabled controls whether custom Codex instructions are injected. + // When false (default), CodexInstructionsForModel returns immediately without modification. + // When true, the original instruction injection logic is used. + CodexInstructionsEnabled bool `yaml:"codex-instructions-enabled" json:"codex-instructions-enabled"` + // GeminiKey defines Gemini API key configurations with optional routing overrides. GeminiKey []GeminiKey `yaml:"gemini-api-key" json:"gemini-api-key"` diff --git a/internal/misc/codex_instructions.go b/internal/misc/codex_instructions.go index 9d0971c5..fb870ffc 100644 --- a/internal/misc/codex_instructions.go +++ b/internal/misc/codex_instructions.go @@ -7,11 +7,27 @@ import ( "embed" _ "embed" "strings" + "sync/atomic" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) +// codexInstructionsEnabled controls whether CodexInstructionsForModel returns custom instructions. +// When false (default), CodexInstructionsForModel returns (true, "") immediately. +// Set via SetCodexInstructionsEnabled from config. +var codexInstructionsEnabled atomic.Bool + +// SetCodexInstructionsEnabled sets whether codex instructions processing is enabled. +func SetCodexInstructionsEnabled(enabled bool) { + codexInstructionsEnabled.Store(enabled) +} + +// GetCodexInstructionsEnabled returns whether codex instructions processing is enabled. 
+func GetCodexInstructionsEnabled() bool { + return codexInstructionsEnabled.Load() +} + //go:embed codex_instructions var codexInstructionsDir embed.FS @@ -124,6 +140,9 @@ func codexInstructionsForCodex(modelName, systemInstructions string) (bool, stri } func CodexInstructionsForModel(modelName, systemInstructions, userAgent string) (bool, string) { + if !GetCodexInstructionsEnabled() { + return true, "" + } if IsOpenCodeUserAgent(userAgent) { return codexInstructionsForOpenCode(systemInstructions) } From ea3d22831e37d424863f6aa9c112454553472521 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Fri, 16 Jan 2026 12:44:57 +0800 Subject: [PATCH 36/65] refactor(codex): update terminology to "official instructions" for clarity --- config.example.yaml | 2 +- internal/misc/codex_instructions.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config.example.yaml b/config.example.yaml index a8bc3e85..ce402f99 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -85,7 +85,7 @@ nonstream-keepalive-interval: 0 # keepalive-seconds: 15 # Default: 0 (disabled). <= 0 disables keep-alives. # bootstrap-retries: 1 # Default: 0 (disabled). Retries before first byte is sent. -# When true, enable custom Codex instructions injection for Codex API requests. +# When true, enable official Codex instructions injection for Codex API requests. # When false (default), CodexInstructionsForModel returns immediately without modification. codex-instructions-enabled: false diff --git a/internal/misc/codex_instructions.go b/internal/misc/codex_instructions.go index fb870ffc..d50e8cef 100644 --- a/internal/misc/codex_instructions.go +++ b/internal/misc/codex_instructions.go @@ -13,7 +13,7 @@ import ( "github.com/tidwall/sjson" ) -// codexInstructionsEnabled controls whether CodexInstructionsForModel returns custom instructions. +// codexInstructionsEnabled controls whether CodexInstructionsForModel returns official instructions. // When false (default), CodexInstructionsForModel returns (true, "") immediately. // Set via SetCodexInstructionsEnabled from config. var codexInstructionsEnabled atomic.Bool From aa0b63e2144904afc73f55b0fb5033db906e8034 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Fri, 16 Jan 2026 12:50:09 +0800 Subject: [PATCH 37/65] refactor(config): clarify Codex instruction toggle documentation --- internal/config/config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/config/config.go b/internal/config/config.go index 7ba20a1a..3b7e9fa7 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -71,7 +71,7 @@ type Config struct { // WebsocketAuth enables or disables authentication for the WebSocket API. WebsocketAuth bool `yaml:"ws-auth" json:"ws-auth"` - // CodexInstructionsEnabled controls whether custom Codex instructions are injected. + // CodexInstructionsEnabled controls whether official Codex instructions are injected. // When false (default), CodexInstructionsForModel returns immediately without modification. // When true, the original instruction injection logic is used. 
CodexInstructionsEnabled bool `yaml:"codex-instructions-enabled" json:"codex-instructions-enabled"` From c3ef46f4096e7544a5a7518304fd5a34f7e17879 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Fri, 16 Jan 2026 13:36:57 +0800 Subject: [PATCH 38/65] feat(config): supplement missing default aliases during antigravity migration --- internal/config/oauth_model_alias_migration.go | 17 +++++++++++++++++ .../config/oauth_model_alias_migration_test.go | 17 +++++++++++++++++ .../runtime/executor/antigravity_executor.go | 2 +- 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/internal/config/oauth_model_alias_migration.go b/internal/config/oauth_model_alias_migration.go index 0e3b2156..5cc8053a 100644 --- a/internal/config/oauth_model_alias_migration.go +++ b/internal/config/oauth_model_alias_migration.go @@ -122,6 +122,23 @@ func migrateFromOldField(configFile string, root *yaml.Node, rootMap *yaml.Node, newAliases[channel] = converted } + // For antigravity channel, supplement missing default aliases + if antigravityEntries, exists := newAliases["antigravity"]; exists { + // Build a set of already configured model names (upstream names) + configuredModels := make(map[string]bool, len(antigravityEntries)) + for _, entry := range antigravityEntries { + configuredModels[entry.Name] = true + } + + // Add missing default aliases + for _, defaultAlias := range defaultAntigravityAliases() { + if !configuredModels[defaultAlias.Name] { + antigravityEntries = append(antigravityEntries, defaultAlias) + } + } + newAliases["antigravity"] = antigravityEntries + } + // Build new node newNode := buildOAuthModelAliasNode(newAliases) diff --git a/internal/config/oauth_model_alias_migration_test.go b/internal/config/oauth_model_alias_migration_test.go index ab5a1f49..db9c0a11 100644 --- a/internal/config/oauth_model_alias_migration_test.go +++ b/internal/config/oauth_model_alias_migration_test.go @@ -114,6 +114,23 @@ func TestMigrateOAuthModelAlias_ConvertsAntigravityModels(t *testing.T) { if !strings.Contains(content, "gemini-3-pro-high") { t.Fatal("expected gemini-3-pro-preview to be converted to gemini-3-pro-high") } + + // Verify missing default aliases were supplemented + if !strings.Contains(content, "gemini-3-pro-image") { + t.Fatal("expected missing default alias gemini-3-pro-image to be added") + } + if !strings.Contains(content, "gemini-3-flash") { + t.Fatal("expected missing default alias gemini-3-flash to be added") + } + if !strings.Contains(content, "claude-sonnet-4-5") { + t.Fatal("expected missing default alias claude-sonnet-4-5 to be added") + } + if !strings.Contains(content, "claude-sonnet-4-5-thinking") { + t.Fatal("expected missing default alias claude-sonnet-4-5-thinking to be added") + } + if !strings.Contains(content, "claude-opus-4-5-thinking") { + t.Fatal("expected missing default alias claude-opus-4-5-thinking to be added") + } } func TestMigrateOAuthModelAlias_AddsDefaultIfNeitherExists(t *testing.T) { diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index d2001a71..7ae50e39 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -1221,7 +1221,7 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau payload = []byte(strJSON) } - if strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro-preview") { + if strings.Contains(modelName, "claude") || 
strings.Contains(modelName, "gemini-3-pro-high") { systemInstructionPartsResult := gjson.GetBytes(payload, "request.systemInstruction.parts") payload, _ = sjson.SetBytes(payload, "request.systemInstruction.role", "user") payload, _ = sjson.SetBytes(payload, "request.systemInstruction.parts.0.text", systemInstruction) From 902bea24b44b9c45cc4efd0a4e2736bef7e2a6fe Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Fri, 16 Jan 2026 15:38:10 +0800 Subject: [PATCH 39/65] fix(codex): ensure instructions field exists --- internal/runtime/executor/codex_executor.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 273987ea..0abec8af 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -106,6 +106,9 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re body, _ = sjson.SetBytes(body, "stream", true) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.DeleteBytes(body, "prompt_cache_retention") + if !gjson.GetBytes(body, "instructions").Exists() { + body, _ = sjson.SetBytes(body, "instructions", "") + } url := strings.TrimSuffix(baseURL, "/") + "/responses" httpReq, err := e.cacheHelper(ctx, from, url, req, body) @@ -213,6 +216,9 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.DeleteBytes(body, "prompt_cache_retention") body, _ = sjson.SetBytes(body, "model", baseModel) + if !gjson.GetBytes(body, "instructions").Exists() { + body, _ = sjson.SetBytes(body, "instructions", "") + } url := strings.TrimSuffix(baseURL, "/") + "/responses" httpReq, err := e.cacheHelper(ctx, from, url, req, body) @@ -317,6 +323,9 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.DeleteBytes(body, "prompt_cache_retention") body, _ = sjson.SetBytes(body, "stream", false) + if !gjson.GetBytes(body, "instructions").Exists() { + body, _ = sjson.SetBytes(body, "instructions", "") + } enc, err := tokenizerForCodexModel(baseModel) if err != nil { From 6600d58ba22ab2fb9e2e2658452d7167b1b22c65 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Fri, 16 Jan 2026 19:59:01 +0800 Subject: [PATCH 40/65] feat(codex): enhance input transformation and remove unused `safety_identifier` field - Added logic to transform `inputResults` into structured JSON for improved processing. - Removed redundant `safety_identifier` field in executor payload to streamline requests. 
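A minimal sketch of the two mechanics this commit leans on (field values assumed; the real code operates on the translated Codex payload): dropping client-only fields before dispatch, and rebuilding `input` by appending each collected item's raw JSON.

    package main

    import (
    	"fmt"

    	"github.com/tidwall/gjson"
    	"github.com/tidwall/sjson"
    )

    func main() {
    	body := []byte(`{"model":"gpt-5","safety_identifier":"abc","input":[{"type":"message"}]}`)
    	body, _ = sjson.DeleteBytes(body, "safety_identifier")

    	items := gjson.GetBytes(body, "input").Array()
    	newInput := "[]"
    	for _, item := range items {
    		newInput, _ = sjson.SetRaw(newInput, "-1", item.Raw) // "-1" appends, preserving raw JSON
    	}
    	body, _ = sjson.SetRawBytes(body, "input", []byte(newInput))
    	fmt.Println(string(body))
    }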
--- internal/runtime/executor/codex_executor.go | 3 +++ .../codex/openai/responses/codex_openai-responses_request.go | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 0abec8af..eeefe6bc 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -106,6 +106,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re body, _ = sjson.SetBytes(body, "stream", true) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.DeleteBytes(body, "prompt_cache_retention") + body, _ = sjson.DeleteBytes(body, "safety_identifier") if !gjson.GetBytes(body, "instructions").Exists() { body, _ = sjson.SetBytes(body, "instructions", "") } @@ -215,6 +216,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.DeleteBytes(body, "prompt_cache_retention") + body, _ = sjson.DeleteBytes(body, "safety_identifier") body, _ = sjson.SetBytes(body, "model", baseModel) if !gjson.GetBytes(body, "instructions").Exists() { body, _ = sjson.SetBytes(body, "instructions", "") @@ -322,6 +324,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth body, _ = sjson.SetBytes(body, "model", baseModel) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.DeleteBytes(body, "prompt_cache_retention") + body, _ = sjson.DeleteBytes(body, "safety_identifier") body, _ = sjson.SetBytes(body, "stream", false) if !gjson.GetBytes(body, "instructions").Exists() { body, _ = sjson.SetBytes(body, "instructions", "") diff --git a/internal/translator/codex/openai/responses/codex_openai-responses_request.go b/internal/translator/codex/openai/responses/codex_openai-responses_request.go index e1691a5b..33dbf112 100644 --- a/internal/translator/codex/openai/responses/codex_openai-responses_request.go +++ b/internal/translator/codex/openai/responses/codex_openai-responses_request.go @@ -74,6 +74,11 @@ func ConvertOpenAIResponsesRequestToCodex(modelName string, inputRawJSON []byte, } if hasOfficialInstructions { + newInput := "[]" + for _, item := range inputResults { + newInput, _ = sjson.SetRaw(newInput, "-1", item.Raw) + } + rawJSON, _ = sjson.SetRawBytes(rawJSON, "input", []byte(newInput)) return rawJSON } // log.Debugf("instructions not matched, %s\n", originalInstructions) From 65b4e1ec6c797813f6ce4bf2d78d8ba207b3d24f Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Sat, 17 Jan 2026 04:12:29 +0800 Subject: [PATCH 41/65] feat(codex): enable instruction toggling and update role terminology - Added conditional logic for Codex instruction injection based on configuration. - Updated role terminology from "user" to "developer" for better alignment with context. 
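The role change, sketched (message shapes assumed): system-originated content is now emitted with the "developer" role, while other roles pass through unchanged.

    package main

    import "fmt"

    func mapRole(role string) string {
    	if role == "system" {
    		return "developer"
    	}
    	return role
    }

    func main() {
    	for _, r := range []string{"system", "user", "assistant"} {
    		fmt.Printf("%s -> %s\n", r, mapRole(r))
    	}
    }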
--- .../codex/claude/codex_claude_request.go | 28 ++++++++++--------- .../codex/gemini/codex_gemini_request.go | 2 +- .../chat-completions/codex_openai_request.go | 8 ++++-- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/internal/translator/codex/claude/codex_claude_request.go b/internal/translator/codex/claude/codex_claude_request.go index a3157833..17f2f674 100644 --- a/internal/translator/codex/claude/codex_claude_request.go +++ b/internal/translator/codex/claude/codex_claude_request.go @@ -52,7 +52,7 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool) systemsResult := rootResult.Get("system") if systemsResult.IsArray() { systemResults := systemsResult.Array() - message := `{"type":"message","role":"user","content":[]}` + message := `{"type":"message","role":"developer","content":[]}` for i := 0; i < len(systemResults); i++ { systemResult := systemResults[i] systemTypeResult := systemResult.Get("type") @@ -245,21 +245,23 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool) template, _ = sjson.Set(template, "include", []string{"reasoning.encrypted_content"}) // Add a first message to ignore system instructions and ensure proper execution. - inputResult := gjson.Get(template, "input") - if inputResult.Exists() && inputResult.IsArray() { - inputResults := inputResult.Array() - newInput := "[]" - for i := 0; i < len(inputResults); i++ { - if i == 0 { - firstText := inputResults[i].Get("content.0.text") - firstInstructions := "EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!" - if firstText.Exists() && firstText.String() != firstInstructions { - newInput, _ = sjson.SetRaw(newInput, "-1", `{"type":"message","role":"user","content":[{"type":"input_text","text":"EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"}]}`) + if misc.GetCodexInstructionsEnabled() { + inputResult := gjson.Get(template, "input") + if inputResult.Exists() && inputResult.IsArray() { + inputResults := inputResult.Array() + newInput := "[]" + for i := 0; i < len(inputResults); i++ { + if i == 0 { + firstText := inputResults[i].Get("content.0.text") + firstInstructions := "EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!" 
+ if firstText.Exists() && firstText.String() != firstInstructions { + newInput, _ = sjson.SetRaw(newInput, "-1", `{"type":"message","role":"user","content":[{"type":"input_text","text":"EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"}]}`) + } } + newInput, _ = sjson.SetRaw(newInput, "-1", inputResults[i].Raw) } - newInput, _ = sjson.SetRaw(newInput, "-1", inputResults[i].Raw) + template, _ = sjson.SetRaw(template, "input", newInput) } - template, _ = sjson.SetRaw(template, "input", newInput) } return []byte(template) diff --git a/internal/translator/codex/gemini/codex_gemini_request.go b/internal/translator/codex/gemini/codex_gemini_request.go index fe5c0a5f..d7d0a109 100644 --- a/internal/translator/codex/gemini/codex_gemini_request.go +++ b/internal/translator/codex/gemini/codex_gemini_request.go @@ -95,7 +95,7 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool) // System instruction -> as a user message with input_text parts sysParts := root.Get("system_instruction.parts") if sysParts.IsArray() { - msg := `{"type":"message","role":"user","content":[]}` + msg := `{"type":"message","role":"developer","content":[]}` arr := sysParts.Array() for i := 0; i < len(arr); i++ { p := arr[i] diff --git a/internal/translator/codex/openai/chat-completions/codex_openai_request.go b/internal/translator/codex/openai/chat-completions/codex_openai_request.go index b68d2792..40f56f88 100644 --- a/internal/translator/codex/openai/chat-completions/codex_openai_request.go +++ b/internal/translator/codex/openai/chat-completions/codex_openai_request.go @@ -33,7 +33,7 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b rawJSON := bytes.Clone(inputRawJSON) userAgent := misc.ExtractCodexUserAgent(rawJSON) // Start with empty JSON object - out := `{}` + out := `{"instructions":""}` // Stream must be set to true out, _ = sjson.Set(out, "stream", stream) @@ -98,7 +98,9 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b // Extract system instructions from first system message (string or text object) messages := gjson.GetBytes(rawJSON, "messages") _, instructions := misc.CodexInstructionsForModel(modelName, "", userAgent) - out, _ = sjson.Set(out, "instructions", instructions) + if misc.GetCodexInstructionsEnabled() { + out, _ = sjson.Set(out, "instructions", instructions) + } // if messages.IsArray() { // arr := messages.Array() // for i := 0; i < len(arr); i++ { @@ -141,7 +143,7 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b msg := `{}` msg, _ = sjson.Set(msg, "type", "message") if role == "system" { - msg, _ = sjson.Set(msg, "role", "user") + msg, _ = sjson.Set(msg, "role", "developer") } else { msg, _ = sjson.Set(msg, "role", role) } From 384578a88ccc489b02f4919fe86d85d5979b71f1 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Sat, 17 Jan 2026 04:44:09 +0800 Subject: [PATCH 42/65] feat(cliproxy, gemini): improve ID matching logic and enrich normalized model output - Enhanced ID matching in `cliproxy` by adding additional conditions to better handle ID equality cases. - Updated `gemini` handlers to include `displayName` and `description` in normalized models for enriched metadata. 
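A sketch of the extended matching in rewriteModelInfoName (IDs below are examples): a bare model ID now rewrites directly, in addition to the previously handled "prefix/<id>" suffix form.

    package main

    import (
    	"fmt"
    	"strings"
    )

    func rewrite(name, oldID, newID string) string {
    	trimmed := strings.TrimSpace(name)
    	if strings.EqualFold(oldID, newID) {
    		return name
    	}
    	if strings.EqualFold(trimmed, oldID) { // new: bare ID matches
    		return newID
    	}
    	if strings.HasSuffix(trimmed, "/"+oldID) { // existing: prefixed form
    		return strings.TrimSuffix(trimmed, oldID) + newID
    	}
    	return name
    }

    func main() {
    	fmt.Println(rewrite("gemini-3-pro-preview", "gemini-3-pro-preview", "gemini-3-pro-high"))
    	fmt.Println(rewrite("models/gemini-3-pro-preview", "gemini-3-pro-preview", "gemini-3-pro-high"))
    }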
--- sdk/api/handlers/gemini/gemini_handlers.go | 8 ++++++-- sdk/cliproxy/service.go | 3 +++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/sdk/api/handlers/gemini/gemini_handlers.go b/sdk/api/handlers/gemini/gemini_handlers.go index c4a04c85..27d8d1f5 100644 --- a/sdk/api/handlers/gemini/gemini_handlers.go +++ b/sdk/api/handlers/gemini/gemini_handlers.go @@ -56,8 +56,12 @@ func (h *GeminiAPIHandler) GeminiModels(c *gin.Context) { for k, v := range model { normalizedModel[k] = v } - if name, ok := normalizedModel["name"].(string); ok && name != "" && !strings.HasPrefix(name, "models/") { - normalizedModel["name"] = "models/" + name + if name, ok := normalizedModel["name"].(string); ok && name != "" { + if !strings.HasPrefix(name, "models/") { + normalizedModel["name"] = "models/" + name + } + normalizedModel["displayName"] = name + normalizedModel["description"] = name } if _, ok := normalizedModel["supportedGenerationMethods"]; !ok { normalizedModel["supportedGenerationMethods"] = defaultMethods diff --git a/sdk/cliproxy/service.go b/sdk/cliproxy/service.go index 7a06ae78..5b343e49 100644 --- a/sdk/cliproxy/service.go +++ b/sdk/cliproxy/service.go @@ -1212,6 +1212,9 @@ func rewriteModelInfoName(name, oldID, newID string) string { if strings.EqualFold(oldID, newID) { return name } + if strings.EqualFold(trimmed, oldID) { + return newID + } if strings.HasSuffix(trimmed, "/"+oldID) { prefix := strings.TrimSuffix(trimmed, oldID) return prefix + newID From bc7167e9feb191096e25ce989e6e05d1008c6690 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Sat, 17 Jan 2026 05:05:24 +0800 Subject: [PATCH 43/65] feat(runtime): add model alias support and enhance payload rule matching - Introduced `payloadModelAliases` and `payloadModelCandidates` functions to support model aliases for improved flexibility. - Updated rule matching logic to handle multiple model candidates. - Refactored variable naming in executor to improve code clarity and consistency. 
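
Illustration only (the `aliasEntry` type below is a stand-in for the real
config entry type): a runnable sketch of the new candidate expansion. The
requested model comes first, configured aliases for the protocol follow,
deduplicated case-insensitively; a payload rule then matches if any
candidate matches:

    package main

    import (
        "fmt"
        "strings"
    )

    type aliasEntry struct{ Name, Alias string }

    func candidates(byChannel map[string][]aliasEntry, model, protocol string) []string {
        out := []string{model}
        seen := map[string]struct{}{strings.ToLower(model): {}}
        for _, e := range byChannel[strings.ToLower(strings.TrimSpace(protocol))] {
            alias := strings.TrimSpace(e.Alias)
            if alias == "" || !strings.EqualFold(strings.TrimSpace(e.Name), model) {
                continue
            }
            if _, dup := seen[strings.ToLower(alias)]; dup {
                continue
            }
            seen[strings.ToLower(alias)] = struct{}{}
            out = append(out, alias)
        }
        return out
    }

    func main() {
        cfg := map[string][]aliasEntry{
            "gemini": {{Name: "gemini-pro", Alias: "pro-alias"}},
        }
        fmt.Println(candidates(cfg, "gemini-pro", "gemini")) // [gemini-pro pro-alias]
    }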
--- .../runtime/executor/antigravity_executor.go | 21 +++-- internal/runtime/executor/payload_helpers.go | 80 ++++++++++++++++++- 2 files changed, 86 insertions(+), 15 deletions(-) diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 7ae50e39..47113cfc 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -517,8 +517,8 @@ func (e *AntigravityExecutor) convertStreamToNonStream(stream []byte) []byte { } if usageResult := responseNode.Get("usageMetadata"); usageResult.Exists() { usageRaw = usageResult.Raw - } else if usageResult := root.Get("usageMetadata"); usageResult.Exists() { - usageRaw = usageResult.Raw + } else if usageMetadataResult := root.Get("usageMetadata"); usageMetadataResult.Exists() { + usageRaw = usageMetadataResult.Raw } if partsResult := responseNode.Get("candidates.0.content.parts"); partsResult.IsArray() { @@ -642,7 +642,6 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya err = errReq return nil, err } - httpResp, errDo := httpClient.Do(httpReq) if errDo != nil { recordAPIResponseError(ctx, e.cfg, errDo) @@ -1004,10 +1003,10 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c case "chat_20706", "chat_23310", "gemini-2.5-flash-thinking", "gemini-3-pro-low", "gemini-2.5-pro": continue } - cfg := modelConfig[modelID] + modelCfg := modelConfig[modelID] modelName := modelID - if cfg != nil && cfg.Name != "" { - modelName = cfg.Name + if modelCfg != nil && modelCfg.Name != "" { + modelName = modelCfg.Name } modelInfo := ®istry.ModelInfo{ ID: modelID, @@ -1021,12 +1020,12 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c Type: antigravityAuthType, } // Look up Thinking support from static config using upstream model name. - if cfg != nil { - if cfg.Thinking != nil { - modelInfo.Thinking = cfg.Thinking + if modelCfg != nil { + if modelCfg.Thinking != nil { + modelInfo.Thinking = modelCfg.Thinking } - if cfg.MaxCompletionTokens > 0 { - modelInfo.MaxCompletionTokens = cfg.MaxCompletionTokens + if modelCfg.MaxCompletionTokens > 0 { + modelInfo.MaxCompletionTokens = modelCfg.MaxCompletionTokens } } models = append(models, modelInfo) diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index b4e03c40..364e2ee9 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -25,6 +25,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string if model == "" { return payload } + candidates := payloadModelCandidates(cfg, model, protocol) out := payload source := original if len(source) == 0 { @@ -34,7 +35,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string // Apply default rules: first write wins per field across all matching rules. for i := range rules.Default { rule := &rules.Default[i] - if !payloadRuleMatchesModel(rule, model, protocol) { + if !payloadRuleMatchesModels(rule, protocol, candidates) { continue } for path, value := range rule.Params { @@ -59,7 +60,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string // Apply default raw rules: first write wins per field across all matching rules. 
for i := range rules.DefaultRaw { rule := &rules.DefaultRaw[i] - if !payloadRuleMatchesModel(rule, model, protocol) { + if !payloadRuleMatchesModels(rule, protocol, candidates) { continue } for path, value := range rule.Params { @@ -88,7 +89,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string // Apply override rules: last write wins per field across all matching rules. for i := range rules.Override { rule := &rules.Override[i] - if !payloadRuleMatchesModel(rule, model, protocol) { + if !payloadRuleMatchesModels(rule, protocol, candidates) { continue } for path, value := range rule.Params { @@ -106,7 +107,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string // Apply override raw rules: last write wins per field across all matching rules. for i := range rules.OverrideRaw { rule := &rules.OverrideRaw[i] - if !payloadRuleMatchesModel(rule, model, protocol) { + if !payloadRuleMatchesModels(rule, protocol, candidates) { continue } for path, value := range rule.Params { @@ -128,6 +129,18 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string return out } +func payloadRuleMatchesModels(rule *config.PayloadRule, protocol string, models []string) bool { + if rule == nil || len(models) == 0 { + return false + } + for _, model := range models { + if payloadRuleMatchesModel(rule, model, protocol) { + return true + } + } + return false +} + func payloadRuleMatchesModel(rule *config.PayloadRule, model, protocol string) bool { if rule == nil { return false @@ -150,6 +163,65 @@ func payloadRuleMatchesModel(rule *config.PayloadRule, model, protocol string) b return false } +func payloadModelCandidates(cfg *config.Config, model, protocol string) []string { + model = strings.TrimSpace(model) + if model == "" { + return nil + } + candidates := []string{model} + if cfg == nil { + return candidates + } + aliases := payloadModelAliases(cfg, model, protocol) + if len(aliases) == 0 { + return candidates + } + seen := map[string]struct{}{strings.ToLower(model): struct{}{}} + for _, alias := range aliases { + alias = strings.TrimSpace(alias) + if alias == "" { + continue + } + key := strings.ToLower(alias) + if _, ok := seen[key]; ok { + continue + } + seen[key] = struct{}{} + candidates = append(candidates, alias) + } + return candidates +} + +func payloadModelAliases(cfg *config.Config, model, protocol string) []string { + if cfg == nil { + return nil + } + model = strings.TrimSpace(model) + if model == "" { + return nil + } + channel := strings.ToLower(strings.TrimSpace(protocol)) + if channel == "" { + return nil + } + entries := cfg.OAuthModelAlias[channel] + if len(entries) == 0 { + return nil + } + aliases := make([]string, 0, 2) + for _, entry := range entries { + if !strings.EqualFold(strings.TrimSpace(entry.Name), model) { + continue + } + alias := strings.TrimSpace(entry.Alias) + if alias == "" { + continue + } + aliases = append(aliases, alias) + } + return aliases +} + // buildPayloadPath combines an optional root path with a relative parameter path. // When root is empty, the parameter path is used as-is. When root is non-empty, // the parameter path is treated as relative to root. 
From f8f3ad84fcaf33443746e6c0d2eba420220046bb Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Sat, 17 Jan 2026 05:40:56 +0800 Subject: [PATCH 44/65] Fixed: #1064 feat(translator): improve system message handling and content indexing across translators - Updated logic for processing system messages in `claude`, `gemini`, `gemini-cli`, and `antigravity` translators. - Introduced indexing for `systemInstruction.parts` to ensure proper ordering and handling of multi-part content. - Added safeguards for accurate content transformation and serialization. --- .../antigravity_openai_request.go | 10 +++++-- .../chat-completions/claude_openai_request.go | 30 +++++++++++++++---- .../gemini-cli_openai_request.go | 10 +++++-- .../chat-completions/gemini_openai_request.go | 12 +++++--- 4 files changed, 47 insertions(+), 15 deletions(-) diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go index d52b1a53..89e486c0 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go @@ -132,6 +132,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ } } + systemPartIndex := 0 for i := 0; i < len(arr); i++ { m := arr[i] role := m.Get("role").String() @@ -141,16 +142,19 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ // system -> request.systemInstruction as a user message style if content.Type == gjson.String { out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user") - out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.String()) + out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.String()) + systemPartIndex++ } else if content.IsObject() && content.Get("type").String() == "text" { out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user") - out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.Get("text").String()) + out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.Get("text").String()) + systemPartIndex++ } else if content.IsArray() { contents := content.Array() if len(contents) > 0 { out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user") for j := 0; j < len(contents); j++ { - out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", j), contents[j].Get("text").String()) + out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), contents[j].Get("text").String()) + systemPartIndex++ } } } diff --git a/internal/translator/claude/openai/chat-completions/claude_openai_request.go b/internal/translator/claude/openai/chat-completions/claude_openai_request.go index 3a165a3d..8aa14793 100644 --- a/internal/translator/claude/openai/chat-completions/claude_openai_request.go +++ b/internal/translator/claude/openai/chat-completions/claude_openai_request.go @@ -141,17 +141,35 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream // Process messages and transform them to Claude Code format if messages := root.Get("messages"); messages.Exists() && messages.IsArray() { + messageIndex := 0 + systemMessageIndex := -1 messages.ForEach(func(_, message gjson.Result) bool { role := 
message.Get("role").String() contentResult := message.Get("content") switch role { - case "system", "user", "assistant": - // Create Claude Code message with appropriate role mapping - if role == "system" { - role = "user" + case "system": + if systemMessageIndex == -1 { + systemMsg := `{"role":"user","content":[]}` + out, _ = sjson.SetRaw(out, "messages.-1", systemMsg) + systemMessageIndex = messageIndex + messageIndex++ } - + if contentResult.Exists() && contentResult.Type == gjson.String && contentResult.String() != "" { + textPart := `{"type":"text","text":""}` + textPart, _ = sjson.Set(textPart, "text", contentResult.String()) + out, _ = sjson.SetRaw(out, fmt.Sprintf("messages.%d.content.-1", systemMessageIndex), textPart) + } else if contentResult.Exists() && contentResult.IsArray() { + contentResult.ForEach(func(_, part gjson.Result) bool { + if part.Get("type").String() == "text" { + textPart := `{"type":"text","text":""}` + textPart, _ = sjson.Set(textPart, "text", part.Get("text").String()) + out, _ = sjson.SetRaw(out, fmt.Sprintf("messages.%d.content.-1", systemMessageIndex), textPart) + } + return true + }) + } + case "user", "assistant": msg := `{"role":"","content":[]}` msg, _ = sjson.Set(msg, "role", role) @@ -230,6 +248,7 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream } out, _ = sjson.SetRaw(out, "messages.-1", msg) + messageIndex++ case "tool": // Handle tool result messages conversion @@ -240,6 +259,7 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream msg, _ = sjson.Set(msg, "content.0.tool_use_id", toolCallID) msg, _ = sjson.Set(msg, "content.0.content", content) out, _ = sjson.SetRaw(out, "messages.-1", msg) + messageIndex++ } return true }) diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go index 938a5ae4..af161b5c 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go @@ -129,6 +129,7 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo } } + systemPartIndex := 0 for i := 0; i < len(arr); i++ { m := arr[i] role := m.Get("role").String() @@ -138,16 +139,19 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo // system -> request.systemInstruction as a user message style if content.Type == gjson.String { out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user") - out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.String()) + out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.String()) + systemPartIndex++ } else if content.IsObject() && content.Get("type").String() == "text" { out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user") - out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.Get("text").String()) + out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.Get("text").String()) + systemPartIndex++ } else if content.IsArray() { contents := content.Array() if len(contents) > 0 { out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user") for j := 0; j < len(contents); j++ { - out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", j), 
contents[j].Get("text").String()) + out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), contents[j].Get("text").String()) + systemPartIndex++ } } } diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index fedd8dca..27805dd8 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -129,6 +129,7 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) } } + systemPartIndex := 0 for i := 0; i < len(arr); i++ { m := arr[i] role := m.Get("role").String() @@ -138,16 +139,19 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) // system -> system_instruction as a user message style if content.Type == gjson.String { out, _ = sjson.SetBytes(out, "system_instruction.role", "user") - out, _ = sjson.SetBytes(out, "system_instruction.parts.0.text", content.String()) + out, _ = sjson.SetBytes(out, fmt.Sprintf("system_instruction.parts.%d.text", systemPartIndex), content.String()) + systemPartIndex++ } else if content.IsObject() && content.Get("type").String() == "text" { out, _ = sjson.SetBytes(out, "system_instruction.role", "user") - out, _ = sjson.SetBytes(out, "system_instruction.parts.0.text", content.Get("text").String()) + out, _ = sjson.SetBytes(out, fmt.Sprintf("system_instruction.parts.%d.text", systemPartIndex), content.Get("text").String()) + systemPartIndex++ } else if content.IsArray() { contents := content.Array() if len(contents) > 0 { - out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user") + out, _ = sjson.SetBytes(out, "system_instruction.role", "user") for j := 0; j < len(contents); j++ { - out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", j), contents[j].Get("text").String()) + out, _ = sjson.SetBytes(out, fmt.Sprintf("system_instruction.parts.%d.text", systemPartIndex), contents[j].Get("text").String()) + systemPartIndex++ } } } From 109cffc010d8f6fbec8d0ee3c456c11502f15873 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sat, 17 Jan 2026 12:20:58 +0800 Subject: [PATCH 45/65] refactor(auth): simplify filename prefixes for qwen and iflow tokens --- internal/api/handlers/management/auth_files.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/internal/api/handlers/management/auth_files.go b/internal/api/handlers/management/auth_files.go index 27c9a902..e6830d1d 100644 --- a/internal/api/handlers/management/auth_files.go +++ b/internal/api/handlers/management/auth_files.go @@ -1703,7 +1703,7 @@ func (h *Handler) RequestQwenToken(c *gin.Context) { // Create token storage tokenStorage := qwenAuth.CreateTokenStorage(tokenData) - tokenStorage.Email = fmt.Sprintf("qwen-%d", time.Now().UnixMilli()) + tokenStorage.Email = fmt.Sprintf("%d", time.Now().UnixMilli()) record := &coreauth.Auth{ ID: fmt.Sprintf("qwen-%s.json", tokenStorage.Email), Provider: "qwen", @@ -1808,7 +1808,7 @@ func (h *Handler) RequestIFlowToken(c *gin.Context) { tokenStorage := authSvc.CreateTokenStorage(tokenData) identifier := strings.TrimSpace(tokenStorage.Email) if identifier == "" { - identifier = fmt.Sprintf("iflow-%d", time.Now().UnixMilli()) + identifier = fmt.Sprintf("%d", time.Now().UnixMilli()) tokenStorage.Email = identifier } record := &coreauth.Auth{ @@ 
-1893,15 +1893,17 @@ func (h *Handler) RequestIFlowCookieToken(c *gin.Context) { fileName := iflowauth.SanitizeIFlowFileName(email) if fileName == "" { fileName = fmt.Sprintf("iflow-%d", time.Now().UnixMilli()) + } else { + fileName = fmt.Sprintf("iflow-%s", fileName) } tokenStorage.Email = email timestamp := time.Now().Unix() record := &coreauth.Auth{ - ID: fmt.Sprintf("iflow-%s-%d.json", fileName, timestamp), + ID: fmt.Sprintf("%s-%d.json", fileName, timestamp), Provider: "iflow", - FileName: fmt.Sprintf("iflow-%s-%d.json", fileName, timestamp), + FileName: fmt.Sprintf("%s-%d.json", fileName, timestamp), Storage: tokenStorage, Metadata: map[string]any{ "email": email, From 8549a92e9a2e47b3353d36ada7018bb02b46a31b Mon Sep 17 00:00:00 2001 From: Tubagus <54710482+0xtbug@users.noreply.github.com> Date: Sat, 17 Jan 2026 11:29:22 +0700 Subject: [PATCH 46/65] docs(readme): add ZeroLimit to projects based on CLIProxyAPI Added ZeroLimit app to the list of projects in README. --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 7875a989..07d47905 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,10 @@ Windows-native CLIProxyAPI fork with TUI, system tray, and multi-provider OAuth VSCode extension for quick switching between Claude Code models, featuring integrated CLIProxyAPI as its backend with automatic background lifecycle management. +### [ZeroLimit](https://github.com/0xtbug/zero-limit) + +Windows desktop app built with Tauri + React for monitoring AI coding assistant quotas. Track usage across Gemini, Claude, OpenAI Codex, and Antigravity accounts with real-time dashboard, system tray integration, and one-click proxy control - no API keys needed. + > [!NOTE] > If you developed a project based on CLIProxyAPI, please open a PR to add it to this list. From dbba71028ec35debe89a991ba031571d442fc903 Mon Sep 17 00:00:00 2001 From: Tubagus <54710482+0xtbug@users.noreply.github.com> Date: Sat, 17 Jan 2026 11:30:15 +0700 Subject: [PATCH 47/65] docs(readme): add ZeroLimit to projects based on CLIProxyAPI --- README_CN.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README_CN.md b/README_CN.md index fdc8d64c..8600aada 100644 --- a/README_CN.md +++ b/README_CN.md @@ -129,6 +129,10 @@ CLI 封装器,用于通过 CLIProxyAPI OAuth 即时切换多个 Claude 账户 一款 VSCode 扩展,提供了在 VSCode 中快速切换 Claude Code 模型的功能,内置 CLIProxyAPI 作为其后端,支持后台自动启动和关闭。 +### [ZeroLimit](https://github.com/0xtbug/zero-limit) + +Windows 桌面应用,基于 Tauri + React 构建,用于监控 AI 编程助手配额。支持跨 Gemini、Claude、OpenAI Codex 和 Antigravity 账户的使用量追踪,提供实时仪表盘、系统托盘集成和一键代理控制,无需 API 密钥。 + > [!NOTE] > 如果你开发了基于 CLIProxyAPI 的项目,请提交一个 PR(拉取请求)将其添加到此列表中。 From f89feb881c01ab250fc44f493651fb58b312622d Mon Sep 17 00:00:00 2001 From: Tubagus <54710482+0xtbug@users.noreply.github.com> Date: Sat, 17 Jan 2026 11:33:18 +0700 Subject: [PATCH 48/65] Update README.md Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 07d47905..bd339982 100644 --- a/README.md +++ b/README.md @@ -132,7 +132,7 @@ VSCode extension for quick switching between Claude Code models, featuring integ ### [ZeroLimit](https://github.com/0xtbug/zero-limit) -Windows desktop app built with Tauri + React for monitoring AI coding assistant quotas. Track usage across Gemini, Claude, OpenAI Codex, and Antigravity accounts with real-time dashboard, system tray integration, and one-click proxy control - no API keys needed. 
+Windows desktop app built with Tauri + React for monitoring AI coding assistant quotas via CLIProxyAPI. Track usage across Gemini, Claude, OpenAI Codex, and Antigravity accounts with real-time dashboard, system tray integration, and one-click proxy control - no API keys needed. > [!NOTE] > If you developed a project based on CLIProxyAPI, please open a PR to add it to this list. From c8843edb81bb975e642abac84cb54e04af281b3b Mon Sep 17 00:00:00 2001 From: Tubagus <54710482+0xtbug@users.noreply.github.com> Date: Sat, 17 Jan 2026 11:33:29 +0700 Subject: [PATCH 49/65] Update README_CN.md Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- README_CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_CN.md b/README_CN.md index 8600aada..1b3ed74b 100644 --- a/README_CN.md +++ b/README_CN.md @@ -131,7 +131,7 @@ CLI 封装器,用于通过 CLIProxyAPI OAuth 即时切换多个 Claude 账户 ### [ZeroLimit](https://github.com/0xtbug/zero-limit) -Windows 桌面应用,基于 Tauri + React 构建,用于监控 AI 编程助手配额。支持跨 Gemini、Claude、OpenAI Codex 和 Antigravity 账户的使用量追踪,提供实时仪表盘、系统托盘集成和一键代理控制,无需 API 密钥。 +Windows 桌面应用,基于 Tauri + React 构建,用于通过 CLIProxyAPI 监控 AI 编程助手配额。支持跨 Gemini、Claude、OpenAI Codex 和 Antigravity 账户的使用量追踪,提供实时仪表盘、系统托盘集成和一键代理控制,无需 API 密钥。 > [!NOTE] > 如果你开发了基于 CLIProxyAPI 的项目,请提交一个 PR(拉取请求)将其添加到此列表中。 From 46433a25f8f75a0b538624262bc6dc959e77994d Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Sun, 18 Jan 2026 00:50:10 +0800 Subject: [PATCH 50/65] fix(translator): add check for empty `text` to prevent invalid serialization in `gemini` and `antigravity` --- .../chat-completions/antigravity_openai_request.go | 10 ++++++++-- .../openai/chat-completions/gemini_openai_request.go | 9 ++++++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go index 89e486c0..94546bda 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go @@ -169,7 +169,10 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ for _, item := range items { switch item.Get("type").String() { case "text": - node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String()) + text := item.Get("text").String() + if text != "" { + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text) + } p++ case "image_url": imageURL := item.Get("image_url.url").String() @@ -213,7 +216,10 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ for _, item := range content.Array() { switch item.Get("type").String() { case "text": - node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String()) + text := item.Get("text").String() + if text != "" { + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text) + } p++ case "image_url": // If the assistant returned an inline data URL, preserve it for history fidelity. 
diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 27805dd8..7ad005b9 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -166,7 +166,10 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) for _, item := range items { switch item.Get("type").String() { case "text": - node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String()) + text := item.Get("text").String() + if text != "" { + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text) + } p++ case "image_url": imageURL := item.Get("image_url.url").String() @@ -211,6 +214,10 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) for _, item := range content.Array() { switch item.Get("type").String() { case "text": + text := item.Get("text").String() + if text != "" { + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text) + } p++ case "image_url": // If the assistant returned an inline data URL, preserve it for history fidelity. From dd6d78cb31b1dc268510c5bffeb1e0f960a1817c Mon Sep 17 00:00:00 2001 From: sususu98 Date: Sat, 17 Jan 2026 17:50:10 +0800 Subject: [PATCH 51/65] fix(antigravity): convert non-string enum values to strings for Gemini API Gemini API requires all enum values in function declarations to be strings. Some MCP tools (e.g., roxybrowser) define schemas with numeric enums like `"enum": [0, 1, 2]`, causing INVALID_ARGUMENT errors. Add convertEnumValuesToStrings() to automatically convert numeric and boolean enum values to their string representations during schema transformation. --- internal/util/gemini_schema.go | 28 ++++++++++++++++ internal/util/gemini_schema_test.go | 51 +++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/internal/util/gemini_schema.go b/internal/util/gemini_schema.go index 38d3773e..c7cb0f40 100644 --- a/internal/util/gemini_schema.go +++ b/internal/util/gemini_schema.go @@ -19,6 +19,7 @@ func CleanJSONSchemaForAntigravity(jsonStr string) string { // Phase 1: Convert and add hints jsonStr = convertRefsToHints(jsonStr) jsonStr = convertConstToEnum(jsonStr) + jsonStr = convertEnumValuesToStrings(jsonStr) jsonStr = addEnumHints(jsonStr) jsonStr = addAdditionalPropertiesHints(jsonStr) jsonStr = moveConstraintsToDescription(jsonStr) @@ -77,6 +78,33 @@ func convertConstToEnum(jsonStr string) string { return jsonStr } +// convertEnumValuesToStrings ensures all enum values are strings. +// Gemini API requires enum values to be of type string, not numbers or booleans. 
+func convertEnumValuesToStrings(jsonStr string) string { + for _, p := range findPaths(jsonStr, "enum") { + arr := gjson.Get(jsonStr, p) + if !arr.IsArray() { + continue + } + + var stringVals []string + needsConversion := false + for _, item := range arr.Array() { + // Check if any value is not a string + if item.Type != gjson.String { + needsConversion = true + } + stringVals = append(stringVals, item.String()) + } + + // Only update if we found non-string values + if needsConversion { + jsonStr, _ = sjson.Set(jsonStr, p, stringVals) + } + } + return jsonStr +} + func addEnumHints(jsonStr string) string { for _, p := range findPaths(jsonStr, "enum") { arr := gjson.Get(jsonStr, p) diff --git a/internal/util/gemini_schema_test.go b/internal/util/gemini_schema_test.go index 60335f22..ca77225e 100644 --- a/internal/util/gemini_schema_test.go +++ b/internal/util/gemini_schema_test.go @@ -818,3 +818,54 @@ func TestCleanJSONSchemaForAntigravity_MultipleFormats(t *testing.T) { t.Errorf("date-time format hint should be added, got: %s", result) } } + +func TestCleanJSONSchemaForAntigravity_NumericEnumToString(t *testing.T) { + // Gemini API requires enum values to be strings, not numbers + input := `{ + "type": "object", + "properties": { + "priority": {"type": "integer", "enum": [0, 1, 2]}, + "level": {"type": "number", "enum": [1.5, 2.5, 3.5]}, + "status": {"type": "string", "enum": ["active", "inactive"]} + } + }` + + result := CleanJSONSchemaForAntigravity(input) + + // Numeric enum values should be converted to strings + if strings.Contains(result, `"enum":[0,1,2]`) { + t.Errorf("Integer enum values should be converted to strings, got: %s", result) + } + if strings.Contains(result, `"enum":[1.5,2.5,3.5]`) { + t.Errorf("Float enum values should be converted to strings, got: %s", result) + } + // Should contain string versions + if !strings.Contains(result, `"0"`) || !strings.Contains(result, `"1"`) || !strings.Contains(result, `"2"`) { + t.Errorf("Integer enum values should be converted to string format, got: %s", result) + } + // String enum values should remain unchanged + if !strings.Contains(result, `"active"`) || !strings.Contains(result, `"inactive"`) { + t.Errorf("String enum values should remain unchanged, got: %s", result) + } +} + +func TestCleanJSONSchemaForAntigravity_BooleanEnumToString(t *testing.T) { + // Boolean enum values should also be converted to strings + input := `{ + "type": "object", + "properties": { + "enabled": {"type": "boolean", "enum": [true, false]} + } + }` + + result := CleanJSONSchemaForAntigravity(input) + + // Boolean enum values should be converted to strings + if strings.Contains(result, `"enum":[true,false]`) { + t.Errorf("Boolean enum values should be converted to strings, got: %s", result) + } + // Should contain string versions "true" and "false" + if !strings.Contains(result, `"true"`) || !strings.Contains(result, `"false"`) { + t.Errorf("Boolean enum values should be converted to string format, got: %s", result) + } +} From 97b67e0e4991cf45c777385400c3319d3f156ee5 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sat, 17 Jan 2026 21:01:30 +0800 Subject: [PATCH 52/65] test(thinking): split E2E coverage into suffix and body parameter test functions Refactor thinking configuration tests by separating model name suffix-based scenarios from request body parameter-based scenarios into distinct test functions with independent case numbering. 
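
(For context, drawn from the cases below: the parenthesized suffix carries
the thinking setting in the model name itself, either a level such as
`level-model(medium)`, a token budget such as `gemini-budget-model(8192)`,
`(0)` for none/disabled, or `(-1)` for dynamic/auto; the body-parameter
variant exercises the same settings supplied via request fields instead.)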
Architectural improvements: - Extract thinkingTestCase struct to package level for shared usage - Add getTestModels() helper returning complete model fixture set - Introduce runThinkingTests() runner with protocol-specific field detection - Register level-subset-model fixture with constrained low/high level support - Extend iflow protocol handling for glm-test and minimax-test models - Add same-protocol strict boundary validation cases (80-89) - Replace error responses with clamped values for boundary-exceeding budgets --- test/thinking_conversion_test.go | 3368 ++++++++++++++++++++++-------- 1 file changed, 2476 insertions(+), 892 deletions(-) diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go index f28aa630..91490fa2 100644 --- a/test/thinking_conversion_test.go +++ b/test/thinking_conversion_test.go @@ -22,15 +22,2425 @@ import ( "github.com/tidwall/gjson" ) -// TestThinkingE2EMatrix tests the thinking configuration transformation using the real data flow path. +// thinkingTestCase represents a common test case structure for both suffix and body tests. +type thinkingTestCase struct { + name string + from string + to string + model string + inputJSON string + expectField string + expectValue string + includeThoughts string + expectErr bool +} + +// TestThinkingE2EMatrix_Suffix tests the thinking configuration transformation using model name suffix. // Data flow: Input JSON → TranslateRequest → ApplyThinking → Validate Output // No helper functions are used; all test data is inline. -func TestThinkingE2EMatrix(t *testing.T) { - // Register test models directly +func TestThinkingE2EMatrix_Suffix(t *testing.T) { reg := registry.GetGlobalRegistry() - uid := fmt.Sprintf("thinking-e2e-%d", time.Now().UnixNano()) + uid := fmt.Sprintf("thinking-e2e-suffix-%d", time.Now().UnixNano()) - testModels := []*registry.ModelInfo{ + reg.RegisterClient(uid, "test", getTestModels()) + defer reg.UnregisterClient(uid) + + cases := []thinkingTestCase{ + // level-model (Levels=minimal/low/medium/high, ZeroAllowed=false, DynamicAllowed=false) + + // Case 1: No suffix → injected default → medium + { + name: "1", + from: "openai", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 2: Specified medium → medium + { + name: "2", + from: "openai", + to: "codex", + model: "level-model(medium)", + inputJSON: `{"model":"level-model(medium)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 3: Specified xhigh → out of range error + { + name: "3", + from: "openai", + to: "codex", + model: "level-model(xhigh)", + inputJSON: `{"model":"level-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: true, + }, + // Case 4: Level none → clamped to minimal (ZeroAllowed=false) + { + name: "4", + from: "openai", + to: "codex", + model: "level-model(none)", + inputJSON: `{"model":"level-model(none)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "minimal", + expectErr: false, + }, + // Case 5: Level auto → DynamicAllowed=false → medium (mid-range) + { + name: "5", + from: "openai", + to: "codex", + model: "level-model(auto)", + inputJSON: `{"model":"level-model(auto)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: 
"medium", + expectErr: false, + }, + // Case 6: No suffix from gemini → injected default → medium + { + name: "6", + from: "gemini", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 7: Budget 8192 → medium + { + name: "7", + from: "gemini", + to: "codex", + model: "level-model(8192)", + inputJSON: `{"model":"level-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 8: Budget 64000 → clamped to high + { + name: "8", + from: "gemini", + to: "codex", + model: "level-model(64000)", + inputJSON: `{"model":"level-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning.effort", + expectValue: "high", + expectErr: false, + }, + // Case 9: Budget 0 → clamped to minimal (ZeroAllowed=false) + { + name: "9", + from: "gemini", + to: "codex", + model: "level-model(0)", + inputJSON: `{"model":"level-model(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning.effort", + expectValue: "minimal", + expectErr: false, + }, + // Case 10: Budget -1 → auto → DynamicAllowed=false → medium (mid-range) + { + name: "10", + from: "gemini", + to: "codex", + model: "level-model(-1)", + inputJSON: `{"model":"level-model(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 11: Claude source no suffix → passthrough (no thinking) + { + name: "11", + from: "claude", + to: "openai", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 12: Budget 8192 → medium + { + name: "12", + from: "claude", + to: "openai", + model: "level-model(8192)", + inputJSON: `{"model":"level-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_effort", + expectValue: "medium", + expectErr: false, + }, + // Case 13: Budget 64000 → clamped to high + { + name: "13", + from: "claude", + to: "openai", + model: "level-model(64000)", + inputJSON: `{"model":"level-model(64000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_effort", + expectValue: "high", + expectErr: false, + }, + // Case 14: Budget 0 → clamped to minimal (ZeroAllowed=false) + { + name: "14", + from: "claude", + to: "openai", + model: "level-model(0)", + inputJSON: `{"model":"level-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_effort", + expectValue: "minimal", + expectErr: false, + }, + // Case 15: Budget -1 → auto → DynamicAllowed=false → medium (mid-range) + { + name: "15", + from: "claude", + to: "openai", + model: "level-model(-1)", + inputJSON: `{"model":"level-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_effort", + expectValue: "medium", + expectErr: false, + }, + + // level-subset-model (Levels=low/high, ZeroAllowed=false, DynamicAllowed=false) + + // Case 16: Budget 8192 → medium → rounded down to low + { + name: "16", + from: "gemini", + to: "openai", + model: "level-subset-model(8192)", + inputJSON: `{"model":"level-subset-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_effort", + expectValue: "low", + expectErr: false, + }, + // Case 
17: Budget 1 → minimal → clamped to low (min supported) + { + name: "17", + from: "claude", + to: "gemini", + model: "level-subset-model(1)", + inputJSON: `{"model":"level-subset-model(1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingLevel", + expectValue: "low", + includeThoughts: "true", + expectErr: false, + }, + + // gemini-budget-model (Min=128, Max=20000, ZeroAllowed=false, DynamicAllowed=true) + + // Case 18: No suffix → passthrough + { + name: "18", + from: "openai", + to: "gemini", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 19: Effort medium → 8192 + { + name: "19", + from: "openai", + to: "gemini", + model: "gemini-budget-model(medium)", + inputJSON: `{"model":"gemini-budget-model(medium)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 20: Effort xhigh → clamped to 20000 (max) + { + name: "20", + from: "openai", + to: "gemini", + model: "gemini-budget-model(xhigh)", + inputJSON: `{"model":"gemini-budget-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + // Case 21: Effort none → clamped to 128 (min) → includeThoughts=false + { + name: "21", + from: "openai", + to: "gemini", + model: "gemini-budget-model(none)", + inputJSON: `{"model":"gemini-budget-model(none)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "128", + includeThoughts: "false", + expectErr: false, + }, + // Case 22: Effort auto → DynamicAllowed=true → -1 + { + name: "22", + from: "openai", + to: "gemini", + model: "gemini-budget-model(auto)", + inputJSON: `{"model":"gemini-budget-model(auto)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + // Case 23: Claude source no suffix → passthrough + { + name: "23", + from: "claude", + to: "gemini", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 24: Budget 8192 → 8192 + { + name: "24", + from: "claude", + to: "gemini", + model: "gemini-budget-model(8192)", + inputJSON: `{"model":"gemini-budget-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 25: Budget 64000 → clamped to 20000 (max) + { + name: "25", + from: "claude", + to: "gemini", + model: "gemini-budget-model(64000)", + inputJSON: `{"model":"gemini-budget-model(64000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + // Case 26: Budget 0 → clamped to 128 (min) → includeThoughts=false + { + name: "26", + from: "claude", + to: "gemini", + model: "gemini-budget-model(0)", + inputJSON: `{"model":"gemini-budget-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: 
"generationConfig.thinkingConfig.thinkingBudget", + expectValue: "128", + includeThoughts: "false", + expectErr: false, + }, + // Case 27: Budget -1 → DynamicAllowed=true → -1 + { + name: "27", + from: "claude", + to: "gemini", + model: "gemini-budget-model(-1)", + inputJSON: `{"model":"gemini-budget-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + + // gemini-mixed-model (Min=128, Max=32768, Levels=low/high, ZeroAllowed=false, DynamicAllowed=true) + + // Case 28: OpenAI source no suffix → passthrough + { + name: "28", + from: "openai", + to: "gemini", + model: "gemini-mixed-model", + inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 29: Effort high → low/high supported → high + { + name: "29", + from: "openai", + to: "gemini", + model: "gemini-mixed-model(high)", + inputJSON: `{"model":"gemini-mixed-model(high)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingLevel", + expectValue: "high", + includeThoughts: "true", + expectErr: false, + }, + // Case 30: Effort xhigh → not in low/high → error + { + name: "30", + from: "openai", + to: "gemini", + model: "gemini-mixed-model(xhigh)", + inputJSON: `{"model":"gemini-mixed-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: true, + }, + // Case 31: Effort none → clamped to low (min supported) → includeThoughts=false + { + name: "31", + from: "openai", + to: "gemini", + model: "gemini-mixed-model(none)", + inputJSON: `{"model":"gemini-mixed-model(none)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingLevel", + expectValue: "low", + includeThoughts: "false", + expectErr: false, + }, + // Case 32: Effort auto → DynamicAllowed=true → -1 (budget) + { + name: "32", + from: "openai", + to: "gemini", + model: "gemini-mixed-model(auto)", + inputJSON: `{"model":"gemini-mixed-model(auto)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + // Case 33: Claude source no suffix → passthrough + { + name: "33", + from: "claude", + to: "gemini", + model: "gemini-mixed-model", + inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 34: Budget 8192 → 8192 (keep budget) + { + name: "34", + from: "claude", + to: "gemini", + model: "gemini-mixed-model(8192)", + inputJSON: `{"model":"gemini-mixed-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 35: Budget 64000 → clamped to 32768 (max) + { + name: "35", + from: "claude", + to: "gemini", + model: "gemini-mixed-model(64000)", + inputJSON: `{"model":"gemini-mixed-model(64000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "32768", + includeThoughts: "true", + expectErr: false, + }, + // Case 36: Budget 0 → minimal → clamped to low (min level) → includeThoughts=false + { + name: "36", + from: "claude", + to: "gemini", + model: "gemini-mixed-model(0)", + inputJSON: 
`{"model":"gemini-mixed-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingLevel", + expectValue: "low", + includeThoughts: "false", + expectErr: false, + }, + // Case 37: Budget -1 → DynamicAllowed=true → -1 (budget) + { + name: "37", + from: "claude", + to: "gemini", + model: "gemini-mixed-model(-1)", + inputJSON: `{"model":"gemini-mixed-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + + // claude-budget-model (Min=1024, Max=128000, ZeroAllowed=true, DynamicAllowed=false) + + // Case 38: OpenAI source no suffix → passthrough + { + name: "38", + from: "openai", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 39: Effort medium → 8192 + { + name: "39", + from: "openai", + to: "claude", + model: "claude-budget-model(medium)", + inputJSON: `{"model":"claude-budget-model(medium)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, + // Case 40: Effort xhigh → clamped to 32768 (matrix value) + { + name: "40", + from: "openai", + to: "claude", + model: "claude-budget-model(xhigh)", + inputJSON: `{"model":"claude-budget-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + expectValue: "32768", + expectErr: false, + }, + // Case 41: Effort none → ZeroAllowed=true → disabled + { + name: "41", + from: "openai", + to: "claude", + model: "claude-budget-model(none)", + inputJSON: `{"model":"claude-budget-model(none)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.type", + expectValue: "disabled", + expectErr: false, + }, + // Case 42: Effort auto → DynamicAllowed=false → 64512 (mid-range) + { + name: "42", + from: "openai", + to: "claude", + model: "claude-budget-model(auto)", + inputJSON: `{"model":"claude-budget-model(auto)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + expectValue: "64512", + expectErr: false, + }, + // Case 43: Gemini source no suffix → passthrough + { + name: "43", + from: "gemini", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 44: Budget 8192 → 8192 + { + name: "44", + from: "gemini", + to: "claude", + model: "claude-budget-model(8192)", + inputJSON: `{"model":"claude-budget-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, + // Case 45: Budget 200000 → clamped to 128000 (max) + { + name: "45", + from: "gemini", + to: "claude", + model: "claude-budget-model(200000)", + inputJSON: `{"model":"claude-budget-model(200000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "thinking.budget_tokens", + expectValue: "128000", + expectErr: false, + }, + // Case 46: Budget 0 → ZeroAllowed=true → disabled + { + name: "46", + from: "gemini", + to: "claude", + model: "claude-budget-model(0)", + inputJSON: `{"model":"claude-budget-model(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "thinking.type", + expectValue: "disabled", + 
expectErr: false, + }, + // Case 47: Budget -1 → auto → DynamicAllowed=false → 64512 (mid-range) + { + name: "47", + from: "gemini", + to: "claude", + model: "claude-budget-model(-1)", + inputJSON: `{"model":"claude-budget-model(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "thinking.budget_tokens", + expectValue: "64512", + expectErr: false, + }, + + // antigravity-budget-model (Min=128, Max=20000, ZeroAllowed=true, DynamicAllowed=true) + + // Case 48: Gemini to Antigravity no suffix → passthrough + { + name: "48", + from: "gemini", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 49: Effort medium → 8192 + { + name: "49", + from: "gemini", + to: "antigravity", + model: "antigravity-budget-model(medium)", + inputJSON: `{"model":"antigravity-budget-model(medium)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 50: Effort xhigh → clamped to 20000 (max) + { + name: "50", + from: "gemini", + to: "antigravity", + model: "antigravity-budget-model(xhigh)", + inputJSON: `{"model":"antigravity-budget-model(xhigh)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + // Case 51: Effort none → ZeroAllowed=true → 0 → includeThoughts=false + { + name: "51", + from: "gemini", + to: "antigravity", + model: "antigravity-budget-model(none)", + inputJSON: `{"model":"antigravity-budget-model(none)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "0", + includeThoughts: "false", + expectErr: false, + }, + // Case 52: Effort auto → DynamicAllowed=true → -1 + { + name: "52", + from: "gemini", + to: "antigravity", + model: "antigravity-budget-model(auto)", + inputJSON: `{"model":"antigravity-budget-model(auto)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + // Case 53: Claude to Antigravity no suffix → passthrough + { + name: "53", + from: "claude", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 54: Budget 8192 → 8192 + { + name: "54", + from: "claude", + to: "antigravity", + model: "antigravity-budget-model(8192)", + inputJSON: `{"model":"antigravity-budget-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 55: Budget 64000 → clamped to 20000 (max) + { + name: "55", + from: "claude", + to: "antigravity", + model: "antigravity-budget-model(64000)", + inputJSON: `{"model":"antigravity-budget-model(64000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + // Case 56: Budget 0 → 
ZeroAllowed=true → 0 → includeThoughts=false + { + name: "56", + from: "claude", + to: "antigravity", + model: "antigravity-budget-model(0)", + inputJSON: `{"model":"antigravity-budget-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "0", + includeThoughts: "false", + expectErr: false, + }, + // Case 57: Budget -1 → DynamicAllowed=true → -1 + { + name: "57", + from: "claude", + to: "antigravity", + model: "antigravity-budget-model(-1)", + inputJSON: `{"model":"antigravity-budget-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + + // no-thinking-model (Thinking=nil) + + // Case 58: No thinking support → no configuration + { + name: "58", + from: "gemini", + to: "openai", + model: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 59: Budget 8192 → no thinking support → suffix stripped → no configuration + { + name: "59", + from: "gemini", + to: "openai", + model: "no-thinking-model(8192)", + inputJSON: `{"model":"no-thinking-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 60: Budget 0 → suffix stripped → no configuration + { + name: "60", + from: "gemini", + to: "openai", + model: "no-thinking-model(0)", + inputJSON: `{"model":"no-thinking-model(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 61: Budget -1 → suffix stripped → no configuration + { + name: "61", + from: "gemini", + to: "openai", + model: "no-thinking-model(-1)", + inputJSON: `{"model":"no-thinking-model(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 62: Claude source no suffix → no configuration + { + name: "62", + from: "claude", + to: "openai", + model: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 63: Budget 8192 → suffix stripped → no configuration + { + name: "63", + from: "claude", + to: "openai", + model: "no-thinking-model(8192)", + inputJSON: `{"model":"no-thinking-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 64: Budget 0 → suffix stripped → no configuration + { + name: "64", + from: "claude", + to: "openai", + model: "no-thinking-model(0)", + inputJSON: `{"model":"no-thinking-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 65: Budget -1 → suffix stripped → no configuration + { + name: "65", + from: "claude", + to: "openai", + model: "no-thinking-model(-1)", + inputJSON: `{"model":"no-thinking-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + + // user-defined-model (UserDefined=true, Thinking=nil) + + // Case 66: User defined model no suffix → passthrough + { + name: "66", + from: "gemini", + to: "openai", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 67: Budget 8192 → passthrough logic → medium + { + name: 
"67", + from: "gemini", + to: "openai", + model: "user-defined-model(8192)", + inputJSON: `{"model":"user-defined-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_effort", + expectValue: "medium", + expectErr: false, + }, + // Case 68: Budget 64000 → passthrough logic → xhigh + { + name: "68", + from: "gemini", + to: "openai", + model: "user-defined-model(64000)", + inputJSON: `{"model":"user-defined-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_effort", + expectValue: "xhigh", + expectErr: false, + }, + // Case 69: Budget 0 → passthrough logic → none + { + name: "69", + from: "gemini", + to: "openai", + model: "user-defined-model(0)", + inputJSON: `{"model":"user-defined-model(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_effort", + expectValue: "none", + expectErr: false, + }, + // Case 70: Budget -1 → passthrough logic → auto + { + name: "70", + from: "gemini", + to: "openai", + model: "user-defined-model(-1)", + inputJSON: `{"model":"user-defined-model(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_effort", + expectValue: "auto", + expectErr: false, + }, + // Case 71: Claude to Codex no suffix → injected default → medium + { + name: "71", + from: "claude", + to: "codex", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 72: Budget 8192 → passthrough logic → medium + { + name: "72", + from: "claude", + to: "codex", + model: "user-defined-model(8192)", + inputJSON: `{"model":"user-defined-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 73: Budget 64000 → passthrough logic → xhigh + { + name: "73", + from: "claude", + to: "codex", + model: "user-defined-model(64000)", + inputJSON: `{"model":"user-defined-model(64000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "xhigh", + expectErr: false, + }, + // Case 74: Budget 0 → passthrough logic → none + { + name: "74", + from: "claude", + to: "codex", + model: "user-defined-model(0)", + inputJSON: `{"model":"user-defined-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "none", + expectErr: false, + }, + // Case 75: Budget -1 → passthrough logic → auto + { + name: "75", + from: "claude", + to: "codex", + model: "user-defined-model(-1)", + inputJSON: `{"model":"user-defined-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "auto", + expectErr: false, + }, + // Case 76: OpenAI to Gemini budget 8192 → passthrough → 8192 + { + name: "76", + from: "openai", + to: "gemini", + model: "user-defined-model(8192)", + inputJSON: `{"model":"user-defined-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 77: OpenAI to Claude budget 8192 → passthrough → 8192 + { + name: "77", + from: "openai", + to: "claude", + model: "user-defined-model(8192)", + inputJSON: `{"model":"user-defined-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + expectValue: 
"8192", + expectErr: false, + }, + // Case 78: Codex to Gemini budget 8192 → passthrough → 8192 + { + name: "78", + from: "codex", + to: "gemini", + model: "user-defined-model(8192)", + inputJSON: `{"model":"user-defined-model(8192)","input":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 79: Codex to Claude budget 8192 → passthrough → 8192 + { + name: "79", + from: "codex", + to: "claude", + model: "user-defined-model(8192)", + inputJSON: `{"model":"user-defined-model(8192)","input":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, + + // Same-protocol passthrough tests (80-89) + + // Case 80: OpenAI to OpenAI, level high → passthrough reasoning_effort + { + name: "80", + from: "openai", + to: "openai", + model: "level-model(high)", + inputJSON: `{"model":"level-model(high)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_effort", + expectValue: "high", + expectErr: false, + }, + // Case 81: OpenAI to OpenAI, level xhigh → out of range error + { + name: "81", + from: "openai", + to: "openai", + model: "level-model(xhigh)", + inputJSON: `{"model":"level-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: true, + }, + // Case 82: Codex to Codex, level high → passthrough reasoning.effort + { + name: "82", + from: "codex", + to: "codex", + model: "level-model(high)", + inputJSON: `{"model":"level-model(high)","input":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "high", + expectErr: false, + }, + // Case 83: Codex to Codex, level xhigh → out of range error + { + name: "83", + from: "codex", + to: "codex", + model: "level-model(xhigh)", + inputJSON: `{"model":"level-model(xhigh)","input":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: true, + }, + // Case 84: Gemini to Gemini, budget 8192 → passthrough thinkingBudget + { + name: "84", + from: "gemini", + to: "gemini", + model: "gemini-budget-model(8192)", + inputJSON: `{"model":"gemini-budget-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 85: Gemini to Gemini, budget 64000 → exceeds Max error + { + name: "85", + from: "gemini", + to: "gemini", + model: "gemini-budget-model(64000)", + inputJSON: `{"model":"gemini-budget-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: true, + }, + // Case 86: Claude to Claude, budget 8192 → passthrough thinking.budget_tokens + { + name: "86", + from: "claude", + to: "claude", + model: "claude-budget-model(8192)", + inputJSON: `{"model":"claude-budget-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, + // Case 87: Claude to Claude, budget 200000 → exceeds Max error + { + name: "87", + from: "claude", + to: "claude", + model: "claude-budget-model(200000)", + inputJSON: `{"model":"claude-budget-model(200000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: true, + }, + // Case 88: Antigravity to Antigravity, budget 8192 → passthrough thinkingBudget + { + name: "88", + from: "antigravity", + to: "antigravity", + model: 
"antigravity-budget-model(8192)", + inputJSON: `{"model":"antigravity-budget-model(8192)","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 89: Antigravity to Antigravity, budget 64000 → exceeds Max error + { + name: "89", + from: "antigravity", + to: "antigravity", + model: "antigravity-budget-model(64000)", + inputJSON: `{"model":"antigravity-budget-model(64000)","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`, + expectField: "", + expectErr: true, + }, + + // iflow tests: glm-test and minimax-test (Cases 90-105) + + // glm-test (from: openai, claude) + // Case 90: OpenAI to iflow, no suffix → passthrough + { + name: "90", + from: "openai", + to: "iflow", + model: "glm-test", + inputJSON: `{"model":"glm-test","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 91: OpenAI to iflow, (medium) → enable_thinking=true + { + name: "91", + from: "openai", + to: "iflow", + model: "glm-test(medium)", + inputJSON: `{"model":"glm-test(medium)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "true", + expectErr: false, + }, + // Case 92: OpenAI to iflow, (auto) → enable_thinking=true + { + name: "92", + from: "openai", + to: "iflow", + model: "glm-test(auto)", + inputJSON: `{"model":"glm-test(auto)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "true", + expectErr: false, + }, + // Case 93: OpenAI to iflow, (none) → enable_thinking=false + { + name: "93", + from: "openai", + to: "iflow", + model: "glm-test(none)", + inputJSON: `{"model":"glm-test(none)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "false", + expectErr: false, + }, + // Case 94: Claude to iflow, no suffix → passthrough + { + name: "94", + from: "claude", + to: "iflow", + model: "glm-test", + inputJSON: `{"model":"glm-test","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 95: Claude to iflow, (8192) → enable_thinking=true + { + name: "95", + from: "claude", + to: "iflow", + model: "glm-test(8192)", + inputJSON: `{"model":"glm-test(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "true", + expectErr: false, + }, + // Case 96: Claude to iflow, (-1) → enable_thinking=true + { + name: "96", + from: "claude", + to: "iflow", + model: "glm-test(-1)", + inputJSON: `{"model":"glm-test(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "true", + expectErr: false, + }, + // Case 97: Claude to iflow, (0) → enable_thinking=false + { + name: "97", + from: "claude", + to: "iflow", + model: "glm-test(0)", + inputJSON: `{"model":"glm-test(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "false", + expectErr: false, + }, + + // minimax-test (from: openai, gemini) + // Case 98: OpenAI to iflow, no suffix → passthrough + { + name: "98", + from: "openai", + to: "iflow", + model: "minimax-test", + inputJSON: `{"model":"minimax-test","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 99: OpenAI to 
iflow, (medium) → reasoning_split=true + { + name: "99", + from: "openai", + to: "iflow", + model: "minimax-test(medium)", + inputJSON: `{"model":"minimax-test(medium)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_split", + expectValue: "true", + expectErr: false, + }, + // Case 100: OpenAI to iflow, (auto) → reasoning_split=true + { + name: "100", + from: "openai", + to: "iflow", + model: "minimax-test(auto)", + inputJSON: `{"model":"minimax-test(auto)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_split", + expectValue: "true", + expectErr: false, + }, + // Case 101: OpenAI to iflow, (none) → reasoning_split=false + { + name: "101", + from: "openai", + to: "iflow", + model: "minimax-test(none)", + inputJSON: `{"model":"minimax-test(none)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_split", + expectValue: "false", + expectErr: false, + }, + // Case 102: Gemini to iflow, no suffix → passthrough + { + name: "102", + from: "gemini", + to: "iflow", + model: "minimax-test", + inputJSON: `{"model":"minimax-test","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 103: Gemini to iflow, (8192) → reasoning_split=true + { + name: "103", + from: "gemini", + to: "iflow", + model: "minimax-test(8192)", + inputJSON: `{"model":"minimax-test(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_split", + expectValue: "true", + expectErr: false, + }, + // Case 104: Gemini to iflow, (-1) → reasoning_split=true + { + name: "104", + from: "gemini", + to: "iflow", + model: "minimax-test(-1)", + inputJSON: `{"model":"minimax-test(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_split", + expectValue: "true", + expectErr: false, + }, + // Case 105: Gemini to iflow, (0) → reasoning_split=false + { + name: "105", + from: "gemini", + to: "iflow", + model: "minimax-test(0)", + inputJSON: `{"model":"minimax-test(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_split", + expectValue: "false", + expectErr: false, + }, + } + + runThinkingTests(t, cases) +} + +// TestThinkingE2EMatrix_Body tests the thinking configuration transformation using request body parameters. 
+// Data flow: Input JSON with thinking params → TranslateRequest → ApplyThinking → Validate Output +func TestThinkingE2EMatrix_Body(t *testing.T) { + reg := registry.GetGlobalRegistry() + uid := fmt.Sprintf("thinking-e2e-body-%d", time.Now().UnixNano()) + + reg.RegisterClient(uid, "test", getTestModels()) + defer reg.UnregisterClient(uid) + + cases := []thinkingTestCase{ + // level-model (Levels=minimal/low/medium/high, ZeroAllowed=false, DynamicAllowed=false) + + // Case 1: No param → injected default → medium + { + name: "1", + from: "openai", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 2: reasoning_effort=medium → medium + { + name: "2", + from: "openai", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"medium"}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 3: reasoning_effort=xhigh → out of range error + { + name: "3", + from: "openai", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"xhigh"}`, + expectField: "", + expectErr: true, + }, + // Case 4: reasoning_effort=none → clamped to minimal + { + name: "4", + from: "openai", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"none"}`, + expectField: "reasoning.effort", + expectValue: "minimal", + expectErr: false, + }, + // Case 5: reasoning_effort=auto → medium (DynamicAllowed=false) + { + name: "5", + from: "openai", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"auto"}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 6: No param from gemini → injected default → medium + { + name: "6", + from: "gemini", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 7: thinkingBudget=8192 → medium + { + name: "7", + from: "gemini", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 8: thinkingBudget=64000 → clamped to high + { + name: "8", + from: "gemini", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":64000}}}`, + expectField: "reasoning.effort", + expectValue: "high", + expectErr: false, + }, + // Case 9: thinkingBudget=0 → clamped to minimal + { + name: "9", + from: "gemini", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":0}}}`, + expectField: "reasoning.effort", + expectValue: "minimal", + expectErr: false, + }, + // Case 10: thinkingBudget=-1 → medium (DynamicAllowed=false) + { + name: "10", + from: "gemini", + to: "codex", + model: 
"level-model", + inputJSON: `{"model":"level-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 11: Claude no param → passthrough (no thinking) + { + name: "11", + from: "claude", + to: "openai", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 12: thinking.budget_tokens=8192 → medium + { + name: "12", + from: "claude", + to: "openai", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":8192}}`, + expectField: "reasoning_effort", + expectValue: "medium", + expectErr: false, + }, + // Case 13: thinking.budget_tokens=64000 → clamped to high + { + name: "13", + from: "claude", + to: "openai", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":64000}}`, + expectField: "reasoning_effort", + expectValue: "high", + expectErr: false, + }, + // Case 14: thinking.budget_tokens=0 → clamped to minimal + { + name: "14", + from: "claude", + to: "openai", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":0}}`, + expectField: "reasoning_effort", + expectValue: "minimal", + expectErr: false, + }, + // Case 15: thinking.budget_tokens=-1 → medium (DynamicAllowed=false) + { + name: "15", + from: "claude", + to: "openai", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":-1}}`, + expectField: "reasoning_effort", + expectValue: "medium", + expectErr: false, + }, + + // level-subset-model (Levels=low/high, ZeroAllowed=false, DynamicAllowed=false) + + // Case 16: thinkingBudget=8192 → medium → rounded down to low + { + name: "16", + from: "gemini", + to: "openai", + model: "level-subset-model", + inputJSON: `{"model":"level-subset-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, + expectField: "reasoning_effort", + expectValue: "low", + expectErr: false, + }, + // Case 17: thinking.budget_tokens=1 → minimal → clamped to low + { + name: "17", + from: "claude", + to: "gemini", + model: "level-subset-model", + inputJSON: `{"model":"level-subset-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":1}}`, + expectField: "generationConfig.thinkingConfig.thinkingLevel", + expectValue: "low", + includeThoughts: "true", + expectErr: false, + }, + + // gemini-budget-model (Min=128, Max=20000, ZeroAllowed=false, DynamicAllowed=true) + + // Case 18: No param → passthrough + { + name: "18", + from: "openai", + to: "gemini", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 19: reasoning_effort=medium → 8192 + { + name: "19", + from: "openai", + to: "gemini", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"medium"}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", 
+ expectErr: false, + }, + // Case 20: reasoning_effort=xhigh → clamped to 20000 + { + name: "20", + from: "openai", + to: "gemini", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"xhigh"}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + // Case 21: reasoning_effort=none → clamped to 128 → includeThoughts=false + { + name: "21", + from: "openai", + to: "gemini", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"none"}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "128", + includeThoughts: "false", + expectErr: false, + }, + // Case 22: reasoning_effort=auto → -1 (DynamicAllowed=true) + { + name: "22", + from: "openai", + to: "gemini", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"auto"}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + // Case 23: Claude no param → passthrough + { + name: "23", + from: "claude", + to: "gemini", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 24: thinking.budget_tokens=8192 → 8192 + { + name: "24", + from: "claude", + to: "gemini", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":8192}}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 25: thinking.budget_tokens=64000 → clamped to 20000 + { + name: "25", + from: "claude", + to: "gemini", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":64000}}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + // Case 26: thinking.budget_tokens=0 → clamped to 128 → includeThoughts=false + { + name: "26", + from: "claude", + to: "gemini", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":0}}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "128", + includeThoughts: "false", + expectErr: false, + }, + // Case 27: thinking.budget_tokens=-1 → -1 (DynamicAllowed=true) + { + name: "27", + from: "claude", + to: "gemini", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":-1}}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + + // gemini-mixed-model (Min=128, Max=32768, Levels=low/high, ZeroAllowed=false, DynamicAllowed=true) + + // Case 28: No param → passthrough + { + name: "28", + from: "openai", + to: "gemini", + model: "gemini-mixed-model", + inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}]}`, + 
expectField: "", + expectErr: false, + }, + // Case 29: reasoning_effort=high → high + { + name: "29", + from: "openai", + to: "gemini", + model: "gemini-mixed-model", + inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"high"}`, + expectField: "generationConfig.thinkingConfig.thinkingLevel", + expectValue: "high", + includeThoughts: "true", + expectErr: false, + }, + // Case 30: reasoning_effort=xhigh → error (not in low/high) + { + name: "30", + from: "openai", + to: "gemini", + model: "gemini-mixed-model", + inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"xhigh"}`, + expectField: "", + expectErr: true, + }, + // Case 31: reasoning_effort=none → clamped to low → includeThoughts=false + { + name: "31", + from: "openai", + to: "gemini", + model: "gemini-mixed-model", + inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"none"}`, + expectField: "generationConfig.thinkingConfig.thinkingLevel", + expectValue: "low", + includeThoughts: "false", + expectErr: false, + }, + // Case 32: reasoning_effort=auto → -1 (DynamicAllowed=true) + { + name: "32", + from: "openai", + to: "gemini", + model: "gemini-mixed-model", + inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"auto"}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + // Case 33: Claude no param → passthrough + { + name: "33", + from: "claude", + to: "gemini", + model: "gemini-mixed-model", + inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 34: thinking.budget_tokens=8192 → 8192 (keeps budget) + { + name: "34", + from: "claude", + to: "gemini", + model: "gemini-mixed-model", + inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":8192}}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 35: thinking.budget_tokens=64000 → clamped to 32768 (keeps budget) + { + name: "35", + from: "claude", + to: "gemini", + model: "gemini-mixed-model", + inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":64000}}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "32768", + includeThoughts: "true", + expectErr: false, + }, + // Case 36: thinking.budget_tokens=0 → clamped to low → includeThoughts=false + { + name: "36", + from: "claude", + to: "gemini", + model: "gemini-mixed-model", + inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":0}}`, + expectField: "generationConfig.thinkingConfig.thinkingLevel", + expectValue: "low", + includeThoughts: "false", + expectErr: false, + }, + // Case 37: thinking.budget_tokens=-1 → -1 (DynamicAllowed=true) + { + name: "37", + from: "claude", + to: "gemini", + model: "gemini-mixed-model", + inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":-1}}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + 
+ // claude-budget-model (Min=1024, Max=128000, ZeroAllowed=true, DynamicAllowed=false) + + // Case 38: No param → passthrough + { + name: "38", + from: "openai", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 39: reasoning_effort=medium → 8192 + { + name: "39", + from: "openai", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"medium"}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, + // Case 40: reasoning_effort=xhigh → clamped to 32768 + { + name: "40", + from: "openai", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"xhigh"}`, + expectField: "thinking.budget_tokens", + expectValue: "32768", + expectErr: false, + }, + // Case 41: reasoning_effort=none → disabled + { + name: "41", + from: "openai", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"none"}`, + expectField: "thinking.type", + expectValue: "disabled", + expectErr: false, + }, + // Case 42: reasoning_effort=auto → 64512 (mid-range) + { + name: "42", + from: "openai", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"auto"}`, + expectField: "thinking.budget_tokens", + expectValue: "64512", + expectErr: false, + }, + // Case 43: Gemini no param → passthrough + { + name: "43", + from: "gemini", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 44: thinkingBudget=8192 → 8192 + { + name: "44", + from: "gemini", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, + // Case 45: thinkingBudget=200000 → clamped to 128000 + { + name: "45", + from: "gemini", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":200000}}}`, + expectField: "thinking.budget_tokens", + expectValue: "128000", + expectErr: false, + }, + // Case 46: thinkingBudget=0 → disabled + { + name: "46", + from: "gemini", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":0}}}`, + expectField: "thinking.type", + expectValue: "disabled", + expectErr: false, + }, + // Case 47: thinkingBudget=-1 → 64512 (mid-range) + { + name: "47", + from: "gemini", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}`, + expectField: "thinking.budget_tokens", + expectValue: "64512", + expectErr: false, + }, + + // antigravity-budget-model 
(Min=128, Max=20000, ZeroAllowed=true, DynamicAllowed=true) + + // Case 48: Gemini no param → passthrough + { + name: "48", + from: "gemini", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 49: thinkingLevel=medium → 8192 + { + name: "49", + from: "gemini", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingLevel":"medium"}}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 50: thinkingLevel=xhigh → clamped to 20000 + { + name: "50", + from: "gemini", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingLevel":"xhigh"}}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + // Case 51: thinkingLevel=none → 0 (ZeroAllowed=true) + { + name: "51", + from: "gemini", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingLevel":"none"}}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "0", + includeThoughts: "false", + expectErr: false, + }, + // Case 52: thinkingBudget=-1 → -1 (DynamicAllowed=true) + { + name: "52", + from: "gemini", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + // Case 53: Claude no param → passthrough + { + name: "53", + from: "claude", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 54: thinking.budget_tokens=8192 → 8192 + { + name: "54", + from: "claude", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":8192}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 55: thinking.budget_tokens=64000 → clamped to 20000 + { + name: "55", + from: "claude", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":64000}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + // Case 56: thinking.budget_tokens=0 → 0 (ZeroAllowed=true) + { + name: "56", + from: "claude", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: 
`{"model":"antigravity-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":0}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "0", + includeThoughts: "false", + expectErr: false, + }, + // Case 57: thinking.budget_tokens=-1 → -1 (DynamicAllowed=true) + { + name: "57", + from: "claude", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":-1}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + + // no-thinking-model (Thinking=nil) + + // Case 58: Gemini no param → passthrough + { + name: "58", + from: "gemini", + to: "openai", + model: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 59: thinkingBudget=8192 → stripped + { + name: "59", + from: "gemini", + to: "openai", + model: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, + expectField: "", + expectErr: false, + }, + // Case 60: thinkingBudget=0 → stripped + { + name: "60", + from: "gemini", + to: "openai", + model: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":0}}}`, + expectField: "", + expectErr: false, + }, + // Case 61: thinkingBudget=-1 → stripped + { + name: "61", + from: "gemini", + to: "openai", + model: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}`, + expectField: "", + expectErr: false, + }, + // Case 62: Claude no param → passthrough + { + name: "62", + from: "claude", + to: "openai", + model: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 63: thinking.budget_tokens=8192 → stripped + { + name: "63", + from: "claude", + to: "openai", + model: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":8192}}`, + expectField: "", + expectErr: false, + }, + // Case 64: thinking.budget_tokens=0 → stripped + { + name: "64", + from: "claude", + to: "openai", + model: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":0}}`, + expectField: "", + expectErr: false, + }, + // Case 65: thinking.budget_tokens=-1 → stripped + { + name: "65", + from: "claude", + to: "openai", + model: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":-1}}`, + expectField: "", + expectErr: false, + }, + + // user-defined-model (UserDefined=true, Thinking=nil) + + // Case 66: Gemini no param → passthrough + { + name: "66", + from: "gemini", + to: "openai", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + 
expectErr: false, + }, + // Case 67: thinkingBudget=8192 → medium + { + name: "67", + from: "gemini", + to: "openai", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, + expectField: "reasoning_effort", + expectValue: "medium", + expectErr: false, + }, + // Case 68: thinkingBudget=64000 → xhigh (passthrough) + { + name: "68", + from: "gemini", + to: "openai", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":64000}}}`, + expectField: "reasoning_effort", + expectValue: "xhigh", + expectErr: false, + }, + // Case 69: thinkingBudget=0 → none + { + name: "69", + from: "gemini", + to: "openai", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":0}}}`, + expectField: "reasoning_effort", + expectValue: "none", + expectErr: false, + }, + // Case 70: thinkingBudget=-1 → auto + { + name: "70", + from: "gemini", + to: "openai", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}`, + expectField: "reasoning_effort", + expectValue: "auto", + expectErr: false, + }, + // Case 71: Claude no param → injected default → medium + { + name: "71", + from: "claude", + to: "codex", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 72: thinking.budget_tokens=8192 → medium + { + name: "72", + from: "claude", + to: "codex", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":8192}}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 73: thinking.budget_tokens=64000 → xhigh (passthrough) + { + name: "73", + from: "claude", + to: "codex", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":64000}}`, + expectField: "reasoning.effort", + expectValue: "xhigh", + expectErr: false, + }, + // Case 74: thinking.budget_tokens=0 → none + { + name: "74", + from: "claude", + to: "codex", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":0}}`, + expectField: "reasoning.effort", + expectValue: "none", + expectErr: false, + }, + // Case 75: thinking.budget_tokens=-1 → auto + { + name: "75", + from: "claude", + to: "codex", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":-1}}`, + expectField: "reasoning.effort", + expectValue: "auto", + expectErr: false, + }, + // Case 76: OpenAI reasoning_effort=medium to Gemini → 8192 + { + name: "76", + from: "openai", + to: "gemini", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"medium"}`, + expectField: 
"generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 77: OpenAI reasoning_effort=medium to Claude → 8192 + { + name: "77", + from: "openai", + to: "claude", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"medium"}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, + // Case 78: Codex reasoning.effort=medium to Gemini → 8192 + { + name: "78", + from: "codex", + to: "gemini", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","input":[{"role":"user","content":"hi"}],"reasoning":{"effort":"medium"}}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 79: Codex reasoning.effort=medium to Claude → 8192 + { + name: "79", + from: "codex", + to: "claude", + model: "user-defined-model", + inputJSON: `{"model":"user-defined-model","input":[{"role":"user","content":"hi"}],"reasoning":{"effort":"medium"}}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, + + // Same-protocol passthrough tests (80-89) + + // Case 80: OpenAI to OpenAI, reasoning_effort=high → passthrough + { + name: "80", + from: "openai", + to: "openai", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"high"}`, + expectField: "reasoning_effort", + expectValue: "high", + expectErr: false, + }, + // Case 81: OpenAI to OpenAI, reasoning_effort=xhigh → out of range error + { + name: "81", + from: "openai", + to: "openai", + model: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"xhigh"}`, + expectField: "", + expectErr: true, + }, + // Case 82: Codex to Codex, reasoning.effort=high → passthrough + { + name: "82", + from: "codex", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","input":[{"role":"user","content":"hi"}],"reasoning":{"effort":"high"}}`, + expectField: "reasoning.effort", + expectValue: "high", + expectErr: false, + }, + // Case 83: Codex to Codex, reasoning.effort=xhigh → out of range error + { + name: "83", + from: "codex", + to: "codex", + model: "level-model", + inputJSON: `{"model":"level-model","input":[{"role":"user","content":"hi"}],"reasoning":{"effort":"xhigh"}}`, + expectField: "", + expectErr: true, + }, + // Case 84: Gemini to Gemini, thinkingBudget=8192 → passthrough + { + name: "84", + from: "gemini", + to: "gemini", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 85: Gemini to Gemini, thinkingBudget=64000 → exceeds Max error + { + name: "85", + from: "gemini", + to: "gemini", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":64000}}}`, + expectField: "", + expectErr: true, + }, + // Case 86: Claude to Claude, thinking.budget_tokens=8192 → passthrough + { + name: "86", + from: "claude", + to: "claude", + model: "claude-budget-model", + inputJSON: 
`{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":8192}}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, + // Case 87: Claude to Claude, thinking.budget_tokens=200000 → exceeds Max error + { + name: "87", + from: "claude", + to: "claude", + model: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":200000}}`, + expectField: "", + expectErr: true, + }, + // Case 88: Antigravity to Antigravity, thinkingBudget=8192 → passthrough + { + name: "88", + from: "antigravity", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 89: Antigravity to Antigravity, thinkingBudget=64000 → exceeds Max error + { + name: "89", + from: "antigravity", + to: "antigravity", + model: "antigravity-budget-model", + inputJSON: `{"model":"antigravity-budget-model","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":64000}}}}`, + expectField: "", + expectErr: true, + }, + + // iflow tests: glm-test and minimax-test (Cases 90-105) + + // glm-test (from: openai, claude) + // Case 90: OpenAI to iflow, no param → passthrough + { + name: "90", + from: "openai", + to: "iflow", + model: "glm-test", + inputJSON: `{"model":"glm-test","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 91: OpenAI to iflow, reasoning_effort=medium → enable_thinking=true + { + name: "91", + from: "openai", + to: "iflow", + model: "glm-test", + inputJSON: `{"model":"glm-test","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"medium"}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "true", + expectErr: false, + }, + // Case 92: OpenAI to iflow, reasoning_effort=auto → enable_thinking=true + { + name: "92", + from: "openai", + to: "iflow", + model: "glm-test", + inputJSON: `{"model":"glm-test","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"auto"}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "true", + expectErr: false, + }, + // Case 93: OpenAI to iflow, reasoning_effort=none → enable_thinking=false + { + name: "93", + from: "openai", + to: "iflow", + model: "glm-test", + inputJSON: `{"model":"glm-test","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"none"}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "false", + expectErr: false, + }, + // Case 94: Claude to iflow, no param → passthrough + { + name: "94", + from: "claude", + to: "iflow", + model: "glm-test", + inputJSON: `{"model":"glm-test","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 95: Claude to iflow, thinking.budget_tokens=8192 → enable_thinking=true + { + name: "95", + from: "claude", + to: "iflow", + model: "glm-test", + inputJSON: `{"model":"glm-test","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":8192}}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "true", + expectErr: false, + }, + 
// Case 96: Claude to iflow, thinking.budget_tokens=-1 → enable_thinking=true + { + name: "96", + from: "claude", + to: "iflow", + model: "glm-test", + inputJSON: `{"model":"glm-test","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":-1}}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "true", + expectErr: false, + }, + // Case 97: Claude to iflow, thinking.budget_tokens=0 → enable_thinking=false + { + name: "97", + from: "claude", + to: "iflow", + model: "glm-test", + inputJSON: `{"model":"glm-test","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":0}}`, + expectField: "chat_template_kwargs.enable_thinking", + expectValue: "false", + expectErr: false, + }, + + // minimax-test (from: openai, gemini) + // Case 98: OpenAI to iflow, no param → passthrough + { + name: "98", + from: "openai", + to: "iflow", + model: "minimax-test", + inputJSON: `{"model":"minimax-test","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 99: OpenAI to iflow, reasoning_effort=medium → reasoning_split=true + { + name: "99", + from: "openai", + to: "iflow", + model: "minimax-test", + inputJSON: `{"model":"minimax-test","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"medium"}`, + expectField: "reasoning_split", + expectValue: "true", + expectErr: false, + }, + // Case 100: OpenAI to iflow, reasoning_effort=auto → reasoning_split=true + { + name: "100", + from: "openai", + to: "iflow", + model: "minimax-test", + inputJSON: `{"model":"minimax-test","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"auto"}`, + expectField: "reasoning_split", + expectValue: "true", + expectErr: false, + }, + // Case 101: OpenAI to iflow, reasoning_effort=none → reasoning_split=false + { + name: "101", + from: "openai", + to: "iflow", + model: "minimax-test", + inputJSON: `{"model":"minimax-test","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"none"}`, + expectField: "reasoning_split", + expectValue: "false", + expectErr: false, + }, + // Case 102: Gemini to iflow, no param → passthrough + { + name: "102", + from: "gemini", + to: "iflow", + model: "minimax-test", + inputJSON: `{"model":"minimax-test","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // Case 103: Gemini to iflow, thinkingBudget=8192 → reasoning_split=true + { + name: "103", + from: "gemini", + to: "iflow", + model: "minimax-test", + inputJSON: `{"model":"minimax-test","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, + expectField: "reasoning_split", + expectValue: "true", + expectErr: false, + }, + // Case 104: Gemini to iflow, thinkingBudget=-1 → reasoning_split=true + { + name: "104", + from: "gemini", + to: "iflow", + model: "minimax-test", + inputJSON: `{"model":"minimax-test","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}`, + expectField: "reasoning_split", + expectValue: "true", + expectErr: false, + }, + // Case 105: Gemini to iflow, thinkingBudget=0 → reasoning_split=false + { + name: "105", + from: "gemini", + to: "iflow", + model: "minimax-test", + inputJSON: `{"model":"minimax-test","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":0}}}`, + expectField: "reasoning_split", + expectValue: "false", + expectErr: false, + }, + } + + 
runThinkingTests(t, cases) +} + +// getTestModels returns the shared model definitions for E2E tests. +func getTestModels() []*registry.ModelInfo { + return []*registry.ModelInfo{ { ID: "level-model", Object: "model", @@ -38,11 +2448,16 @@ func TestThinkingE2EMatrix(t *testing.T) { OwnedBy: "test", Type: "openai", DisplayName: "Level Model", - Thinking: ®istry.ThinkingSupport{ - Levels: []string{"minimal", "low", "medium", "high"}, - ZeroAllowed: false, - DynamicAllowed: false, - }, + Thinking: ®istry.ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}, ZeroAllowed: false, DynamicAllowed: false}, + }, + { + ID: "level-subset-model", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "gemini", + DisplayName: "Level Subset Model", + Thinking: ®istry.ThinkingSupport{Levels: []string{"low", "high"}, ZeroAllowed: false, DynamicAllowed: false}, }, { ID: "gemini-budget-model", @@ -51,12 +2466,7 @@ func TestThinkingE2EMatrix(t *testing.T) { OwnedBy: "test", Type: "gemini", DisplayName: "Gemini Budget Model", - Thinking: ®istry.ThinkingSupport{ - Min: 128, - Max: 20000, - ZeroAllowed: false, - DynamicAllowed: true, - }, + Thinking: ®istry.ThinkingSupport{Min: 128, Max: 20000, ZeroAllowed: false, DynamicAllowed: true}, }, { ID: "gemini-mixed-model", @@ -65,13 +2475,7 @@ func TestThinkingE2EMatrix(t *testing.T) { OwnedBy: "test", Type: "gemini", DisplayName: "Gemini Mixed Model", - Thinking: ®istry.ThinkingSupport{ - Min: 128, - Max: 32768, - Levels: []string{"low", "high"}, - ZeroAllowed: false, - DynamicAllowed: true, - }, + Thinking: ®istry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}, ZeroAllowed: false, DynamicAllowed: true}, }, { ID: "claude-budget-model", @@ -80,12 +2484,7 @@ func TestThinkingE2EMatrix(t *testing.T) { OwnedBy: "test", Type: "claude", DisplayName: "Claude Budget Model", - Thinking: ®istry.ThinkingSupport{ - Min: 1024, - Max: 128000, - ZeroAllowed: true, - DynamicAllowed: false, - }, + Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, }, { ID: "antigravity-budget-model", @@ -94,12 +2493,7 @@ func TestThinkingE2EMatrix(t *testing.T) { OwnedBy: "test", Type: "gemini-cli", DisplayName: "Antigravity Budget Model", - Thinking: ®istry.ThinkingSupport{ - Min: 128, - Max: 20000, - ZeroAllowed: true, - DynamicAllowed: true, - }, + Thinking: ®istry.ThinkingSupport{Min: 128, Max: 20000, ZeroAllowed: true, DynamicAllowed: true}, }, { ID: "no-thinking-model", @@ -120,877 +2514,53 @@ func TestThinkingE2EMatrix(t *testing.T) { UserDefined: true, Thinking: nil, }, - } - - reg.RegisterClient(uid, "test", testModels) - defer reg.UnregisterClient(uid) - - type testCase struct { - name string - from string - to string - modelSuffix string - inputJSON string - expectField string - expectValue string - includeThoughts string - expectErr bool - } - - cases := []testCase{ - // level-model (Levels=minimal/low/medium/high, ZeroAllowed=false, DynamicAllowed=false) - // Case 1: No suffix, translator adds default medium for codex { - name: "1", - from: "openai", - to: "codex", - modelSuffix: "level-model", - inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}]}`, - expectField: "reasoning.effort", - expectValue: "medium", - expectErr: false, - }, - // Case 2: Explicit medium level - { - name: "2", - from: "openai", - to: "codex", - modelSuffix: "level-model(medium)", - inputJSON: `{"model":"level-model(medium)","messages":[{"role":"user","content":"hi"}]}`, - expectField: 
"reasoning.effort", - expectValue: "medium", - expectErr: false, - }, - // Case 3: xhigh not in Levels=[minimal,low,medium,high] → ValidateConfig returns error - { - name: "3", - from: "openai", - to: "codex", - modelSuffix: "level-model(xhigh)", - inputJSON: `{"model":"level-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "", - expectErr: true, - }, - // Case 4: none → ModeNone, ZeroAllowed=false → clamp to min level (minimal) - { - name: "4", - from: "openai", - to: "codex", - modelSuffix: "level-model(none)", - inputJSON: `{"model":"level-model(none)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "reasoning.effort", - expectValue: "minimal", - expectErr: false, - }, - // Case 5: auto → ModeAuto, DynamicAllowed=false → convert to mid-range (medium) - { - name: "5", - from: "openai", - to: "codex", - modelSuffix: "level-model(auto)", - inputJSON: `{"model":"level-model(auto)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "reasoning.effort", - expectValue: "medium", - expectErr: false, - }, - // Case 6: No suffix from gemini → translator injects default reasoning.effort: medium - { - name: "6", - from: "gemini", - to: "codex", - modelSuffix: "level-model", - inputJSON: `{"model":"level-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "reasoning.effort", - expectValue: "medium", - expectErr: false, - }, - // Case 7: 8192 → medium (1025-8192) - { - name: "7", - from: "gemini", - to: "codex", - modelSuffix: "level-model(8192)", - inputJSON: `{"model":"level-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "reasoning.effort", - expectValue: "medium", - expectErr: false, - }, - // Case 8: 64000 → xhigh → not supported → error - { - name: "8", - from: "gemini", - to: "codex", - modelSuffix: "level-model(64000)", - inputJSON: `{"model":"level-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "", - expectErr: true, - }, - // Case 9: 0 → ModeNone, ZeroAllowed=false → clamp to min level (minimal) - { - name: "9", - from: "gemini", - to: "codex", - modelSuffix: "level-model(0)", - inputJSON: `{"model":"level-model(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "reasoning.effort", - expectValue: "minimal", - expectErr: false, - }, - // Case 10: -1 → ModeAuto, DynamicAllowed=false → convert to mid-range (medium) - { - name: "10", - from: "gemini", - to: "codex", - modelSuffix: "level-model(-1)", - inputJSON: `{"model":"level-model(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "reasoning.effort", - expectValue: "medium", - expectErr: false, - }, - // Case 11: No suffix from claude → no thinking config - { - name: "11", - from: "claude", - to: "openai", - modelSuffix: "level-model", - inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}]}`, - expectField: "", - expectErr: false, - }, - // Case 12: 8192 → medium - { - name: "12", - from: "claude", - to: "openai", - modelSuffix: "level-model(8192)", - inputJSON: `{"model":"level-model(8192)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "reasoning_effort", - expectValue: "medium", - expectErr: false, - }, - // Case 13: 64000 → xhigh → not supported → error - { - name: "13", - from: "claude", - to: "openai", - modelSuffix: "level-model(64000)", - inputJSON: `{"model":"level-model(64000)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "", - expectErr: true, - }, - // Case 14: 0 → ModeNone, 
ZeroAllowed=false → clamp to min level (minimal) - { - name: "14", - from: "claude", - to: "openai", - modelSuffix: "level-model(0)", - inputJSON: `{"model":"level-model(0)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "reasoning_effort", - expectValue: "minimal", - expectErr: false, - }, - // Case 15: -1 → ModeAuto, DynamicAllowed=false → convert to mid-range (medium) - { - name: "15", - from: "claude", - to: "openai", - modelSuffix: "level-model(-1)", - inputJSON: `{"model":"level-model(-1)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "reasoning_effort", - expectValue: "medium", - expectErr: false, - }, - - // gemini-budget-model (Min=128, Max=20000, ZeroAllowed=false, DynamicAllowed=true) - { - name: "16", - from: "openai", - to: "gemini", - modelSuffix: "gemini-budget-model", - inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}]}`, - expectField: "", - expectErr: false, - }, - // medium → 8192 - { - name: "17", - from: "openai", - to: "gemini", - modelSuffix: "gemini-budget-model(medium)", - inputJSON: `{"model":"gemini-budget-model(medium)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "generationConfig.thinkingConfig.thinkingBudget", - expectValue: "8192", - includeThoughts: "true", - expectErr: false, - }, - // xhigh → 32768 → clamp to 20000 - { - name: "18", - from: "openai", - to: "gemini", - modelSuffix: "gemini-budget-model(xhigh)", - inputJSON: `{"model":"gemini-budget-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "generationConfig.thinkingConfig.thinkingBudget", - expectValue: "20000", - includeThoughts: "true", - expectErr: false, - }, - // none → 0 → ZeroAllowed=false → clamp to 128, includeThoughts=false - { - name: "19", - from: "openai", - to: "gemini", - modelSuffix: "gemini-budget-model(none)", - inputJSON: `{"model":"gemini-budget-model(none)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "generationConfig.thinkingConfig.thinkingBudget", - expectValue: "128", - includeThoughts: "false", - expectErr: false, - }, - // auto → -1 dynamic allowed - { - name: "20", - from: "openai", - to: "gemini", - modelSuffix: "gemini-budget-model(auto)", - inputJSON: `{"model":"gemini-budget-model(auto)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "generationConfig.thinkingConfig.thinkingBudget", - expectValue: "-1", - includeThoughts: "true", - expectErr: false, - }, - { - name: "21", - from: "claude", - to: "gemini", - modelSuffix: "gemini-budget-model", - inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}]}`, - expectField: "", - expectErr: false, - }, - { - name: "22", - from: "claude", - to: "gemini", - modelSuffix: "gemini-budget-model(8192)", - inputJSON: `{"model":"gemini-budget-model(8192)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "generationConfig.thinkingConfig.thinkingBudget", - expectValue: "8192", - includeThoughts: "true", - expectErr: false, - }, - { - name: "23", - from: "claude", - to: "gemini", - modelSuffix: "gemini-budget-model(64000)", - inputJSON: `{"model":"gemini-budget-model(64000)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "generationConfig.thinkingConfig.thinkingBudget", - expectValue: "20000", - includeThoughts: "true", - expectErr: false, - }, - { - name: "24", - from: "claude", - to: "gemini", - modelSuffix: "gemini-budget-model(0)", - inputJSON: `{"model":"gemini-budget-model(0)","messages":[{"role":"user","content":"hi"}]}`, - 
expectField: "generationConfig.thinkingConfig.thinkingBudget", - expectValue: "128", - includeThoughts: "false", - expectErr: false, - }, - { - name: "25", - from: "claude", - to: "gemini", - modelSuffix: "gemini-budget-model(-1)", - inputJSON: `{"model":"gemini-budget-model(-1)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "generationConfig.thinkingConfig.thinkingBudget", - expectValue: "-1", - includeThoughts: "true", - expectErr: false, - }, - - // gemini-mixed-model (Min=128, Max=32768, Levels=low/high, ZeroAllowed=false, DynamicAllowed=true) - { - name: "26", - from: "openai", - to: "gemini", - modelSuffix: "gemini-mixed-model", - inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}]}`, - expectField: "", - expectErr: false, - }, - // high → use thinkingLevel - { - name: "27", - from: "openai", - to: "gemini", - modelSuffix: "gemini-mixed-model(high)", - inputJSON: `{"model":"gemini-mixed-model(high)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "generationConfig.thinkingConfig.thinkingLevel", - expectValue: "high", - includeThoughts: "true", - expectErr: false, - }, - // xhigh → not in Levels=[low,high] → error - { - name: "28", - from: "openai", - to: "gemini", - modelSuffix: "gemini-mixed-model(xhigh)", - inputJSON: `{"model":"gemini-mixed-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "", - expectErr: true, - }, - // none → ModeNone, ZeroAllowed=false → set Level to lowest (low), includeThoughts=false - { - name: "29", - from: "openai", - to: "gemini", - modelSuffix: "gemini-mixed-model(none)", - inputJSON: `{"model":"gemini-mixed-model(none)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "generationConfig.thinkingConfig.thinkingLevel", - expectValue: "low", - includeThoughts: "false", - expectErr: false, - }, - // auto → dynamic allowed, use thinkingBudget=-1 - { - name: "30", - from: "openai", - to: "gemini", - modelSuffix: "gemini-mixed-model(auto)", - inputJSON: `{"model":"gemini-mixed-model(auto)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "generationConfig.thinkingConfig.thinkingBudget", - expectValue: "-1", - includeThoughts: "true", - expectErr: false, - }, - { - name: "31", - from: "claude", - to: "gemini", - modelSuffix: "gemini-mixed-model", - inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}]}`, - expectField: "", - expectErr: false, - }, - // 8192 → ModeBudget → clamp (in range) → thinkingBudget: 8192 - { - name: "32", - from: "claude", - to: "gemini", - modelSuffix: "gemini-mixed-model(8192)", - inputJSON: `{"model":"gemini-mixed-model(8192)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "generationConfig.thinkingConfig.thinkingBudget", - expectValue: "8192", - includeThoughts: "true", - expectErr: false, - }, - // 64000 → ModeBudget → clamp to 32768 → thinkingBudget: 32768 - { - name: "33", - from: "claude", - to: "gemini", - modelSuffix: "gemini-mixed-model(64000)", - inputJSON: `{"model":"gemini-mixed-model(64000)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "generationConfig.thinkingConfig.thinkingBudget", - expectValue: "32768", - includeThoughts: "true", - expectErr: false, - }, - // 0 → ModeNone, ZeroAllowed=false → set Level to lowest (low), includeThoughts=false - { - name: "34", - from: "claude", - to: "gemini", - modelSuffix: "gemini-mixed-model(0)", - inputJSON: `{"model":"gemini-mixed-model(0)","messages":[{"role":"user","content":"hi"}]}`, - expectField: 
"generationConfig.thinkingConfig.thinkingLevel", - expectValue: "low", - includeThoughts: "false", - expectErr: false, - }, - // -1 → auto, dynamic allowed - { - name: "35", - from: "claude", - to: "gemini", - modelSuffix: "gemini-mixed-model(-1)", - inputJSON: `{"model":"gemini-mixed-model(-1)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "generationConfig.thinkingConfig.thinkingBudget", - expectValue: "-1", - includeThoughts: "true", - expectErr: false, - }, - - // claude-budget-model (Min=1024, Max=128000, ZeroAllowed=true, DynamicAllowed=false) - { - name: "36", - from: "openai", - to: "claude", - modelSuffix: "claude-budget-model", - inputJSON: `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}]}`, - expectField: "", - expectErr: false, - }, - // medium → 8192 - { - name: "37", - from: "openai", - to: "claude", - modelSuffix: "claude-budget-model(medium)", - inputJSON: `{"model":"claude-budget-model(medium)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "thinking.budget_tokens", - expectValue: "8192", - expectErr: false, - }, - // xhigh → 32768 - { - name: "38", - from: "openai", - to: "claude", - modelSuffix: "claude-budget-model(xhigh)", - inputJSON: `{"model":"claude-budget-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "thinking.budget_tokens", - expectValue: "32768", - expectErr: false, - }, - // none → ZeroAllowed=true → disabled - { - name: "39", - from: "openai", - to: "claude", - modelSuffix: "claude-budget-model(none)", - inputJSON: `{"model":"claude-budget-model(none)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "thinking.type", - expectValue: "disabled", - expectErr: false, - }, - // auto → ModeAuto, DynamicAllowed=false → convert to mid-range - { - name: "40", - from: "openai", - to: "claude", - modelSuffix: "claude-budget-model(auto)", - inputJSON: `{"model":"claude-budget-model(auto)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "thinking.budget_tokens", - expectValue: "64512", - expectErr: false, - }, - { - name: "41", - from: "gemini", - to: "claude", - modelSuffix: "claude-budget-model", - inputJSON: `{"model":"claude-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "", - expectErr: false, - }, - { - name: "42", - from: "gemini", - to: "claude", - modelSuffix: "claude-budget-model(8192)", - inputJSON: `{"model":"claude-budget-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "thinking.budget_tokens", - expectValue: "8192", - expectErr: false, - }, - { - name: "43", - from: "gemini", - to: "claude", - modelSuffix: "claude-budget-model(200000)", - inputJSON: `{"model":"claude-budget-model(200000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "thinking.budget_tokens", - expectValue: "128000", - expectErr: false, - }, - // 0 → ZeroAllowed=true → disabled - { - name: "44", - from: "gemini", - to: "claude", - modelSuffix: "claude-budget-model(0)", - inputJSON: `{"model":"claude-budget-model(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "thinking.type", - expectValue: "disabled", - expectErr: false, - }, - // -1 → auto → DynamicAllowed=false → mid-range - { - name: "45", - from: "gemini", - to: "claude", - modelSuffix: "claude-budget-model(-1)", - inputJSON: `{"model":"claude-budget-model(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "thinking.budget_tokens", - expectValue: "64512", - expectErr: false, 
- }, - - // antigravity-budget-model (Min=128, Max=20000, ZeroAllowed=true, DynamicAllowed=true) - { - name: "46", - from: "gemini", - to: "antigravity", - modelSuffix: "antigravity-budget-model", - inputJSON: `{"model":"antigravity-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "", - expectErr: false, - }, - { - name: "47", - from: "gemini", - to: "antigravity", - modelSuffix: "antigravity-budget-model(medium)", - inputJSON: `{"model":"antigravity-budget-model(medium)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "request.generationConfig.thinkingConfig.thinkingBudget", - expectValue: "8192", - includeThoughts: "true", - expectErr: false, - }, - { - name: "48", - from: "gemini", - to: "antigravity", - modelSuffix: "antigravity-budget-model(xhigh)", - inputJSON: `{"model":"antigravity-budget-model(xhigh)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "request.generationConfig.thinkingConfig.thinkingBudget", - expectValue: "20000", - includeThoughts: "true", - expectErr: false, - }, - { - name: "49", - from: "gemini", - to: "antigravity", - modelSuffix: "antigravity-budget-model(none)", - inputJSON: `{"model":"antigravity-budget-model(none)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "request.generationConfig.thinkingConfig.thinkingBudget", - expectValue: "0", - includeThoughts: "false", - expectErr: false, - }, - { - name: "50", - from: "gemini", - to: "antigravity", - modelSuffix: "antigravity-budget-model(auto)", - inputJSON: `{"model":"antigravity-budget-model(auto)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "request.generationConfig.thinkingConfig.thinkingBudget", - expectValue: "-1", - includeThoughts: "true", - expectErr: false, - }, - { - name: "51", - from: "claude", - to: "antigravity", - modelSuffix: "antigravity-budget-model", - inputJSON: `{"model":"antigravity-budget-model","messages":[{"role":"user","content":"hi"}]}`, - expectField: "", - expectErr: false, - }, - { - name: "52", - from: "claude", - to: "antigravity", - modelSuffix: "antigravity-budget-model(8192)", - inputJSON: `{"model":"antigravity-budget-model(8192)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "request.generationConfig.thinkingConfig.thinkingBudget", - expectValue: "8192", - includeThoughts: "true", - expectErr: false, - }, - { - name: "53", - from: "claude", - to: "antigravity", - modelSuffix: "antigravity-budget-model(64000)", - inputJSON: `{"model":"antigravity-budget-model(64000)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "request.generationConfig.thinkingConfig.thinkingBudget", - expectValue: "20000", - includeThoughts: "true", - expectErr: false, - }, - { - name: "54", - from: "claude", - to: "antigravity", - modelSuffix: "antigravity-budget-model(0)", - inputJSON: `{"model":"antigravity-budget-model(0)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "request.generationConfig.thinkingConfig.thinkingBudget", - expectValue: "0", - includeThoughts: "false", - expectErr: false, - }, - { - name: "55", - from: "claude", - to: "antigravity", - modelSuffix: "antigravity-budget-model(-1)", - inputJSON: `{"model":"antigravity-budget-model(-1)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "request.generationConfig.thinkingConfig.thinkingBudget", - expectValue: "-1", - includeThoughts: "true", - expectErr: false, - }, - - // no-thinking-model (Thinking=nil) - { - name: "46", - from: "gemini", - to: 
"openai", - modelSuffix: "no-thinking-model", - inputJSON: `{"model":"no-thinking-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "", - expectErr: false, - }, - { - name: "47", - from: "gemini", - to: "openai", - modelSuffix: "no-thinking-model(8192)", - inputJSON: `{"model":"no-thinking-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "", - expectErr: false, - }, - { - name: "48", - from: "gemini", - to: "openai", - modelSuffix: "no-thinking-model(0)", - inputJSON: `{"model":"no-thinking-model(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "", - expectErr: false, - }, - { - name: "49", - from: "gemini", - to: "openai", - modelSuffix: "no-thinking-model(-1)", - inputJSON: `{"model":"no-thinking-model(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "", - expectErr: false, - }, - { - name: "50", - from: "claude", - to: "openai", - modelSuffix: "no-thinking-model", - inputJSON: `{"model":"no-thinking-model","messages":[{"role":"user","content":"hi"}]}`, - expectField: "", - expectErr: false, - }, - { - name: "51", - from: "claude", - to: "openai", - modelSuffix: "no-thinking-model(8192)", - inputJSON: `{"model":"no-thinking-model(8192)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "", - expectErr: false, - }, - { - name: "52", - from: "claude", - to: "openai", - modelSuffix: "no-thinking-model(0)", - inputJSON: `{"model":"no-thinking-model(0)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "", - expectErr: false, - }, - { - name: "53", - from: "claude", - to: "openai", - modelSuffix: "no-thinking-model(-1)", - inputJSON: `{"model":"no-thinking-model(-1)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "", - expectErr: false, - }, - - // user-defined-model (UserDefined=true, Thinking=nil) - { - name: "54", - from: "gemini", - to: "openai", - modelSuffix: "user-defined-model", - inputJSON: `{"model":"user-defined-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "", - expectErr: false, - }, - // 8192 → medium (passthrough for UserDefined) - { - name: "55", - from: "gemini", - to: "openai", - modelSuffix: "user-defined-model(8192)", - inputJSON: `{"model":"user-defined-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "reasoning_effort", - expectValue: "medium", - expectErr: false, - }, - // 64000 → xhigh - { - name: "56", - from: "gemini", - to: "openai", - modelSuffix: "user-defined-model(64000)", - inputJSON: `{"model":"user-defined-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "reasoning_effort", - expectValue: "xhigh", - expectErr: false, - }, - // 0 → none - { - name: "57", - from: "gemini", - to: "openai", - modelSuffix: "user-defined-model(0)", - inputJSON: `{"model":"user-defined-model(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "reasoning_effort", - expectValue: "none", - expectErr: false, - }, - // -1 → auto - { - name: "58", - from: "gemini", - to: "openai", - modelSuffix: "user-defined-model(-1)", - inputJSON: `{"model":"user-defined-model(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "reasoning_effort", - expectValue: "auto", - expectErr: false, - }, - // Case 59: No suffix from claude → translator injects default reasoning.effort: medium - { - name: "59", - from: "claude", - to: "codex", - modelSuffix: "user-defined-model", - inputJSON: 
`{"model":"user-defined-model","messages":[{"role":"user","content":"hi"}]}`, - expectField: "reasoning.effort", - expectValue: "medium", - expectErr: false, - }, - // 8192 → medium - { - name: "60", - from: "claude", - to: "codex", - modelSuffix: "user-defined-model(8192)", - inputJSON: `{"model":"user-defined-model(8192)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "reasoning.effort", - expectValue: "medium", - expectErr: false, - }, - // 64000 → xhigh - { - name: "61", - from: "claude", - to: "codex", - modelSuffix: "user-defined-model(64000)", - inputJSON: `{"model":"user-defined-model(64000)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "reasoning.effort", - expectValue: "xhigh", - expectErr: false, - }, - // 0 → none - { - name: "62", - from: "claude", - to: "codex", - modelSuffix: "user-defined-model(0)", - inputJSON: `{"model":"user-defined-model(0)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "reasoning.effort", - expectValue: "none", - expectErr: false, - }, - // -1 → auto - { - name: "63", - from: "claude", - to: "codex", - modelSuffix: "user-defined-model(-1)", - inputJSON: `{"model":"user-defined-model(-1)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "reasoning.effort", - expectValue: "auto", - expectErr: false, - }, - // openai/codex → gemini/claude for user-defined-model - { - name: "64", - from: "openai", - to: "gemini", - modelSuffix: "user-defined-model(8192)", - inputJSON: `{"model":"user-defined-model(8192)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "generationConfig.thinkingConfig.thinkingBudget", - expectValue: "8192", - includeThoughts: "true", - expectErr: false, - }, - { - name: "65", - from: "openai", - to: "claude", - modelSuffix: "user-defined-model(8192)", - inputJSON: `{"model":"user-defined-model(8192)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "thinking.budget_tokens", - expectValue: "8192", - expectErr: false, - }, - { - name: "66", - from: "codex", - to: "gemini", - modelSuffix: "user-defined-model(8192)", - inputJSON: `{"model":"user-defined-model(8192)","input":[{"role":"user","content":"hi"}]}`, - expectField: "generationConfig.thinkingConfig.thinkingBudget", - expectValue: "8192", - includeThoughts: "true", - expectErr: false, - }, - { - name: "67", - from: "codex", - to: "claude", - modelSuffix: "user-defined-model(8192)", - inputJSON: `{"model":"user-defined-model(8192)","input":[{"role":"user","content":"hi"}]}`, - expectField: "thinking.budget_tokens", - expectValue: "8192", - expectErr: false, + ID: "glm-test", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "iflow", + DisplayName: "GLM Test Model", + Thinking: ®istry.ThinkingSupport{Levels: []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"}}, + }, + { + ID: "minimax-test", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "iflow", + DisplayName: "MiniMax Test Model", + Thinking: ®istry.ThinkingSupport{Levels: []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"}}, }, } +} +// runThinkingTests runs thinking test cases using the real data flow path. +func runThinkingTests(t *testing.T, cases []thinkingTestCase) { for _, tc := range cases { tc := tc - testName := fmt.Sprintf("Case%s_%s->%s_%s", tc.name, tc.from, tc.to, tc.modelSuffix) + testName := fmt.Sprintf("Case%s_%s->%s_%s", tc.name, tc.from, tc.to, tc.model) t.Run(testName, func(t *testing.T) { - // Real data flow path: - // 1. 
Parse suffix to get base model - suffixResult := thinking.ParseSuffix(tc.modelSuffix) + suffixResult := thinking.ParseSuffix(tc.model) baseModel := suffixResult.ModelName - // 2. Translate request from source format to target format + translateTo := tc.to + applyTo := tc.to + if tc.to == "iflow" { + translateTo = "openai" + applyTo = "iflow" + } + body := sdktranslator.TranslateRequest( sdktranslator.FromString(tc.from), - sdktranslator.FromString(tc.to), + sdktranslator.FromString(translateTo), baseModel, []byte(tc.inputJSON), true, ) - // 3. Apply thinking configuration (main entry point) - body, err := thinking.ApplyThinking(body, tc.modelSuffix, tc.to) + body, err := thinking.ApplyThinking(body, tc.model, tc.from, applyTo) - // Validate results if tc.expectErr { if err == nil { t.Fatalf("expected error but got none, body=%s", string(body)) @@ -1001,18 +2571,23 @@ func TestThinkingE2EMatrix(t *testing.T) { t.Fatalf("unexpected error: %v, body=%s", err, string(body)) } - // Check for expected field absence if tc.expectField == "" { var hasThinking bool switch tc.to { case "gemini": hasThinking = gjson.GetBytes(body, "generationConfig.thinkingConfig").Exists() + case "gemini-cli": + hasThinking = gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() + case "antigravity": + hasThinking = gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() case "claude": hasThinking = gjson.GetBytes(body, "thinking").Exists() case "openai": hasThinking = gjson.GetBytes(body, "reasoning_effort").Exists() case "codex": hasThinking = gjson.GetBytes(body, "reasoning.effort").Exists() || gjson.GetBytes(body, "reasoning").Exists() + case "iflow": + hasThinking = gjson.GetBytes(body, "chat_template_kwargs.enable_thinking").Exists() || gjson.GetBytes(body, "reasoning_split").Exists() } if hasThinking { t.Fatalf("expected no thinking field but found one, body=%s", string(body)) @@ -1020,7 +2595,6 @@ func TestThinkingE2EMatrix(t *testing.T) { return } - // Check expected field value val := gjson.GetBytes(body, tc.expectField) if !val.Exists() { t.Fatalf("expected field %s not found, body=%s", tc.expectField, string(body)) @@ -1034,7 +2608,6 @@ func TestThinkingE2EMatrix(t *testing.T) { t.Fatalf("field %s: expected %q, got %q, body=%s", tc.expectField, tc.expectValue, actualValue, string(body)) } - // Check includeThoughts for Gemini/Antigravity if tc.includeThoughts != "" && (tc.to == "gemini" || tc.to == "antigravity") { path := "generationConfig.thinkingConfig.includeThoughts" if tc.to == "antigravity" { @@ -1049,6 +2622,17 @@ func TestThinkingE2EMatrix(t *testing.T) { t.Fatalf("includeThoughts: expected %s, got %s, body=%s", tc.includeThoughts, actual, string(body)) } } + + // Verify clear_thinking for iFlow GLM models when enable_thinking=true + if tc.to == "iflow" && tc.expectField == "chat_template_kwargs.enable_thinking" && tc.expectValue == "true" { + ctVal := gjson.GetBytes(body, "chat_template_kwargs.clear_thinking") + if !ctVal.Exists() { + t.Fatalf("expected clear_thinking field not found for GLM model, body=%s", string(body)) + } + if ctVal.Bool() != false { + t.Fatalf("clear_thinking: expected false, got %v, body=%s", ctVal.Bool(), string(body)) + } + } }) } } From d5ef4a6d1571d9ebb2208b8605f5e71704c70c0e Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sat, 17 Jan 2026 21:25:56 +0800 Subject: [PATCH 53/65] refactor(translator): remove registry model lookups from thinking config conversions --- 
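
A minimal Go sketch of the conversion pattern this refactor standardizes on: the
translator copies thinking fields across formats verbatim, with no registry lookup,
and model capability validation (budget clamping, level checks) is assumed to run
later in thinking.ApplyThinking. mapClaudeThinkingToGemini is a hypothetical helper
name; the field paths follow the gjson/sjson call shapes used in the diffs below.

	package sketch

	import (
		"github.com/tidwall/gjson"
		"github.com/tidwall/sjson"
	)

	// mapClaudeThinkingToGemini copies a Claude thinking block into Gemini
	// field paths as-is. No registry lookup happens here; clamping and
	// validation are deferred to thinking.ApplyThinking.
	func mapClaudeThinkingToGemini(out string, t gjson.Result) string {
		if t.Get("type").String() != "enabled" {
			return out
		}
		if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", int(b.Int()))
			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.includeThoughts", true)
		}
		return out
	}
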
 .../claude/antigravity_claude_request.go | 14 +++---
 .../claude/antigravity_claude_request_test.go | 4 +-
 .../claude/gemini/claude_gemini_request.go | 43 ++++++++++++++-----
 .../chat-completions/claude_openai_request.go | 29 ++++++-------
 .../claude_openai-responses_request.go | 29 ++++++-------
 .../codex/claude/codex_claude_request.go | 16 +++----
 .../codex/gemini/codex_gemini_request.go | 27 +++++++-----
 .../claude/gemini-cli_claude_request.go | 14 +++---
 .../gemini/claude/gemini_claude_request.go | 16 +++----
 .../openai/gemini/openai_gemini_request.go | 13 +++---
 10 files changed, 107 insertions(+), 98 deletions(-)

diff --git a/internal/translator/antigravity/claude/antigravity_claude_request.go b/internal/translator/antigravity/claude/antigravity_claude_request.go
index 2611b5c6..771a7b4f 100644
--- a/internal/translator/antigravity/claude/antigravity_claude_request.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request.go
@@ -12,7 +12,6 @@ import (
 "strings"

 "github.com/router-for-me/CLIProxyAPI/v6/internal/cache"
- "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
 "github.com/router-for-me/CLIProxyAPI/v6/internal/util"
@@ -388,14 +387,11 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _

 // Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
 if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
- modelInfo := registry.LookupModelInfo(modelName)
- if modelInfo != nil && modelInfo.Thinking != nil {
- if t.Get("type").String() == "enabled" {
- if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
- budget := int(b.Int())
- out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
- out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
- }
+ if t.Get("type").String() == "enabled" {
+ if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
+ budget := int(b.Int())
+ out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
+ out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
 }
 }
 }
diff --git a/internal/translator/antigravity/claude/antigravity_claude_request_test.go b/internal/translator/antigravity/claude/antigravity_claude_request_test.go
index 1d727c94..1babf65c 100644
--- a/internal/translator/antigravity/claude/antigravity_claude_request_test.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request_test.go
@@ -343,8 +343,8 @@ func TestConvertClaudeRequestToAntigravity_ThinkingConfig(t *testing.T) {
 if thinkingConfig.Get("thinkingBudget").Int() != 8000 {
 t.Errorf("Expected thinkingBudget 8000, got %d", thinkingConfig.Get("thinkingBudget").Int())
 }
- if !thinkingConfig.Get("include_thoughts").Bool() {
- t.Error("include_thoughts should be true")
+ if !thinkingConfig.Get("includeThoughts").Bool() {
+ t.Error("includeThoughts should be true")
 }
 } else {
 t.Log("thinkingConfig not present - model may not be registered in test registry")
diff --git a/internal/translator/claude/gemini/claude_gemini_request.go b/internal/translator/claude/gemini/claude_gemini_request.go
index 89857693..32f2d847 100644
--- a/internal/translator/claude/gemini/claude_gemini_request.go
+++ b/internal/translator/claude/gemini/claude_gemini_request.go
@@ -15,7 +15,7 @@ import (
 "strings"

 "github.com/google/uuid"
- "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 "github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 "github.com/tidwall/gjson"
 "github.com/tidwall/sjson"
@@ -115,18 +115,41 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream
 }
 }
 // Include thoughts configuration for reasoning process visibility
- // Only apply for models that support thinking and use numeric budgets, not discrete levels.
+ // Translator only does format conversion, ApplyThinking handles model capability validation.
 if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
- modelInfo := registry.LookupModelInfo(modelName)
- if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 {
- // Check for thinkingBudget first - if present, enable thinking with budget
- if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() && thinkingBudget.Int() > 0 {
- out, _ = sjson.Set(out, "thinking.type", "enabled")
- out, _ = sjson.Set(out, "thinking.budget_tokens", thinkingBudget.Int())
- } else if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
- // Fallback to include_thoughts if no budget specified
+ if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() {
+ level := strings.ToLower(strings.TrimSpace(thinkingLevel.String()))
+ switch level {
+ case "":
+ case "none":
+ out, _ = sjson.Set(out, "thinking.type", "disabled")
+ out, _ = sjson.Delete(out, "thinking.budget_tokens")
+ case "auto":
 out, _ = sjson.Set(out, "thinking.type", "enabled")
+ out, _ = sjson.Delete(out, "thinking.budget_tokens")
+ default:
+ if budget, ok := thinking.ConvertLevelToBudget(level); ok {
+ out, _ = sjson.Set(out, "thinking.type", "enabled")
+ out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
+ }
 }
+ } else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
+ budget := int(thinkingBudget.Int())
+ switch budget {
+ case 0:
+ out, _ = sjson.Set(out, "thinking.type", "disabled")
+ out, _ = sjson.Delete(out, "thinking.budget_tokens")
+ case -1:
+ out, _ = sjson.Set(out, "thinking.type", "enabled")
+ out, _ = sjson.Delete(out, "thinking.budget_tokens")
+ default:
+ out, _ = sjson.Set(out, "thinking.type", "enabled")
+ out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
+ }
+ } else if includeThoughts := thinkingConfig.Get("includeThoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
+ out, _ = sjson.Set(out, "thinking.type", "enabled")
+ } else if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
+ out, _ = sjson.Set(out, "thinking.type", "enabled")
 }
 }
 }
diff --git a/internal/translator/claude/openai/chat-completions/claude_openai_request.go b/internal/translator/claude/openai/chat-completions/claude_openai_request.go
index 8aa14793..79dc9c90 100644
--- a/internal/translator/claude/openai/chat-completions/claude_openai_request.go
+++ b/internal/translator/claude/openai/chat-completions/claude_openai_request.go
@@ -15,7 +15,6 @@ import (
 "strings"

 "github.com/google/uuid"
- "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 "github.com/tidwall/gjson"
 "github.com/tidwall/sjson"
@@ -66,23 +65,21 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream

 root := gjson.ParseBytes(rawJSON)

+ // Convert OpenAI reasoning_effort to Claude thinking config.
 if v := root.Get("reasoning_effort"); v.Exists() {
- modelInfo := registry.LookupModelInfo(modelName)
- if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 {
- effort := strings.ToLower(strings.TrimSpace(v.String()))
- if effort != "" {
- budget, ok := thinking.ConvertLevelToBudget(effort)
- if ok {
- switch budget {
- case 0:
- out, _ = sjson.Set(out, "thinking.type", "disabled")
- case -1:
+ effort := strings.ToLower(strings.TrimSpace(v.String()))
+ if effort != "" {
+ budget, ok := thinking.ConvertLevelToBudget(effort)
+ if ok {
+ switch budget {
+ case 0:
+ out, _ = sjson.Set(out, "thinking.type", "disabled")
+ case -1:
+ out, _ = sjson.Set(out, "thinking.type", "enabled")
+ default:
+ if budget > 0 {
 out, _ = sjson.Set(out, "thinking.type", "enabled")
- default:
- if budget > 0 {
- out, _ = sjson.Set(out, "thinking.type", "enabled")
- out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
- }
+ out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
 }
 }
 }
diff --git a/internal/translator/claude/openai/responses/claude_openai-responses_request.go b/internal/translator/claude/openai/responses/claude_openai-responses_request.go
index 479fb78f..5cbe23bf 100644
--- a/internal/translator/claude/openai/responses/claude_openai-responses_request.go
+++ b/internal/translator/claude/openai/responses/claude_openai-responses_request.go
@@ -10,7 +10,6 @@ import (
 "strings"

 "github.com/google/uuid"
- "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 "github.com/tidwall/gjson"
 "github.com/tidwall/sjson"
@@ -54,23 +53,21 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte

 root := gjson.ParseBytes(rawJSON)

+ // Convert OpenAI Responses reasoning.effort to Claude thinking config.
 if v := root.Get("reasoning.effort"); v.Exists() {
- modelInfo := registry.LookupModelInfo(modelName)
- if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 {
- effort := strings.ToLower(strings.TrimSpace(v.String()))
- if effort != "" {
- budget, ok := thinking.ConvertLevelToBudget(effort)
- if ok {
- switch budget {
- case 0:
- out, _ = sjson.Set(out, "thinking.type", "disabled")
- case -1:
+ effort := strings.ToLower(strings.TrimSpace(v.String()))
+ if effort != "" {
+ budget, ok := thinking.ConvertLevelToBudget(effort)
+ if ok {
+ switch budget {
+ case 0:
+ out, _ = sjson.Set(out, "thinking.type", "disabled")
+ case -1:
+ out, _ = sjson.Set(out, "thinking.type", "enabled")
+ default:
+ if budget > 0 {
 out, _ = sjson.Set(out, "thinking.type", "enabled")
- default:
- if budget > 0 {
- out, _ = sjson.Set(out, "thinking.type", "enabled")
- out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
- }
+ out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
 }
 }
 }
diff --git a/internal/translator/codex/claude/codex_claude_request.go b/internal/translator/codex/claude/codex_claude_request.go
index 17f2f674..f0f5d867 100644
--- a/internal/translator/codex/claude/codex_claude_request.go
+++ b/internal/translator/codex/claude/codex_claude_request.go
@@ -12,7 +12,6 @@ import (
 "strings"

 "github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
- "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 "github.com/tidwall/gjson"
 "github.com/tidwall/sjson"
@@ -218,18 +217,15 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 // Add additional configuration parameters for the Codex API.
 template, _ = sjson.Set(template, "parallel_tool_calls", true)

- // Convert thinking.budget_tokens to reasoning.effort for level-based models
- reasoningEffort := "medium" // default
+ // Convert thinking.budget_tokens to reasoning.effort.
+ reasoningEffort := "medium"
 if thinkingConfig := rootResult.Get("thinking"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
- modelInfo := registry.LookupModelInfo(modelName)
 switch thinkingConfig.Get("type").String() {
 case "enabled":
- if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) > 0 {
- if budgetTokens := thinkingConfig.Get("budget_tokens"); budgetTokens.Exists() {
- budget := int(budgetTokens.Int())
- if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" {
- reasoningEffort = effort
- }
+ if budgetTokens := thinkingConfig.Get("budget_tokens"); budgetTokens.Exists() {
+ budget := int(budgetTokens.Int())
+ if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" {
+ reasoningEffort = effort
 }
 }
 case "disabled":
diff --git a/internal/translator/codex/gemini/codex_gemini_request.go b/internal/translator/codex/gemini/codex_gemini_request.go
index d7d0a109..342c5b1a 100644
--- a/internal/translator/codex/gemini/codex_gemini_request.go
+++ b/internal/translator/codex/gemini/codex_gemini_request.go
@@ -14,7 +14,6 @@ import (
 "strings"

 "github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
- "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 "github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 "github.com/tidwall/gjson"
@@ -249,22 +248,28 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 // Fixed flags aligning with Codex expectations
 out, _ = sjson.Set(out, "parallel_tool_calls", true)

- // Convert thinkingBudget to reasoning.effort for level-based models
- reasoningEffort := "medium" // default
+ // Convert Gemini thinkingConfig to Codex reasoning.effort.
+ effortSet := false
 if genConfig := root.Get("generationConfig"); genConfig.Exists() {
 if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
- modelInfo := registry.LookupModelInfo(modelName)
- if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) > 0 {
- if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
- budget := int(thinkingBudget.Int())
- if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" {
- reasoningEffort = effort
- }
+ if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() {
+ effort := strings.ToLower(strings.TrimSpace(thinkingLevel.String()))
+ if effort != "" {
+ out, _ = sjson.Set(out, "reasoning.effort", effort)
+ effortSet = true
+ }
+ } else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
+ if effort, ok := thinking.ConvertBudgetToLevel(int(thinkingBudget.Int())); ok {
+ out, _ = sjson.Set(out, "reasoning.effort", effort)
+ effortSet = true
 }
 }
 }
 }
- out, _ = sjson.Set(out, "reasoning.effort", reasoningEffort)
+ if !effortSet {
+ // No thinking config, set default effort
+ out, _ = sjson.Set(out, "reasoning.effort", "medium")
+ }
 out, _ = sjson.Set(out, "reasoning.summary", "auto")
 out, _ = sjson.Set(out, "stream", true)
 out, _ = sjson.Set(out, "store", false)
diff --git a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
index f1bed88b..f4a51e8b 100644
--- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
+++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
@@ -9,7 +9,6 @@ import (
 "bytes"
 "strings"

- "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
 "github.com/tidwall/gjson"
 "github.com/tidwall/sjson"
@@ -161,14 +160,11 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []

 // Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
 if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
- modelInfo := registry.LookupModelInfo(modelName)
- if modelInfo != nil && modelInfo.Thinking != nil {
- if t.Get("type").String() == "enabled" {
- if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
- budget := int(b.Int())
- out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
- out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
- }
+ if t.Get("type").String() == "enabled" {
+ if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
+ budget := int(b.Int())
+ out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
+ out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
 }
 }
 }
diff --git a/internal/translator/gemini/claude/gemini_claude_request.go b/internal/translator/gemini/claude/gemini_claude_request.go
index f0dbd513..0d5361a5 100644
--- a/internal/translator/gemini/claude/gemini_claude_request.go
+++ b/internal/translator/gemini/claude/gemini_claude_request.go
@@ -9,7 +9,6 @@ import (
 "bytes"
 "strings"

- "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
 "github.com/tidwall/gjson"
 "github.com/tidwall/sjson"
@@ -153,16 +152,13 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 }

 // Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when enabled
- // Only apply for models that use numeric budgets, not discrete levels.
+ // Translator only does format conversion, ApplyThinking handles model capability validation.
 if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
- modelInfo := registry.LookupModelInfo(modelName)
- if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 {
- if t.Get("type").String() == "enabled" {
- if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
- budget := int(b.Int())
- out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
- out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
- }
+ if t.Get("type").String() == "enabled" {
+ if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
+ budget := int(b.Int())
+ out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
+ out, _ = sjson.Set(out, "generationConfig.thinkingConfig.includeThoughts", true)
 }
 }
 }
diff --git a/internal/translator/openai/gemini/openai_gemini_request.go b/internal/translator/openai/gemini/openai_gemini_request.go
index 7cdcb0f8..6e9bf637 100644
--- a/internal/translator/openai/gemini/openai_gemini_request.go
+++ b/internal/translator/openai/gemini/openai_gemini_request.go
@@ -77,12 +77,15 @@ func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 }
 }

- // Convert thinkingBudget to reasoning_effort
- // Always perform conversion to support allowCompat models that may not be in registry
+ // Map Gemini thinkingConfig to OpenAI reasoning_effort.
 if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
- if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
- budget := int(thinkingBudget.Int())
- if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" {
+ if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() {
+ effort := strings.ToLower(strings.TrimSpace(thinkingLevel.String()))
+ if effort != "" {
+ out, _ = sjson.Set(out, "reasoning_effort", effort)
+ }
+ } else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
+ if effort, ok := thinking.ConvertBudgetToLevel(int(thinkingBudget.Int())); ok {
 out, _ = sjson.Set(out, "reasoning_effort", effort)
 }
 }

From c7e8830a563d2615753162381fc2d0937c5ce0aa Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Sat, 17 Jan 2026 22:53:10 +0800
Subject: [PATCH 54/65] refactor(thinking): pass source and target formats to ApplyThinking for cross-format validation

Update ApplyThinking signature to accept fromFormat and toFormat
parameters instead of a single provider string. This enables:

- Proper level-to-budget conversion when source is level-based
  (openai/codex) and target is budget-based (gemini/claude)
- Strict budget range validation when source and target formats match
- Level clamping to nearest supported level for cross-format requests
- Format alias resolution in SDK translator registry for
  codex/openai-response

Also adds ErrBudgetOutOfRange error code and improves iflow config
extraction to fall back to openai format when iflow-specific config is
not present.
---
 .../runtime/executor/aistudio_executor.go | 2 +-
 .../runtime/executor/antigravity_executor.go | 8 +-
 internal/runtime/executor/claude_executor.go | 4 +-
 internal/runtime/executor/codex_executor.go | 6 +-
 .../runtime/executor/gemini_cli_executor.go | 6 +-
 internal/runtime/executor/gemini_executor.go | 6 +-
 .../executor/gemini_vertex_executor.go | 12 +-
 internal/runtime/executor/iflow_executor.go | 4 +-
 .../executor/openai_compat_executor.go | 6 +-
 internal/runtime/executor/qwen_executor.go | 4 +-
 internal/thinking/apply.go | 86 ++++--
 internal/thinking/errors.go | 4 +
 internal/thinking/strip.go | 32 ++-
 internal/thinking/validate.go | 270 ++++++++++++------
 sdk/translator/registry.go | 62 +++-
 15 files changed, 341 insertions(+), 171 deletions(-)

diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go
index fffb50c4..a020c670 100644
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -393,7 +393,7 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
 }
 originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream)
 payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
- payload, err := thinking.ApplyThinking(payload, req.Model, "gemini")
+ payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String())
 if err != nil {
 return nil, translatedPayload{}, err
 }
diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go
index 47113cfc..99392188 100644
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -137,7 +137,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au

 originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
 translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
- translated, err = thinking.ApplyThinking(translated, req.Model, "antigravity")
+ translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String())
 if err != nil {
 return resp, err
 }
@@ -256,7 +256,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *

 originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
- translated, err = thinking.ApplyThinking(translated, req.Model, "antigravity")
+ translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String())
 if err != nil {
 return resp, err
 }
@@ -622,7 +622,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya

 originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
- translated, err = thinking.ApplyThinking(translated, req.Model, "antigravity")
+ translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String())
 if err != nil {
 return nil, err
 }
@@ -802,7 +802,7 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut

 // Prepare payload once (doesn't depend on baseURL)
 payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
- payload, err := thinking.ApplyThinking(payload, req.Model, "antigravity")
+ payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String())
 if err != nil {
 return cliproxyexecutor.Response{}, err
 }
diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go
index b4cbd450..17c5a143 100644
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -106,7 +106,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r

 body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
 body, _ = sjson.SetBytes(body, "model", baseModel)
- body, err = thinking.ApplyThinking(body, req.Model, "claude")
+ body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
 if err != nil {
 return resp, err
 }
@@ -239,7 +239,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A

 body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 body, _ = sjson.SetBytes(body, "model", baseModel)
- body, err = thinking.ApplyThinking(body, req.Model, "claude")
+ body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
 if err != nil {
 return nil, err
 }
diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go
index eeefe6bc..cc0e32a1 100644
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -96,7 +96,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re

 body = sdktranslator.TranslateRequest(from, to, baseModel, body, false)
 body = misc.StripCodexUserAgent(body)
- body, err = thinking.ApplyThinking(body, req.Model, "codex")
+ body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
 if err != nil {
 return resp, err
 }
@@ -208,7 +208,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au

 body = sdktranslator.TranslateRequest(from, to, baseModel, body, true)
 body = misc.StripCodexUserAgent(body)
- body, err = thinking.ApplyThinking(body, req.Model, "codex")
+ body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
 if err != nil {
 return nil, err
 }
@@ -316,7 +316,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth

 body = sdktranslator.TranslateRequest(from, to, baseModel, body, false)
 body = misc.StripCodexUserAgent(body)
- body, err := thinking.ApplyThinking(body, req.Model, "codex")
+ body, err := thinking.ApplyThinking(body, req.Model, from.String(), to.String())
 if err != nil {
 return cliproxyexecutor.Response{}, err
 }
diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go
index add01cb3..b23406af 100644
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -123,7 +123,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth

 originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
 basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
- basePayload, err = thinking.ApplyThinking(basePayload, req.Model, "gemini-cli")
+ basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String())
 if err != nil {
 return resp, err
 }
@@ -272,7 +272,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut

 originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
- basePayload, err = thinking.ApplyThinking(basePayload, req.Model, "gemini-cli")
+ basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String())
 if err != nil {
 return nil, err
 }
@@ -479,7 +479,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
 for range models {
 payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
- payload, err = thinking.ApplyThinking(payload, req.Model, "gemini-cli")
+ payload, err = thinking.ApplyThinking(payload, req.Model, from.String(), to.String())
 if err != nil {
 return cliproxyexecutor.Response{}, err
 }
diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go
index 4cc5d945..e9f9dbca 100644
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -120,7 +120,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r

 originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
 body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
- body, err = thinking.ApplyThinking(body, req.Model, "gemini")
+ body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
 if err != nil {
 return resp, err
 }
@@ -222,7 +222,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A

 originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
- body, err = thinking.ApplyThinking(body, req.Model, "gemini")
+ body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
 if err != nil {
 return nil, err
 }
@@ -338,7 +338,7 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 to := sdktranslator.FromString("gemini")

 translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
- translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, "gemini")
+ translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String())
 if err != nil {
 return cliproxyexecutor.Response{}, err
 }
diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go
index 8a412b47..20e59b3f 100644
--- a/internal/runtime/executor/gemini_vertex_executor.go
+++ b/internal/runtime/executor/gemini_vertex_executor.go
@@ -170,7 +170,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au

 originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
 body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
- body, err = thinking.ApplyThinking(body, req.Model, "gemini")
+ body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
 if err != nil {
 return resp, err
 }
@@ -272,7 +272,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip

 originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
 body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
- body, err = thinking.ApplyThinking(body, req.Model, "gemini")
+ body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
 if err != nil {
 return resp, err
 }
@@ -375,7 +375,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte

 originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
- body, err = thinking.ApplyThinking(body, req.Model, "gemini")
+ body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
 if err != nil {
 return nil, err
 }
@@ -494,7 +494,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth

 originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
- body, err = thinking.ApplyThinking(body, req.Model, "gemini")
+ body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
 if err != nil {
 return nil, err
 }
@@ -605,7 +605,7 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context

 translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

- translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, "gemini")
+ translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String())
 if err != nil {
 return cliproxyexecutor.Response{}, err
 }
@@ -689,7 +689,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *

 translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

- translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, "gemini")
+ translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String())
 if err != nil {
 return cliproxyexecutor.Response{}, err
 }
diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go
index 6ce4221c..3e6ca4e5 100644
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -92,7 +92,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re

 body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 body, _ = sjson.SetBytes(body, "model", baseModel)
- body, err = thinking.ApplyThinking(body, req.Model, "iflow")
+ body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow")
 if err != nil {
 return resp, err
 }
@@ -190,7 +190,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au

 body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 body, _ = sjson.SetBytes(body, "model", baseModel)
- body, err = thinking.ApplyThinking(body, req.Model, "iflow")
+ body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow")
 if err != nil {
 return nil, err
 }
diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go
index 6ae9103f..a2bef724 100644
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -92,7 +92,7 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A

 translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), opts.Stream)
 translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated)
- translated, err = thinking.ApplyThinking(translated, req.Model, "openai")
+ translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String())
 if err != nil {
 return resp, err
 }
@@ -187,7 +187,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy

 translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated)
- translated, err = thinking.ApplyThinking(translated, req.Model, "openai")
+ translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String())
 if err != nil {
 return nil, err
 }
@@ -297,7 +297,7 @@ func (e *OpenAICompatExecutor) CountTokens(ctx context.Context, auth *cliproxyau

 modelForCounting := baseModel

- translated, err := thinking.ApplyThinking(translated, req.Model, "openai")
+ translated, err := thinking.ApplyThinking(translated, req.Model, from.String(), to.String())
 if err != nil {
 return cliproxyexecutor.Response{}, err
 }
diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go
index ff35c935..260165d9 100644
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -86,7 +86,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req

 body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 body, _ = sjson.SetBytes(body, "model", baseModel)
- body, err = thinking.ApplyThinking(body, req.Model, "openai")
+ body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
 if err != nil {
 return resp, err
 }
@@ -172,7 +172,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut

 body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 body, _ = sjson.SetBytes(body, "model", baseModel)
- body, err = thinking.ApplyThinking(body, req.Model, "openai")
+ body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
 if err != nil {
 return nil, err
 }
diff --git a/internal/thinking/apply.go b/internal/thinking/apply.go
index 003405c0..fe7d59b4 100644
--- a/internal/thinking/apply.go
+++ b/internal/thinking/apply.go
@@ -2,6 +2,8 @@
 package thinking

 import (
+ "strings"
+
 "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 log "github.com/sirupsen/logrus"
 "github.com/tidwall/gjson"
@@ -59,7 +61,8 @@ func IsUserDefinedModel(modelInfo *registry.ModelInfo) bool {
 // Parameters:
 // - body: Original request body JSON
 // - model: Model name, optionally with thinking suffix (e.g., "claude-sonnet-4-5(16384)")
-// - provider: Provider name (gemini, gemini-cli, antigravity, claude, openai, codex, iflow)
+// - fromFormat: Source request format (e.g., openai, codex, gemini)
+// - toFormat: Target provider format for the request body (gemini, gemini-cli, antigravity, claude, openai, codex, iflow)
 //
 // Returns:
 // - Modified request body JSON with thinking configuration applied
@@ -76,16 +79,21 @@ func IsUserDefinedModel(modelInfo *registry.ModelInfo) bool {
 // Example:
 //
 // // With suffix - suffix config takes priority
-// result, err := thinking.ApplyThinking(body, "gemini-2.5-pro(8192)", "gemini")
+// result, err := thinking.ApplyThinking(body, "gemini-2.5-pro(8192)", "gemini", "gemini")
 //
 // // Without suffix - uses body config
-// result, err := thinking.ApplyThinking(body, "gemini-2.5-pro", "gemini")
-func ApplyThinking(body []byte, model string, provider string) ([]byte, error) {
+// result, err := thinking.ApplyThinking(body, "gemini-2.5-pro", "gemini", "gemini")
+func ApplyThinking(body []byte, model string, fromFormat string, toFormat string) ([]byte, error) {
+ providerFormat := strings.ToLower(strings.TrimSpace(toFormat))
+ fromFormat = strings.ToLower(strings.TrimSpace(fromFormat))
+ if fromFormat == "" {
+ fromFormat = providerFormat
+ }
 // 1. Route check: Get provider applier
- applier := GetProviderApplier(provider)
+ applier := GetProviderApplier(providerFormat)
 if applier == nil {
 log.WithFields(log.Fields{
- "provider": provider,
+ "provider": providerFormat,
 "model": model,
 }).Debug("thinking: unknown provider, passthrough |")
 return body, nil
@@ -100,19 +108,19 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) {
 // Unknown models are treated as user-defined so thinking config can still be applied.
 // The upstream service is responsible for validating the configuration.
 if IsUserDefinedModel(modelInfo) {
- return applyUserDefinedModel(body, modelInfo, provider, suffixResult)
+ return applyUserDefinedModel(body, modelInfo, fromFormat, providerFormat, suffixResult)
 }
 if modelInfo.Thinking == nil {
- config := extractThinkingConfig(body, provider)
+ config := extractThinkingConfig(body, providerFormat)
 if hasThinkingConfig(config) {
 log.WithFields(log.Fields{
 "model": baseModel,
- "provider": provider,
+ "provider": providerFormat,
 }).Debug("thinking: model does not support thinking, stripping config |")
- return StripThinkingConfig(body, provider), nil
+ return StripThinkingConfig(body, providerFormat), nil
 }
 log.WithFields(log.Fields{
- "provider": provider,
+ "provider": providerFormat,
 "model": baseModel,
 }).Debug("thinking: model does not support thinking, passthrough |")
 return body, nil
@@ -121,19 +129,19 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) {
 // 4. Get config: suffix priority over body
 var config ThinkingConfig
 if suffixResult.HasSuffix {
- config = parseSuffixToConfig(suffixResult.RawSuffix, provider, model)
+ config = parseSuffixToConfig(suffixResult.RawSuffix, providerFormat, model)
 log.WithFields(log.Fields{
- "provider": provider,
+ "provider": providerFormat,
 "model": model,
 "mode": config.Mode,
 "budget": config.Budget,
 "level": config.Level,
 }).Debug("thinking: config from model suffix |")
 } else {
- config = extractThinkingConfig(body, provider)
+ config = extractThinkingConfig(body, providerFormat)
 if hasThinkingConfig(config) {
 log.WithFields(log.Fields{
- "provider": provider,
+ "provider": providerFormat,
 "model": modelInfo.ID,
 "mode": config.Mode,
 "budget": config.Budget,
@@ -144,17 +152,17 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) {

 if !hasThinkingConfig(config) {
 log.WithFields(log.Fields{
- "provider": provider,
+ "provider": providerFormat,
 "model": modelInfo.ID,
 }).Debug("thinking: no config found, passthrough |")
 return body, nil
 }

 // 5.
Validate and normalize configuration - validated, err := ValidateConfig(config, modelInfo, provider) + validated, err := ValidateConfig(config, modelInfo, fromFormat, providerFormat) if err != nil { log.WithFields(log.Fields{ - "provider": provider, + "provider": providerFormat, "model": modelInfo.ID, "error": err.Error(), }).Warn("thinking: validation failed |") @@ -167,14 +175,14 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) { // Defensive check: ValidateConfig should never return (nil, nil) if validated == nil { log.WithFields(log.Fields{ - "provider": provider, + "provider": providerFormat, "model": modelInfo.ID, }).Warn("thinking: ValidateConfig returned nil config without error, passthrough |") return body, nil } log.WithFields(log.Fields{ - "provider": provider, + "provider": providerFormat, "model": modelInfo.ID, "mode": validated.Mode, "budget": validated.Budget, @@ -228,7 +236,7 @@ func parseSuffixToConfig(rawSuffix, provider, model string) ThinkingConfig { // applyUserDefinedModel applies thinking configuration for user-defined models // without ThinkingSupport validation. -func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, provider string, suffixResult SuffixResult) ([]byte, error) { +func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, fromFormat, toFormat string, suffixResult SuffixResult) ([]byte, error) { // Get model ID for logging modelID := "" if modelInfo != nil { @@ -240,39 +248,57 @@ func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, provider // Get config: suffix priority over body var config ThinkingConfig if suffixResult.HasSuffix { - config = parseSuffixToConfig(suffixResult.RawSuffix, provider, modelID) + config = parseSuffixToConfig(suffixResult.RawSuffix, toFormat, modelID) } else { - config = extractThinkingConfig(body, provider) + config = extractThinkingConfig(body, toFormat) } if !hasThinkingConfig(config) { log.WithFields(log.Fields{ "model": modelID, - "provider": provider, + "provider": toFormat, }).Debug("thinking: user-defined model, passthrough (no config) |") return body, nil } - applier := GetProviderApplier(provider) + applier := GetProviderApplier(toFormat) if applier == nil { log.WithFields(log.Fields{ "model": modelID, - "provider": provider, + "provider": toFormat, }).Debug("thinking: user-defined model, passthrough (unknown provider) |") return body, nil } log.WithFields(log.Fields{ - "provider": provider, + "provider": toFormat, "model": modelID, "mode": config.Mode, "budget": config.Budget, "level": config.Level, }).Debug("thinking: applying config for user-defined model (skip validation)") + config = normalizeUserDefinedConfig(config, fromFormat, toFormat) return applier.Apply(body, config, modelInfo) } +func normalizeUserDefinedConfig(config ThinkingConfig, fromFormat, toFormat string) ThinkingConfig { + if config.Mode != ModeLevel { + return config + } + if !isBudgetBasedProvider(toFormat) || !isLevelBasedProvider(fromFormat) { + return config + } + budget, ok := ConvertLevelToBudget(string(config.Level)) + if !ok { + return config + } + config.Mode = ModeBudget + config.Budget = budget + config.Level = "" + return config +} + // extractThinkingConfig extracts provider-specific thinking config from request body. 
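A concrete trace of the one-way conversion normalizeUserDefinedConfig performs above (the formats and values are illustrative, not part of the patch):

    // Given fromFormat="openai" (level-based) and toFormat="gemini" (budget-based),
    // a user-defined model with config {Mode: ModeLevel, Level: "medium"} becomes
    // {Mode: ModeBudget, Budget: <result of ConvertLevelToBudget("medium")>}
    // before the applier runs. Any other from/to pairing, or a failed level
    // lookup, returns the config untouched.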
func extractThinkingConfig(body []byte, provider string) ThinkingConfig { if len(body) == 0 || !gjson.ValidBytes(body) { @@ -289,7 +315,11 @@ func extractThinkingConfig(body []byte, provider string) ThinkingConfig { case "codex": return extractCodexConfig(body) case "iflow": - return extractIFlowConfig(body) + config := extractIFlowConfig(body) + if hasThinkingConfig(config) { + return config + } + return extractOpenAIConfig(body) default: return ThinkingConfig{} } diff --git a/internal/thinking/errors.go b/internal/thinking/errors.go index 1cf9ccd0..5eed9381 100644 --- a/internal/thinking/errors.go +++ b/internal/thinking/errors.go @@ -24,6 +24,10 @@ const ( // Example: using level with a budget-only model ErrLevelNotSupported ErrorCode = "LEVEL_NOT_SUPPORTED" + // ErrBudgetOutOfRange indicates the budget value is outside model range. + // Example: budget 64000 exceeds max 20000 + ErrBudgetOutOfRange ErrorCode = "BUDGET_OUT_OF_RANGE" + // ErrProviderMismatch indicates the provider does not match the model. // Example: applying Claude format to a Gemini model ErrProviderMismatch ErrorCode = "PROVIDER_MISMATCH" diff --git a/internal/thinking/strip.go b/internal/thinking/strip.go index 4904d4d5..eb691715 100644 --- a/internal/thinking/strip.go +++ b/internal/thinking/strip.go @@ -27,28 +27,32 @@ func StripThinkingConfig(body []byte, provider string) []byte { return body } + var paths []string switch provider { case "claude": - result, _ := sjson.DeleteBytes(body, "thinking") - return result + paths = []string{"thinking"} case "gemini": - result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig") - return result + paths = []string{"generationConfig.thinkingConfig"} case "gemini-cli", "antigravity": - result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig") - return result + paths = []string{"request.generationConfig.thinkingConfig"} case "openai": - result, _ := sjson.DeleteBytes(body, "reasoning_effort") - return result + paths = []string{"reasoning_effort"} case "codex": - result, _ := sjson.DeleteBytes(body, "reasoning.effort") - return result + paths = []string{"reasoning.effort"} case "iflow": - result, _ := sjson.DeleteBytes(body, "chat_template_kwargs.enable_thinking") - result, _ = sjson.DeleteBytes(result, "chat_template_kwargs.clear_thinking") - result, _ = sjson.DeleteBytes(result, "reasoning_split") - return result + paths = []string{ + "chat_template_kwargs.enable_thinking", + "chat_template_kwargs.clear_thinking", + "reasoning_split", + "reasoning_effort", + } default: return body } + + result := body + for _, path := range paths { + result, _ = sjson.DeleteBytes(result, path) + } + return result } diff --git a/internal/thinking/validate.go b/internal/thinking/validate.go index aabe04eb..853e187d 100644 --- a/internal/thinking/validate.go +++ b/internal/thinking/validate.go @@ -9,64 +9,6 @@ import ( log "github.com/sirupsen/logrus" ) -// ClampBudget clamps a budget value to the model's supported range. -// -// Logging: -// - Warn when value=0 but ZeroAllowed=false -// - Debug when value is clamped to min/max -// -// Fields: provider, model, original_value, clamped_to, min, max -func ClampBudget(value int, modelInfo *registry.ModelInfo, provider string) int { - model := "unknown" - support := (*registry.ThinkingSupport)(nil) - if modelInfo != nil { - if modelInfo.ID != "" { - model = modelInfo.ID - } - support = modelInfo.Thinking - } - if support == nil { - return value - } - - // Auto value (-1) passes through without clamping. 
- if value == -1 { - return value - } - - min := support.Min - max := support.Max - if value == 0 && !support.ZeroAllowed { - log.WithFields(log.Fields{ - "provider": provider, - "model": model, - "original_value": value, - "clamped_to": min, - "min": min, - "max": max, - }).Warn("thinking: budget zero not allowed |") - return min - } - - // Some models are level-only and do not define numeric budget ranges. - if min == 0 && max == 0 { - return value - } - - if value < min { - if value == 0 && support.ZeroAllowed { - return 0 - } - logClamp(provider, model, value, min, min, max) - return min - } - if value > max { - logClamp(provider, model, value, max, min, max) - return max - } - return value -} - // ValidateConfig validates a thinking configuration against model capabilities. // // This function performs comprehensive validation: @@ -74,10 +16,14 @@ func ClampBudget(value int, modelInfo *registry.ModelInfo, provider string) int // - Auto-converts between Budget and Level formats based on model capability // - Validates that requested level is in the model's supported levels list // - Clamps budget values to model's allowed range +// - When converting Budget -> Level for level-only models, clamps the derived standard level to the nearest supported level +// (special values none/auto are preserved) // // Parameters: // - config: The thinking configuration to validate // - support: Model's ThinkingSupport properties (nil means no thinking support) +// - fromFormat: Source provider format (used to determine strict validation rules) +// - toFormat: Target provider format // // Returns: // - Normalized ThinkingConfig with clamped values @@ -87,9 +33,9 @@ func ClampBudget(value int, modelInfo *registry.ModelInfo, provider string) int // - Budget-only model + Level config → Level converted to Budget // - Level-only model + Budget config → Budget converted to Level // - Hybrid model → preserve original format -func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, provider string) (*ThinkingConfig, error) { +func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, fromFormat, toFormat string) (*ThinkingConfig, error) { + fromFormat, toFormat = strings.ToLower(strings.TrimSpace(fromFormat)), strings.ToLower(strings.TrimSpace(toFormat)) normalized := config - model := "unknown" support := (*registry.ThinkingSupport)(nil) if modelInfo != nil { @@ -106,6 +52,9 @@ func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, provid return &normalized, nil } + allowClampUnsupported := isBudgetBasedProvider(fromFormat) && isLevelBasedProvider(toFormat) + strictBudget := fromFormat != "" && fromFormat == toFormat + capability := detectModelCapability(modelInfo) switch capability { case CapabilityBudgetOnly: @@ -127,8 +76,10 @@ func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, provid if !ok { return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("budget %d cannot be converted to a valid level", normalized.Budget)) } + // When converting Budget -> Level for level-only models, clamp the derived standard level + // to the nearest supported level. Special values (none/auto) are preserved. 
normalized.Mode = ModeLevel - normalized.Level = ThinkingLevel(level) + normalized.Level = clampLevel(ThinkingLevel(level), modelInfo, toFormat) normalized.Budget = 0 } case CapabilityHybrid: @@ -151,18 +102,35 @@ func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, provid if len(support.Levels) > 0 && normalized.Mode == ModeLevel { if !isLevelSupported(string(normalized.Level), support.Levels) { - validLevels := normalizeLevels(support.Levels) - message := fmt.Sprintf("level %q not supported, valid levels: %s", strings.ToLower(string(normalized.Level)), strings.Join(validLevels, ", ")) - return nil, NewThinkingError(ErrLevelNotSupported, message) + if allowClampUnsupported { + normalized.Level = clampLevel(normalized.Level, modelInfo, toFormat) + } + if !isLevelSupported(string(normalized.Level), support.Levels) { + // User explicitly specified an unsupported level - return error + // (budget-derived levels may be clamped based on source format) + validLevels := normalizeLevels(support.Levels) + message := fmt.Sprintf("level %q not supported, valid levels: %s", strings.ToLower(string(normalized.Level)), strings.Join(validLevels, ", ")) + return nil, NewThinkingError(ErrLevelNotSupported, message) + } + } + } + + if strictBudget && normalized.Mode == ModeBudget { + min, max := support.Min, support.Max + if min != 0 || max != 0 { + if normalized.Budget < min || normalized.Budget > max || (normalized.Budget == 0 && !support.ZeroAllowed) { + message := fmt.Sprintf("budget %d out of range [%d,%d]", normalized.Budget, min, max) + return nil, NewThinkingError(ErrBudgetOutOfRange, message) + } } } // Convert ModeAuto to mid-range if dynamic not allowed if normalized.Mode == ModeAuto && !support.DynamicAllowed { - normalized = convertAutoToMidRange(normalized, support, provider, model) + normalized = convertAutoToMidRange(normalized, support, toFormat, model) } - if normalized.Mode == ModeNone && provider == "claude" { + if normalized.Mode == ModeNone && toFormat == "claude" { // Claude supports explicit disable via thinking.type="disabled". // Keep Budget=0 so applier can omit budget_tokens. normalized.Budget = 0 @@ -170,7 +138,7 @@ func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, provid } else { switch normalized.Mode { case ModeBudget, ModeAuto, ModeNone: - normalized.Budget = ClampBudget(normalized.Budget, modelInfo, provider) + normalized.Budget = clampBudget(normalized.Budget, modelInfo, toFormat) } // ModeNone with clamped Budget > 0: set Level to lowest for Level-only/Hybrid models @@ -183,23 +151,6 @@ func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, provid return &normalized, nil } -func isLevelSupported(level string, supported []string) bool { - for _, candidate := range supported { - if strings.EqualFold(level, strings.TrimSpace(candidate)) { - return true - } - } - return false -} - -func normalizeLevels(levels []string) []string { - normalized := make([]string, 0, len(levels)) - for _, level := range levels { - normalized = append(normalized, strings.ToLower(strings.TrimSpace(level))) - } - return normalized -} - // convertAutoToMidRange converts ModeAuto to a mid-range value when dynamic is not allowed. // // This function handles the case where a model does not support dynamic/auto thinking. @@ -246,7 +197,156 @@ func convertAutoToMidRange(config ThinkingConfig, support *registry.ThinkingSupp return config } -// logClamp logs a debug message when budget clamping occurs. 
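Assuming a hypothetical model whose Thinking.Levels is ["low", "high"], the nearest-match rule implemented by the clampLevel helper below works out to (m and p stand in for the model info and provider name):

    // standardLevelOrder: minimal(0) < low(1) < medium(2) < high(3) < xhigh(4)
    clampLevel("minimal", m, p) // -> "low"  (low is 1 step away, high is 3)
    clampLevel("medium", m, p)  // -> "low"  (tie at distance 1; the lower level wins)
    clampLevel("xhigh", m, p)   // -> "high" (high is 1 step away, low is 3)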
+// standardLevelOrder defines the canonical ordering of thinking levels from lowest to highest. +var standardLevelOrder = []ThinkingLevel{LevelMinimal, LevelLow, LevelMedium, LevelHigh, LevelXHigh} + +// clampLevel clamps the given level to the nearest supported level. +// On tie, prefers the lower level. +func clampLevel(level ThinkingLevel, modelInfo *registry.ModelInfo, provider string) ThinkingLevel { + model := "unknown" + var supported []string + if modelInfo != nil { + if modelInfo.ID != "" { + model = modelInfo.ID + } + if modelInfo.Thinking != nil { + supported = modelInfo.Thinking.Levels + } + } + + if len(supported) == 0 || isLevelSupported(string(level), supported) { + return level + } + + pos := levelIndex(string(level)) + if pos == -1 { + return level + } + bestIdx, bestDist := -1, len(standardLevelOrder)+1 + + for _, s := range supported { + if idx := levelIndex(strings.TrimSpace(s)); idx != -1 { + if dist := abs(pos - idx); dist < bestDist || (dist == bestDist && idx < bestIdx) { + bestIdx, bestDist = idx, dist + } + } + } + + if bestIdx >= 0 { + clamped := standardLevelOrder[bestIdx] + log.WithFields(log.Fields{ + "provider": provider, + "model": model, + "original_level": string(level), + "clamped_to": string(clamped), + }).Debug("thinking: level clamped |") + return clamped + } + return level +} + +// clampBudget clamps a budget value to the model's supported range. +func clampBudget(value int, modelInfo *registry.ModelInfo, provider string) int { + model := "unknown" + support := (*registry.ThinkingSupport)(nil) + if modelInfo != nil { + if modelInfo.ID != "" { + model = modelInfo.ID + } + support = modelInfo.Thinking + } + if support == nil { + return value + } + + // Auto value (-1) passes through without clamping. + if value == -1 { + return value + } + + min, max := support.Min, support.Max + if value == 0 && !support.ZeroAllowed { + log.WithFields(log.Fields{ + "provider": provider, + "model": model, + "original_value": value, + "clamped_to": min, + "min": min, + "max": max, + }).Warn("thinking: budget zero not allowed |") + return min + } + + // Some models are level-only and do not define numeric budget ranges. 
+ if min == 0 && max == 0 { + return value + } + + if value < min { + if value == 0 && support.ZeroAllowed { + return 0 + } + logClamp(provider, model, value, min, min, max) + return min + } + if value > max { + logClamp(provider, model, value, max, min, max) + return max + } + return value +} + +func isLevelSupported(level string, supported []string) bool { + for _, s := range supported { + if strings.EqualFold(level, strings.TrimSpace(s)) { + return true + } + } + return false +} + +func levelIndex(level string) int { + for i, l := range standardLevelOrder { + if strings.EqualFold(level, string(l)) { + return i + } + } + return -1 +} + +func normalizeLevels(levels []string) []string { + out := make([]string, len(levels)) + for i, l := range levels { + out[i] = strings.ToLower(strings.TrimSpace(l)) + } + return out +} + +func isBudgetBasedProvider(provider string) bool { + switch provider { + case "gemini", "gemini-cli", "antigravity", "claude": + return true + default: + return false + } +} + +func isLevelBasedProvider(provider string) bool { + switch provider { + case "openai", "openai-response", "codex": + return true + default: + return false + } +} + +func abs(x int) int { + if x < 0 { + return -x + } + return x +} + func logClamp(provider, model string, original, clampedTo, min, max int) { log.WithFields(log.Fields{ "provider": provider, diff --git a/sdk/translator/registry.go b/sdk/translator/registry.go index ace97137..882e80f6 100644 --- a/sdk/translator/registry.go +++ b/sdk/translator/registry.go @@ -38,15 +38,31 @@ func (r *Registry) Register(from, to Format, request RequestTransform, response r.responses[from][to] = response } +// formatAliases returns compatible aliases for a format, ordered by preference. +func formatAliases(format Format) []Format { + switch format { + case "codex": + return []Format{"codex", "openai-response"} + case "openai-response": + return []Format{"openai-response", "codex"} + default: + return []Format{format} + } +} + // TranslateRequest converts a payload between schemas, returning the original payload // if no translator is registered. 
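The alias table above fixes the lookup order on the request path; for example, with only an openai-response-to-gemini translator registered, a codex caller still resolves (the registry variable name is illustrative; note PATCH 55 later reverts this aliasing in favor of validator-level family grouping):

    // reg.TranslateRequest("codex", "gemini", model, raw, false) probes, in order:
    //   requests["codex"]["gemini"]           (miss)
    //   requests["openai-response"]["gemini"] (hit: translator runs)
    // If neither entry exists, rawJSON is returned unchanged.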
func (r *Registry) TranslateRequest(from, to Format, model string, rawJSON []byte, stream bool) []byte { r.mu.RLock() defer r.mu.RUnlock() - if byTarget, ok := r.requests[from]; ok { - if fn, isOk := byTarget[to]; isOk && fn != nil { - return fn(model, rawJSON, stream) + for _, fromFormat := range formatAliases(from) { + if byTarget, ok := r.requests[fromFormat]; ok { + for _, toFormat := range formatAliases(to) { + if fn, isOk := byTarget[toFormat]; isOk && fn != nil { + return fn(model, rawJSON, stream) + } + } } } return rawJSON @@ -57,9 +73,13 @@ func (r *Registry) HasResponseTransformer(from, to Format) bool { r.mu.RLock() defer r.mu.RUnlock() - if byTarget, ok := r.responses[from]; ok { - if _, isOk := byTarget[to]; isOk { - return true + for _, toFormat := range formatAliases(to) { + if byTarget, ok := r.responses[toFormat]; ok { + for _, fromFormat := range formatAliases(from) { + if _, isOk := byTarget[fromFormat]; isOk { + return true + } + } } } return false @@ -70,9 +90,13 @@ func (r *Registry) TranslateStream(ctx context.Context, from, to Format, model s r.mu.RLock() defer r.mu.RUnlock() - if byTarget, ok := r.responses[to]; ok { - if fn, isOk := byTarget[from]; isOk && fn.Stream != nil { - return fn.Stream(ctx, model, originalRequestRawJSON, requestRawJSON, rawJSON, param) + for _, toFormat := range formatAliases(to) { + if byTarget, ok := r.responses[toFormat]; ok { + for _, fromFormat := range formatAliases(from) { + if fn, isOk := byTarget[fromFormat]; isOk && fn.Stream != nil { + return fn.Stream(ctx, model, originalRequestRawJSON, requestRawJSON, rawJSON, param) + } + } } } return []string{string(rawJSON)} @@ -83,9 +107,13 @@ func (r *Registry) TranslateNonStream(ctx context.Context, from, to Format, mode r.mu.RLock() defer r.mu.RUnlock() - if byTarget, ok := r.responses[to]; ok { - if fn, isOk := byTarget[from]; isOk && fn.NonStream != nil { - return fn.NonStream(ctx, model, originalRequestRawJSON, requestRawJSON, rawJSON, param) + for _, toFormat := range formatAliases(to) { + if byTarget, ok := r.responses[toFormat]; ok { + for _, fromFormat := range formatAliases(from) { + if fn, isOk := byTarget[fromFormat]; isOk && fn.NonStream != nil { + return fn.NonStream(ctx, model, originalRequestRawJSON, requestRawJSON, rawJSON, param) + } + } } } return string(rawJSON) @@ -96,9 +124,13 @@ func (r *Registry) TranslateTokenCount(ctx context.Context, from, to Format, cou r.mu.RLock() defer r.mu.RUnlock() - if byTarget, ok := r.responses[to]; ok { - if fn, isOk := byTarget[from]; isOk && fn.TokenCount != nil { - return fn.TokenCount(ctx, count) + for _, toFormat := range formatAliases(to) { + if byTarget, ok := r.responses[toFormat]; ok { + for _, fromFormat := range formatAliases(from) { + if fn, isOk := byTarget[fromFormat]; isOk && fn.TokenCount != nil { + return fn.TokenCount(ctx, count) + } + } } } return string(rawJSON) From 03005b5d299b62fc1ed7eee55506c5a687243948 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sun, 18 Jan 2026 11:30:53 +0800 Subject: [PATCH 55/65] refactor(thinking): add Gemini family provider grouping for strict validation --- internal/logging/global_logger.go | 2 +- internal/thinking/validate.go | 107 +++++++++++-------- sdk/translator/registry.go | 62 +++-------- test/thinking_conversion_test.go | 168 +++++++++++++++++++++++++++--- 4 files changed, 230 insertions(+), 109 deletions(-) diff --git a/internal/logging/global_logger.go b/internal/logging/global_logger.go index 63c7af46..a5630964 100644 --- 
a/internal/logging/global_logger.go +++ b/internal/logging/global_logger.go @@ -30,7 +30,7 @@ var ( type LogFormatter struct{} // logFieldOrder defines the display order for common log fields. -var logFieldOrder = []string{"provider", "model", "mode", "budget", "level", "original_value", "min", "max", "clamped_to", "error"} +var logFieldOrder = []string{"provider", "model", "mode", "budget", "level", "original_value", "original_level", "min", "max", "clamped_to", "error"} // Format renders a single log entry with custom formatting. func (m *LogFormatter) Format(entry *log.Entry) ([]byte, error) { diff --git a/internal/thinking/validate.go b/internal/thinking/validate.go index 853e187d..5ce113f7 100644 --- a/internal/thinking/validate.go +++ b/internal/thinking/validate.go @@ -35,7 +35,6 @@ import ( // - Hybrid model → preserve original format func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, fromFormat, toFormat string) (*ThinkingConfig, error) { fromFormat, toFormat = strings.ToLower(strings.TrimSpace(fromFormat)), strings.ToLower(strings.TrimSpace(toFormat)) - normalized := config model := "unknown" support := (*registry.ThinkingSupport)(nil) if modelInfo != nil { @@ -49,106 +48,108 @@ func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, fromFo if config.Mode != ModeNone { return nil, NewThinkingErrorWithModel(ErrThinkingNotSupported, "thinking not supported for this model", model) } - return &normalized, nil + return &config, nil } allowClampUnsupported := isBudgetBasedProvider(fromFormat) && isLevelBasedProvider(toFormat) - strictBudget := fromFormat != "" && fromFormat == toFormat + strictBudget := fromFormat != "" && isSameProviderFamily(fromFormat, toFormat) + budgetDerivedFromLevel := false capability := detectModelCapability(modelInfo) switch capability { case CapabilityBudgetOnly: - if normalized.Mode == ModeLevel { - if normalized.Level == LevelAuto { + if config.Mode == ModeLevel { + if config.Level == LevelAuto { break } - budget, ok := ConvertLevelToBudget(string(normalized.Level)) + budget, ok := ConvertLevelToBudget(string(config.Level)) if !ok { - return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("unknown level: %s", normalized.Level)) + return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("unknown level: %s", config.Level)) } - normalized.Mode = ModeBudget - normalized.Budget = budget - normalized.Level = "" + config.Mode = ModeBudget + config.Budget = budget + config.Level = "" + budgetDerivedFromLevel = true } case CapabilityLevelOnly: - if normalized.Mode == ModeBudget { - level, ok := ConvertBudgetToLevel(normalized.Budget) + if config.Mode == ModeBudget { + level, ok := ConvertBudgetToLevel(config.Budget) if !ok { - return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("budget %d cannot be converted to a valid level", normalized.Budget)) + return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("budget %d cannot be converted to a valid level", config.Budget)) } // When converting Budget -> Level for level-only models, clamp the derived standard level // to the nearest supported level. Special values (none/auto) are preserved. 
- normalized.Mode = ModeLevel - normalized.Level = clampLevel(ThinkingLevel(level), modelInfo, toFormat) - normalized.Budget = 0 + config.Mode = ModeLevel + config.Level = clampLevel(ThinkingLevel(level), modelInfo, toFormat) + config.Budget = 0 } case CapabilityHybrid: } - if normalized.Mode == ModeLevel && normalized.Level == LevelNone { - normalized.Mode = ModeNone - normalized.Budget = 0 - normalized.Level = "" + if config.Mode == ModeLevel && config.Level == LevelNone { + config.Mode = ModeNone + config.Budget = 0 + config.Level = "" } - if normalized.Mode == ModeLevel && normalized.Level == LevelAuto { - normalized.Mode = ModeAuto - normalized.Budget = -1 - normalized.Level = "" + if config.Mode == ModeLevel && config.Level == LevelAuto { + config.Mode = ModeAuto + config.Budget = -1 + config.Level = "" } - if normalized.Mode == ModeBudget && normalized.Budget == 0 { - normalized.Mode = ModeNone - normalized.Level = "" + if config.Mode == ModeBudget && config.Budget == 0 { + config.Mode = ModeNone + config.Level = "" } - if len(support.Levels) > 0 && normalized.Mode == ModeLevel { - if !isLevelSupported(string(normalized.Level), support.Levels) { + if len(support.Levels) > 0 && config.Mode == ModeLevel { + if !isLevelSupported(string(config.Level), support.Levels) { if allowClampUnsupported { - normalized.Level = clampLevel(normalized.Level, modelInfo, toFormat) + config.Level = clampLevel(config.Level, modelInfo, toFormat) } - if !isLevelSupported(string(normalized.Level), support.Levels) { + if !isLevelSupported(string(config.Level), support.Levels) { // User explicitly specified an unsupported level - return error // (budget-derived levels may be clamped based on source format) validLevels := normalizeLevels(support.Levels) - message := fmt.Sprintf("level %q not supported, valid levels: %s", strings.ToLower(string(normalized.Level)), strings.Join(validLevels, ", ")) + message := fmt.Sprintf("level %q not supported, valid levels: %s", strings.ToLower(string(config.Level)), strings.Join(validLevels, ", ")) return nil, NewThinkingError(ErrLevelNotSupported, message) } } } - if strictBudget && normalized.Mode == ModeBudget { + if strictBudget && config.Mode == ModeBudget && !budgetDerivedFromLevel { min, max := support.Min, support.Max if min != 0 || max != 0 { - if normalized.Budget < min || normalized.Budget > max || (normalized.Budget == 0 && !support.ZeroAllowed) { - message := fmt.Sprintf("budget %d out of range [%d,%d]", normalized.Budget, min, max) + if config.Budget < min || config.Budget > max || (config.Budget == 0 && !support.ZeroAllowed) { + message := fmt.Sprintf("budget %d out of range [%d,%d]", config.Budget, min, max) return nil, NewThinkingError(ErrBudgetOutOfRange, message) } } } // Convert ModeAuto to mid-range if dynamic not allowed - if normalized.Mode == ModeAuto && !support.DynamicAllowed { - normalized = convertAutoToMidRange(normalized, support, toFormat, model) + if config.Mode == ModeAuto && !support.DynamicAllowed { + config = convertAutoToMidRange(config, support, toFormat, model) } - if normalized.Mode == ModeNone && toFormat == "claude" { + if config.Mode == ModeNone && toFormat == "claude" { // Claude supports explicit disable via thinking.type="disabled". // Keep Budget=0 so applier can omit budget_tokens. 
- normalized.Budget = 0 - normalized.Level = "" + config.Budget = 0 + config.Level = "" } else { - switch normalized.Mode { + switch config.Mode { case ModeBudget, ModeAuto, ModeNone: - normalized.Budget = clampBudget(normalized.Budget, modelInfo, toFormat) + config.Budget = clampBudget(config.Budget, modelInfo, toFormat) } // ModeNone with clamped Budget > 0: set Level to lowest for Level-only/Hybrid models // This ensures Apply layer doesn't need to access support.Levels - if normalized.Mode == ModeNone && normalized.Budget > 0 && len(support.Levels) > 0 { - normalized.Level = ThinkingLevel(support.Levels[0]) + if config.Mode == ModeNone && config.Budget > 0 && len(support.Levels) > 0 { + config.Level = ThinkingLevel(support.Levels[0]) } } - return &normalized, nil + return &config, nil } // convertAutoToMidRange converts ModeAuto to a mid-range value when dynamic is not allowed. @@ -340,6 +341,22 @@ func isLevelBasedProvider(provider string) bool { } } +func isGeminiFamily(provider string) bool { + switch provider { + case "gemini", "gemini-cli", "antigravity": + return true + default: + return false + } +} + +func isSameProviderFamily(from, to string) bool { + if from == to { + return true + } + return isGeminiFamily(from) && isGeminiFamily(to) +} + func abs(x int) int { if x < 0 { return -x diff --git a/sdk/translator/registry.go b/sdk/translator/registry.go index 882e80f6..ace97137 100644 --- a/sdk/translator/registry.go +++ b/sdk/translator/registry.go @@ -38,31 +38,15 @@ func (r *Registry) Register(from, to Format, request RequestTransform, response r.responses[from][to] = response } -// formatAliases returns compatible aliases for a format, ordered by preference. -func formatAliases(format Format) []Format { - switch format { - case "codex": - return []Format{"codex", "openai-response"} - case "openai-response": - return []Format{"openai-response", "codex"} - default: - return []Format{format} - } -} - // TranslateRequest converts a payload between schemas, returning the original payload // if no translator is registered. 
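The family grouping added in the validate.go hunk above is what widens strict budget validation beyond exact format matches; read directly off isGeminiFamily/isSameProviderFamily, the truth table is:

    isSameProviderFamily("gemini", "gemini")      // true  (exact match)
    isSameProviderFamily("gemini", "antigravity") // true  (both Gemini family)
    isSameProviderFamily("gemini-cli", "gemini")  // true
    isSameProviderFamily("openai", "codex")       // false (no family grouping defined)
    isSameProviderFamily("claude", "claude")      // true  (exact match only)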
func (r *Registry) TranslateRequest(from, to Format, model string, rawJSON []byte, stream bool) []byte { r.mu.RLock() defer r.mu.RUnlock() - for _, fromFormat := range formatAliases(from) { - if byTarget, ok := r.requests[fromFormat]; ok { - for _, toFormat := range formatAliases(to) { - if fn, isOk := byTarget[toFormat]; isOk && fn != nil { - return fn(model, rawJSON, stream) - } - } + if byTarget, ok := r.requests[from]; ok { + if fn, isOk := byTarget[to]; isOk && fn != nil { + return fn(model, rawJSON, stream) } } return rawJSON @@ -73,13 +57,9 @@ func (r *Registry) HasResponseTransformer(from, to Format) bool { r.mu.RLock() defer r.mu.RUnlock() - for _, toFormat := range formatAliases(to) { - if byTarget, ok := r.responses[toFormat]; ok { - for _, fromFormat := range formatAliases(from) { - if _, isOk := byTarget[fromFormat]; isOk { - return true - } - } + if byTarget, ok := r.responses[from]; ok { + if _, isOk := byTarget[to]; isOk { + return true } } return false @@ -90,13 +70,9 @@ func (r *Registry) TranslateStream(ctx context.Context, from, to Format, model s r.mu.RLock() defer r.mu.RUnlock() - for _, toFormat := range formatAliases(to) { - if byTarget, ok := r.responses[toFormat]; ok { - for _, fromFormat := range formatAliases(from) { - if fn, isOk := byTarget[fromFormat]; isOk && fn.Stream != nil { - return fn.Stream(ctx, model, originalRequestRawJSON, requestRawJSON, rawJSON, param) - } - } + if byTarget, ok := r.responses[to]; ok { + if fn, isOk := byTarget[from]; isOk && fn.Stream != nil { + return fn.Stream(ctx, model, originalRequestRawJSON, requestRawJSON, rawJSON, param) } } return []string{string(rawJSON)} @@ -107,13 +83,9 @@ func (r *Registry) TranslateNonStream(ctx context.Context, from, to Format, mode r.mu.RLock() defer r.mu.RUnlock() - for _, toFormat := range formatAliases(to) { - if byTarget, ok := r.responses[toFormat]; ok { - for _, fromFormat := range formatAliases(from) { - if fn, isOk := byTarget[fromFormat]; isOk && fn.NonStream != nil { - return fn.NonStream(ctx, model, originalRequestRawJSON, requestRawJSON, rawJSON, param) - } - } + if byTarget, ok := r.responses[to]; ok { + if fn, isOk := byTarget[from]; isOk && fn.NonStream != nil { + return fn.NonStream(ctx, model, originalRequestRawJSON, requestRawJSON, rawJSON, param) } } return string(rawJSON) @@ -124,13 +96,9 @@ func (r *Registry) TranslateTokenCount(ctx context.Context, from, to Format, cou r.mu.RLock() defer r.mu.RUnlock() - for _, toFormat := range formatAliases(to) { - if byTarget, ok := r.responses[toFormat]; ok { - for _, fromFormat := range formatAliases(from) { - if fn, isOk := byTarget[fromFormat]; isOk && fn.TokenCount != nil { - return fn.TokenCount(ctx, count) - } - } + if byTarget, ok := r.responses[to]; ok { + if fn, isOk := byTarget[from]; isOk && fn.TokenCount != nil { + return fn.TokenCount(ctx, count) } } return string(rawJSON) diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go index 91490fa2..397bbbff 100644 --- a/test/thinking_conversion_test.go +++ b/test/thinking_conversion_test.go @@ -921,10 +921,10 @@ func TestThinkingE2EMatrix_Suffix(t *testing.T) { expectValue: "8192", expectErr: false, }, - // Case 78: Codex to Gemini budget 8192 → passthrough → 8192 + // Case 78: OpenAI-Response to Gemini budget 8192 → passthrough → 8192 { name: "78", - from: "codex", + from: "openai-response", to: "gemini", model: "user-defined-model(8192)", inputJSON: `{"model":"user-defined-model(8192)","input":[{"role":"user","content":"hi"}]}`, @@ -933,10 +933,10 @@ 
func TestThinkingE2EMatrix_Suffix(t *testing.T) { includeThoughts: "true", expectErr: false, }, - // Case 79: Codex to Claude budget 8192 → passthrough → 8192 + // Case 79: OpenAI-Response to Claude budget 8192 → passthrough → 8192 { name: "79", - from: "codex", + from: "openai-response", to: "claude", model: "user-defined-model(8192)", inputJSON: `{"model":"user-defined-model(8192)","input":[{"role":"user","content":"hi"}]}`, @@ -968,10 +968,10 @@ func TestThinkingE2EMatrix_Suffix(t *testing.T) { expectField: "", expectErr: true, }, - // Case 82: Codex to Codex, level high → passthrough reasoning.effort + // Case 82: OpenAI-Response to Codex, level high → passthrough reasoning.effort { name: "82", - from: "codex", + from: "openai-response", to: "codex", model: "level-model(high)", inputJSON: `{"model":"level-model(high)","input":[{"role":"user","content":"hi"}]}`, @@ -979,10 +979,10 @@ func TestThinkingE2EMatrix_Suffix(t *testing.T) { expectValue: "high", expectErr: false, }, - // Case 83: Codex to Codex, level xhigh → out of range error + // Case 83: OpenAI-Response to Codex, level xhigh → out of range error { name: "83", - from: "codex", + from: "openai-response", to: "codex", model: "level-model(xhigh)", inputJSON: `{"model":"level-model(xhigh)","input":[{"role":"user","content":"hi"}]}`, @@ -1232,6 +1232,74 @@ func TestThinkingE2EMatrix_Suffix(t *testing.T) { expectValue: "false", expectErr: false, }, + + // Gemini Family Cross-Channel Consistency (Cases 106-114) + // Tests that gemini/gemini-cli/antigravity as same API family should have consistent validation behavior + + // Case 106: Gemini to Antigravity, budget 64000 → exceeds Max error (same family strict validation) + { + name: "106", + from: "gemini", + to: "antigravity", + model: "gemini-budget-model(64000)", + inputJSON: `{"model":"gemini-budget-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: true, + }, + // Case 107: Gemini to Gemini-CLI, budget 64000 → exceeds Max error (same family strict validation) + { + name: "107", + from: "gemini", + to: "gemini-cli", + model: "gemini-budget-model(64000)", + inputJSON: `{"model":"gemini-budget-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: true, + }, + // Case 108: Gemini-CLI to Antigravity, budget 64000 → exceeds Max error (same family strict validation) + { + name: "108", + from: "gemini-cli", + to: "antigravity", + model: "gemini-budget-model(64000)", + inputJSON: `{"model":"gemini-budget-model(64000)","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`, + expectField: "", + expectErr: true, + }, + // Case 109: Gemini-CLI to Gemini, budget 64000 → exceeds Max error (same family strict validation) + { + name: "109", + from: "gemini-cli", + to: "gemini", + model: "gemini-budget-model(64000)", + inputJSON: `{"model":"gemini-budget-model(64000)","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`, + expectField: "", + expectErr: true, + }, + // Case 110: Gemini to Antigravity, budget 8192 → passthrough (normal value) + { + name: "110", + from: "gemini", + to: "antigravity", + model: "gemini-budget-model(8192)", + inputJSON: `{"model":"gemini-budget-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 111: Gemini-CLI to Antigravity, budget 8192 → passthrough (normal value) + { + 
name: "111", + from: "gemini-cli", + to: "antigravity", + model: "gemini-budget-model(8192)", + inputJSON: `{"model":"gemini-budget-model(8192)","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, } runThinkingTests(t, cases) @@ -2122,10 +2190,10 @@ func TestThinkingE2EMatrix_Body(t *testing.T) { expectValue: "8192", expectErr: false, }, - // Case 78: Codex reasoning.effort=medium to Gemini → 8192 + // Case 78: OpenAI-Response reasoning.effort=medium to Gemini → 8192 { name: "78", - from: "codex", + from: "openai-response", to: "gemini", model: "user-defined-model", inputJSON: `{"model":"user-defined-model","input":[{"role":"user","content":"hi"}],"reasoning":{"effort":"medium"}}`, @@ -2134,10 +2202,10 @@ func TestThinkingE2EMatrix_Body(t *testing.T) { includeThoughts: "true", expectErr: false, }, - // Case 79: Codex reasoning.effort=medium to Claude → 8192 + // Case 79: OpenAI-Response reasoning.effort=medium to Claude → 8192 { name: "79", - from: "codex", + from: "openai-response", to: "claude", model: "user-defined-model", inputJSON: `{"model":"user-defined-model","input":[{"role":"user","content":"hi"}],"reasoning":{"effort":"medium"}}`, @@ -2169,10 +2237,10 @@ func TestThinkingE2EMatrix_Body(t *testing.T) { expectField: "", expectErr: true, }, - // Case 82: Codex to Codex, reasoning.effort=high → passthrough + // Case 82: OpenAI-Response to Codex, reasoning.effort=high → passthrough { name: "82", - from: "codex", + from: "openai-response", to: "codex", model: "level-model", inputJSON: `{"model":"level-model","input":[{"role":"user","content":"hi"}],"reasoning":{"effort":"high"}}`, @@ -2180,10 +2248,10 @@ func TestThinkingE2EMatrix_Body(t *testing.T) { expectValue: "high", expectErr: false, }, - // Case 83: Codex to Codex, reasoning.effort=xhigh → out of range error + // Case 83: OpenAI-Response to Codex, reasoning.effort=xhigh → out of range error { name: "83", - from: "codex", + from: "openai-response", to: "codex", model: "level-model", inputJSON: `{"model":"level-model","input":[{"role":"user","content":"hi"}],"reasoning":{"effort":"xhigh"}}`, @@ -2433,6 +2501,74 @@ func TestThinkingE2EMatrix_Body(t *testing.T) { expectValue: "false", expectErr: false, }, + + // Gemini Family Cross-Channel Consistency (Cases 106-114) + // Tests that gemini/gemini-cli/antigravity as same API family should have consistent validation behavior + + // Case 106: Gemini to Antigravity, thinkingBudget=64000 → exceeds Max error (same family strict validation) + { + name: "106", + from: "gemini", + to: "antigravity", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":64000}}}`, + expectField: "", + expectErr: true, + }, + // Case 107: Gemini to Gemini-CLI, thinkingBudget=64000 → exceeds Max error (same family strict validation) + { + name: "107", + from: "gemini", + to: "gemini-cli", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":64000}}}`, + expectField: "", + expectErr: true, + }, + // Case 108: Gemini-CLI to Antigravity, thinkingBudget=64000 → exceeds Max error (same family strict validation) + { + name: "108", + from: "gemini-cli", + to: "antigravity", + model: 
"gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":64000}}}}`, + expectField: "", + expectErr: true, + }, + // Case 109: Gemini-CLI to Gemini, thinkingBudget=64000 → exceeds Max error (same family strict validation) + { + name: "109", + from: "gemini-cli", + to: "gemini", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":64000}}}}`, + expectField: "", + expectErr: true, + }, + // Case 110: Gemini to Antigravity, thinkingBudget=8192 → passthrough (normal value) + { + name: "110", + from: "gemini", + to: "antigravity", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // Case 111: Gemini-CLI to Antigravity, thinkingBudget=8192 → passthrough (normal value) + { + name: "111", + from: "gemini-cli", + to: "antigravity", + model: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, } runThinkingTests(t, cases) From 62e2b672d9ef20eddc61f35ef19ecefbd57d29e0 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Sun, 18 Jan 2026 12:40:57 +0800 Subject: [PATCH 56/65] refactor(logging): centralize log directory resolution logic - Introduced `ResolveLogDirectory` function in `logging` package to standardize log directory determination across components. - Replaced redundant logic in `server`, `global_logger`, and `handlers` with the new utility function. --- internal/api/handlers/management/logs.go | 13 ++---------- internal/api/server.go | 5 +---- internal/logging/global_logger.go | 25 ++++++++++++++++++------ 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/internal/api/handlers/management/logs.go b/internal/api/handlers/management/logs.go index 2612318a..b64cd619 100644 --- a/internal/api/handlers/management/logs.go +++ b/internal/api/handlers/management/logs.go @@ -13,7 +13,7 @@ import ( "time" "github.com/gin-gonic/gin" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/logging" ) const ( @@ -360,16 +360,7 @@ func (h *Handler) logDirectory() string { if h.logDir != "" { return h.logDir } - if base := util.WritablePath(); base != "" { - return filepath.Join(base, "logs") - } - if h.configFilePath != "" { - dir := filepath.Dir(h.configFilePath) - if dir != "" && dir != "." 
{ - return filepath.Join(dir, "logs") - } - } - return "logs" + return logging.ResolveLogDirectory(h.cfg) } func (h *Handler) collectLogFiles(dir string) ([]string, error) { diff --git a/internal/api/server.go b/internal/api/server.go index 831bf003..aa78ac2a 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -261,10 +261,7 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk if optionState.localPassword != "" { s.mgmt.SetLocalPassword(optionState.localPassword) } - logDir := filepath.Join(s.currentPath, "logs") - if base := util.WritablePath(); base != "" { - logDir = filepath.Join(base, "logs") - } + logDir := logging.ResolveLogDirectory(cfg) s.mgmt.SetLogDirectory(logDir) s.localPassword = optionState.localPassword diff --git a/internal/logging/global_logger.go b/internal/logging/global_logger.go index 63c7af46..746bce28 100644 --- a/internal/logging/global_logger.go +++ b/internal/logging/global_logger.go @@ -121,6 +121,24 @@ func isDirWritable(dir string) bool { return true } +// ResolveLogDirectory determines the directory used for application logs. +func ResolveLogDirectory(cfg *config.Config) string { + logDir := "logs" + if base := util.WritablePath(); base != "" { + return filepath.Join(base, "logs") + } + if cfg == nil { + return logDir + } + if !isDirWritable(logDir) { + authDir := strings.TrimSpace(cfg.AuthDir) + if authDir != "" { + logDir = filepath.Join(authDir, "logs") + } + } + return logDir +} + // ConfigureLogOutput switches the global log destination between rotating files and stdout. // When logsMaxTotalSizeMB > 0, a background cleaner removes the oldest log files in the logs directory // until the total size is within the limit. @@ -130,12 +148,7 @@ func ConfigureLogOutput(cfg *config.Config) error { writerMu.Lock() defer writerMu.Unlock() - logDir := "logs" - if base := util.WritablePath(); base != "" { - logDir = filepath.Join(base, "logs") - } else if !isDirWritable(logDir) { - logDir = filepath.Join(cfg.AuthDir, "logs") - } + logDir := ResolveLogDirectory(cfg) protectedPath := "" if cfg.LoggingToFile { From cb6caf3f872128a439f773adab99e3d26fd1b64a Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sun, 18 Jan 2026 16:37:14 +0800 Subject: [PATCH 57/65] fix(thinking): update ValidateConfig to include fromSuffix parameter and adjust budget validation logic --- internal/logging/global_logger.go | 2 +- internal/thinking/apply.go | 2 +- internal/thinking/validate.go | 8 +- test/thinking_conversion_test.go | 131 ++++++++++++++++-------------- 4 files changed, 79 insertions(+), 64 deletions(-) diff --git a/internal/logging/global_logger.go b/internal/logging/global_logger.go index 146f6c80..28c9f3b9 100644 --- a/internal/logging/global_logger.go +++ b/internal/logging/global_logger.go @@ -30,7 +30,7 @@ var ( type LogFormatter struct{} // logFieldOrder defines the display order for common log fields. -var logFieldOrder = []string{"provider", "model", "mode", "budget", "level", "original_value", "original_level", "min", "max", "clamped_to", "error"} +var logFieldOrder = []string{"provider", "model", "mode", "budget", "level", "original_mode", "original_value", "min", "max", "clamped_to", "error"} // Format renders a single log entry with custom formatting. 
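The net effect of the new fromSuffix flag in the validate.go hunk below, sketched against the tests' hypothetical gemini-budget-model (Max 20000); the cfg/info variable names are illustrative:

    // Budget 64000 parsed from a "(64000)" model suffix: clamped, no error.
    v, err := thinking.ValidateConfig(cfg, info, "gemini", "gemini-cli", true)
    // err == nil, v.Budget == 20000

    // Same budget supplied in the request body: same-family strict validation rejects it.
    v, err = thinking.ValidateConfig(cfg, info, "gemini", "gemini-cli", false)
    // err carries ErrBudgetOutOfRange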
func (m *LogFormatter) Format(entry *log.Entry) ([]byte, error) { diff --git a/internal/thinking/apply.go b/internal/thinking/apply.go index fe7d59b4..cf0e373b 100644 --- a/internal/thinking/apply.go +++ b/internal/thinking/apply.go @@ -159,7 +159,7 @@ func ApplyThinking(body []byte, model string, fromFormat string, toFormat string } // 5. Validate and normalize configuration - validated, err := ValidateConfig(config, modelInfo, fromFormat, providerFormat) + validated, err := ValidateConfig(config, modelInfo, fromFormat, providerFormat, suffixResult.HasSuffix) if err != nil { log.WithFields(log.Fields{ "provider": providerFormat, diff --git a/internal/thinking/validate.go b/internal/thinking/validate.go index 5ce113f7..f082ad56 100644 --- a/internal/thinking/validate.go +++ b/internal/thinking/validate.go @@ -18,12 +18,14 @@ import ( // - Clamps budget values to model's allowed range // - When converting Budget -> Level for level-only models, clamps the derived standard level to the nearest supported level // (special values none/auto are preserved) +// - When config comes from a model suffix, strict budget validation is disabled (we clamp instead of error) // // Parameters: // - config: The thinking configuration to validate // - support: Model's ThinkingSupport properties (nil means no thinking support) // - fromFormat: Source provider format (used to determine strict validation rules) // - toFormat: Target provider format +// - fromSuffix: Whether config was sourced from model suffix // // Returns: // - Normalized ThinkingConfig with clamped values @@ -33,7 +35,7 @@ import ( // - Budget-only model + Level config → Level converted to Budget // - Level-only model + Budget config → Budget converted to Level // - Hybrid model → preserve original format -func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, fromFormat, toFormat string) (*ThinkingConfig, error) { +func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, fromFormat, toFormat string, fromSuffix bool) (*ThinkingConfig, error) { fromFormat, toFormat = strings.ToLower(strings.TrimSpace(fromFormat)), strings.ToLower(strings.TrimSpace(toFormat)) model := "unknown" support := (*registry.ThinkingSupport)(nil) @@ -52,7 +54,7 @@ func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, fromFo } allowClampUnsupported := isBudgetBasedProvider(fromFormat) && isLevelBasedProvider(toFormat) - strictBudget := fromFormat != "" && isSameProviderFamily(fromFormat, toFormat) + strictBudget := !fromSuffix && fromFormat != "" && isSameProviderFamily(fromFormat, toFormat) budgetDerivedFromLevel := false capability := detectModelCapability(modelInfo) @@ -238,7 +240,7 @@ func clampLevel(level ThinkingLevel, modelInfo *registry.ModelInfo, provider str log.WithFields(log.Fields{ "provider": provider, "model": model, - "original_level": string(level), + "original_value": string(level), "clamped_to": string(clamped), }).Debug("thinking: level clamped |") return clamped diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go index 397bbbff..8f527193 100644 --- a/test/thinking_conversion_test.go +++ b/test/thinking_conversion_test.go @@ -1001,15 +1001,17 @@ func TestThinkingE2EMatrix_Suffix(t *testing.T) { includeThoughts: "true", expectErr: false, }, - // Case 85: Gemini to Gemini, budget 64000 → exceeds Max error + // Case 85: Gemini to Gemini, budget 64000 → clamped to Max { - name: "85", - from: "gemini", - to: "gemini", - model: "gemini-budget-model(64000)", - inputJSON: 
`{"model":"gemini-budget-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "", - expectErr: true, + name: "85", + from: "gemini", + to: "gemini", + model: "gemini-budget-model(64000)", + inputJSON: `{"model":"gemini-budget-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, }, // Case 86: Claude to Claude, budget 8192 → passthrough thinking.budget_tokens { @@ -1022,20 +1024,21 @@ func TestThinkingE2EMatrix_Suffix(t *testing.T) { expectValue: "8192", expectErr: false, }, - // Case 87: Claude to Claude, budget 200000 → exceeds Max error + // Case 87: Claude to Claude, budget 200000 → clamped to Max { name: "87", from: "claude", to: "claude", model: "claude-budget-model(200000)", inputJSON: `{"model":"claude-budget-model(200000)","messages":[{"role":"user","content":"hi"}]}`, - expectField: "", - expectErr: true, + expectField: "thinking.budget_tokens", + expectValue: "128000", + expectErr: false, }, - // Case 88: Antigravity to Antigravity, budget 8192 → passthrough thinkingBudget + // Case 88: Gemini-CLI to Antigravity, budget 8192 → passthrough thinkingBudget { name: "88", - from: "antigravity", + from: "gemini-cli", to: "antigravity", model: "antigravity-budget-model(8192)", inputJSON: `{"model":"antigravity-budget-model(8192)","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`, @@ -1044,15 +1047,17 @@ func TestThinkingE2EMatrix_Suffix(t *testing.T) { includeThoughts: "true", expectErr: false, }, - // Case 89: Antigravity to Antigravity, budget 64000 → exceeds Max error + // Case 89: Gemini-CLI to Antigravity, budget 64000 → clamped to Max { - name: "89", - from: "antigravity", - to: "antigravity", - model: "antigravity-budget-model(64000)", - inputJSON: `{"model":"antigravity-budget-model(64000)","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`, - expectField: "", - expectErr: true, + name: "89", + from: "gemini-cli", + to: "antigravity", + model: "antigravity-budget-model(64000)", + inputJSON: `{"model":"antigravity-budget-model(64000)","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, }, // iflow tests: glm-test and minimax-test (Cases 90-105) @@ -1236,45 +1241,53 @@ func TestThinkingE2EMatrix_Suffix(t *testing.T) { // Gemini Family Cross-Channel Consistency (Cases 106-114) // Tests that gemini/gemini-cli/antigravity as same API family should have consistent validation behavior - // Case 106: Gemini to Antigravity, budget 64000 → exceeds Max error (same family strict validation) + // Case 106: Gemini to Antigravity, budget 64000 (suffix) → clamped to Max { - name: "106", - from: "gemini", - to: "antigravity", - model: "gemini-budget-model(64000)", - inputJSON: `{"model":"gemini-budget-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "", - expectErr: true, + name: "106", + from: "gemini", + to: "antigravity", + model: "gemini-budget-model(64000)", + inputJSON: `{"model":"gemini-budget-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, }, - // Case 107: Gemini to Gemini-CLI, budget 64000 → exceeds Max error (same 
family strict validation) + // Case 107: Gemini to Gemini-CLI, budget 64000 (suffix) → clamped to Max { - name: "107", - from: "gemini", - to: "gemini-cli", - model: "gemini-budget-model(64000)", - inputJSON: `{"model":"gemini-budget-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, - expectField: "", - expectErr: true, + name: "107", + from: "gemini", + to: "gemini-cli", + model: "gemini-budget-model(64000)", + inputJSON: `{"model":"gemini-budget-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, }, - // Case 108: Gemini-CLI to Antigravity, budget 64000 → exceeds Max error (same family strict validation) + // Case 108: Gemini-CLI to Antigravity, budget 64000 (suffix) → clamped to Max { - name: "108", - from: "gemini-cli", - to: "antigravity", - model: "gemini-budget-model(64000)", - inputJSON: `{"model":"gemini-budget-model(64000)","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`, - expectField: "", - expectErr: true, + name: "108", + from: "gemini-cli", + to: "antigravity", + model: "gemini-budget-model(64000)", + inputJSON: `{"model":"gemini-budget-model(64000)","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`, + expectField: "request.generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, }, - // Case 109: Gemini-CLI to Gemini, budget 64000 → exceeds Max error (same family strict validation) + // Case 109: Gemini-CLI to Gemini, budget 64000 (suffix) → clamped to Max { - name: "109", - from: "gemini-cli", - to: "gemini", - model: "gemini-budget-model(64000)", - inputJSON: `{"model":"gemini-budget-model(64000)","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`, - expectField: "", - expectErr: true, + name: "109", + from: "gemini-cli", + to: "gemini", + model: "gemini-budget-model(64000)", + inputJSON: `{"model":"gemini-budget-model(64000)","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, }, // Case 110: Gemini to Antigravity, budget 8192 → passthrough (normal value) { @@ -2301,10 +2314,10 @@ func TestThinkingE2EMatrix_Body(t *testing.T) { expectField: "", expectErr: true, }, - // Case 88: Antigravity to Antigravity, thinkingBudget=8192 → passthrough + // Case 88: Gemini-CLI to Antigravity, thinkingBudget=8192 → passthrough { name: "88", - from: "antigravity", + from: "gemini-cli", to: "antigravity", model: "antigravity-budget-model", inputJSON: `{"model":"antigravity-budget-model","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`, @@ -2313,10 +2326,10 @@ func TestThinkingE2EMatrix_Body(t *testing.T) { includeThoughts: "true", expectErr: false, }, - // Case 89: Antigravity to Antigravity, thinkingBudget=64000 → exceeds Max error + // Case 89: Gemini-CLI to Antigravity, thinkingBudget=64000 → exceeds Max error { name: "89", - from: "antigravity", + from: "gemini-cli", to: "antigravity", model: "antigravity-budget-model", inputJSON: `{"model":"antigravity-budget-model","request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":64000}}}}`, @@ -2744,9 +2757,9 @@ func runThinkingTests(t *testing.T, cases 
[]thinkingTestCase) { t.Fatalf("field %s: expected %q, got %q, body=%s", tc.expectField, tc.expectValue, actualValue, string(body)) } - if tc.includeThoughts != "" && (tc.to == "gemini" || tc.to == "antigravity") { + if tc.includeThoughts != "" && (tc.to == "gemini" || tc.to == "gemini-cli" || tc.to == "antigravity") { path := "generationConfig.thinkingConfig.includeThoughts" - if tc.to == "antigravity" { + if tc.to == "gemini-cli" || tc.to == "antigravity" { path = "request.generationConfig.thinkingConfig.includeThoughts" } itVal := gjson.GetBytes(body, path) From 140d6211cc0a1c2e15527c96bf4158b43c1182e7 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Mon, 19 Jan 2026 03:58:28 +0800 Subject: [PATCH 58/65] feat(translator): add reasoning state tracking and improve reasoning summary handling - Introduced `oaiToResponsesStateReasoning` to track reasoning data. - Enhanced logic for emitting reasoning summary events and managing state transitions. - Updated output generation to handle multiple reasoning entries consistently. --- .../openai_openai-responses_response.go | 64 ++++++++++++++----- 1 file changed, 48 insertions(+), 16 deletions(-) diff --git a/internal/translator/openai/openai/responses/openai_openai-responses_response.go b/internal/translator/openai/openai/responses/openai_openai-responses_response.go index 17233ca5..15152852 100644 --- a/internal/translator/openai/openai/responses/openai_openai-responses_response.go +++ b/internal/translator/openai/openai/responses/openai_openai-responses_response.go @@ -12,6 +12,10 @@ import ( "github.com/tidwall/sjson" ) +type oaiToResponsesStateReasoning struct { + ReasoningID string + ReasoningData string +} type oaiToResponsesState struct { Seq int ResponseID string @@ -23,6 +27,7 @@ type oaiToResponsesState struct { // Per-output message text buffers by index MsgTextBuf map[int]*strings.Builder ReasoningBuf strings.Builder + Reasonings []oaiToResponsesStateReasoning FuncArgsBuf map[int]*strings.Builder // index -> args FuncNames map[int]string // index -> name FuncCallIDs map[int]string // index -> call_id @@ -63,6 +68,7 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context, MsgItemDone: make(map[int]bool), FuncArgsDone: make(map[int]bool), FuncItemDone: make(map[int]bool), + Reasonings: make([]oaiToResponsesStateReasoning, 0), } } st := (*param).(*oaiToResponsesState) @@ -157,6 +163,31 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context, st.Started = true } + stopReasoning := func(text string) { + // Emit reasoning done events + textDone := `{"type":"response.reasoning_summary_text.done","sequence_number":0,"item_id":"","output_index":0,"summary_index":0,"text":""}` + textDone, _ = sjson.Set(textDone, "sequence_number", nextSeq()) + textDone, _ = sjson.Set(textDone, "item_id", st.ReasoningID) + textDone, _ = sjson.Set(textDone, "output_index", st.ReasoningIndex) + textDone, _ = sjson.Set(textDone, "text", text) + out = append(out, emitRespEvent("response.reasoning_summary_text.done", textDone)) + partDone := `{"type":"response.reasoning_summary_part.done","sequence_number":0,"item_id":"","output_index":0,"summary_index":0,"part":{"type":"summary_text","text":""}}` + partDone, _ = sjson.Set(partDone, "sequence_number", nextSeq()) + partDone, _ = sjson.Set(partDone, "item_id", st.ReasoningID) + partDone, _ = sjson.Set(partDone, "output_index", st.ReasoningIndex) + partDone, _ = sjson.Set(partDone, "part.text", text) + out = append(out, 
emitRespEvent("response.reasoning_summary_part.done", partDone)) + outputItemDone := `{"type":"response.output_item.done","item":{"id":"","type":"reasoning","encrypted_content":"","summary":[{"type":"summary_text","text":""}]},"output_index":0,"sequence_number":0}` + outputItemDone, _ = sjson.Set(outputItemDone, "sequence_number", nextSeq()) + outputItemDone, _ = sjson.Set(outputItemDone, "item.id", st.ReasoningID) + outputItemDone, _ = sjson.Set(outputItemDone, "output_index", st.ReasoningIndex) + outputItemDone, _ = sjson.Set(outputItemDone, "item.summary.text", text) + out = append(out, emitRespEvent("response.output_item.done", outputItemDone)) + + st.Reasonings = append(st.Reasonings, oaiToResponsesStateReasoning{ReasoningID: st.ReasoningID, ReasoningData: text}) + st.ReasoningID = "" + } + // choices[].delta content / tool_calls / reasoning_content if choices := root.Get("choices"); choices.Exists() && choices.IsArray() { choices.ForEach(func(_, choice gjson.Result) bool { @@ -165,6 +196,10 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context, if delta.Exists() { if c := delta.Get("content"); c.Exists() && c.String() != "" { // Ensure the message item and its first content part are announced before any text deltas + if st.ReasoningID != "" { + stopReasoning(st.ReasoningBuf.String()) + st.ReasoningBuf.Reset() + } if !st.MsgItemAdded[idx] { item := `{"type":"response.output_item.added","sequence_number":0,"output_index":0,"item":{"id":"","type":"message","status":"in_progress","content":[],"role":"assistant"}}` item, _ = sjson.Set(item, "sequence_number", nextSeq()) @@ -226,6 +261,10 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context, // tool calls if tcs := delta.Get("tool_calls"); tcs.Exists() && tcs.IsArray() { + if st.ReasoningID != "" { + stopReasoning(st.ReasoningBuf.String()) + st.ReasoningBuf.Reset() + } // Before emitting any function events, if a message is open for this index, // close its text/content to match Codex expected ordering. 
if st.MsgItemAdded[idx] && !st.MsgItemDone[idx] { @@ -361,17 +400,8 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context, } if st.ReasoningID != "" { - // Emit reasoning done events - textDone := `{"type":"response.reasoning_summary_text.done","sequence_number":0,"item_id":"","output_index":0,"summary_index":0,"text":""}` - textDone, _ = sjson.Set(textDone, "sequence_number", nextSeq()) - textDone, _ = sjson.Set(textDone, "item_id", st.ReasoningID) - textDone, _ = sjson.Set(textDone, "output_index", st.ReasoningIndex) - out = append(out, emitRespEvent("response.reasoning_summary_text.done", textDone)) - partDone := `{"type":"response.reasoning_summary_part.done","sequence_number":0,"item_id":"","output_index":0,"summary_index":0,"part":{"type":"summary_text","text":""}}` - partDone, _ = sjson.Set(partDone, "sequence_number", nextSeq()) - partDone, _ = sjson.Set(partDone, "item_id", st.ReasoningID) - partDone, _ = sjson.Set(partDone, "output_index", st.ReasoningIndex) - out = append(out, emitRespEvent("response.reasoning_summary_part.done", partDone)) + stopReasoning(st.ReasoningBuf.String()) + st.ReasoningBuf.Reset() } // Emit function call done events for any active function calls @@ -485,11 +515,13 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context, } // Build response.output using aggregated buffers outputsWrapper := `{"arr":[]}` - if st.ReasoningBuf.Len() > 0 { - item := `{"id":"","type":"reasoning","summary":[{"type":"summary_text","text":""}]}` - item, _ = sjson.Set(item, "id", st.ReasoningID) - item, _ = sjson.Set(item, "summary.0.text", st.ReasoningBuf.String()) - outputsWrapper, _ = sjson.SetRaw(outputsWrapper, "arr.-1", item) + if len(st.Reasonings) > 0 { + for _, r := range st.Reasonings { + item := `{"id":"","type":"reasoning","summary":[{"type":"summary_text","text":""}]}` + item, _ = sjson.Set(item, "id", r.ReasoningID) + item, _ = sjson.Set(item, "summary.0.text", r.ReasoningData) + outputsWrapper, _ = sjson.SetRaw(outputsWrapper, "arr.-1", item) + } } // Append message items in ascending index order if len(st.MsgItemAdded) > 0 { From cf9daf470ca7698c08970f6cb22aede8af2677c5 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Mon, 19 Jan 2026 11:23:44 +0800 Subject: [PATCH 59/65] feat(translator): report cached token usage in Claude output --- .../codex/claude/codex_claude_response.go | 39 +++++++++++--- .../openai/claude/openai_claude_response.go | 53 +++++++++++++------ 2 files changed, 67 insertions(+), 25 deletions(-) diff --git a/internal/translator/codex/claude/codex_claude_response.go b/internal/translator/codex/claude/codex_claude_response.go index c700ef84..5223cd94 100644 --- a/internal/translator/codex/claude/codex_claude_response.go +++ b/internal/translator/codex/claude/codex_claude_response.go @@ -117,8 +117,12 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa } else { template, _ = sjson.Set(template, "delta.stop_reason", "end_turn") } - template, _ = sjson.Set(template, "usage.input_tokens", rootResult.Get("response.usage.input_tokens").Int()) - template, _ = sjson.Set(template, "usage.output_tokens", rootResult.Get("response.usage.output_tokens").Int()) + inputTokens, outputTokens, cachedTokens := extractResponsesUsage(rootResult.Get("response.usage")) + template, _ = sjson.Set(template, "usage.input_tokens", inputTokens) + template, _ = sjson.Set(template, "usage.output_tokens", outputTokens) + if cachedTokens > 0 { + template, 
_ = sjson.Set(template, "usage.cache_read_input_tokens", cachedTokens) + } output = "event: message_delta\n" output += fmt.Sprintf("data: %s\n\n", template) @@ -204,8 +208,12 @@ func ConvertCodexResponseToClaudeNonStream(_ context.Context, _ string, original out := `{"id":"","type":"message","role":"assistant","model":"","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":0,"output_tokens":0}}` out, _ = sjson.Set(out, "id", responseData.Get("id").String()) out, _ = sjson.Set(out, "model", responseData.Get("model").String()) - out, _ = sjson.Set(out, "usage.input_tokens", responseData.Get("usage.input_tokens").Int()) - out, _ = sjson.Set(out, "usage.output_tokens", responseData.Get("usage.output_tokens").Int()) + inputTokens, outputTokens, cachedTokens := extractResponsesUsage(responseData.Get("usage")) + out, _ = sjson.Set(out, "usage.input_tokens", inputTokens) + out, _ = sjson.Set(out, "usage.output_tokens", outputTokens) + if cachedTokens > 0 { + out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens) + } hasToolCall := false @@ -308,12 +316,27 @@ func ConvertCodexResponseToClaudeNonStream(_ context.Context, _ string, original out, _ = sjson.SetRaw(out, "stop_sequence", stopSequence.Raw) } - if responseData.Get("usage.input_tokens").Exists() || responseData.Get("usage.output_tokens").Exists() { - out, _ = sjson.Set(out, "usage.input_tokens", responseData.Get("usage.input_tokens").Int()) - out, _ = sjson.Set(out, "usage.output_tokens", responseData.Get("usage.output_tokens").Int()) + return out +} + +func extractResponsesUsage(usage gjson.Result) (int64, int64, int64) { + if !usage.Exists() || usage.Type == gjson.Null { + return 0, 0, 0 } - return out + inputTokens := usage.Get("input_tokens").Int() + outputTokens := usage.Get("output_tokens").Int() + cachedTokens := usage.Get("input_tokens_details.cached_tokens").Int() + + if cachedTokens > 0 { + if inputTokens >= cachedTokens { + inputTokens -= cachedTokens + } else { + inputTokens = 0 + } + } + + return inputTokens, outputTokens, cachedTokens } // buildReverseMapFromClaudeOriginalShortToOriginal builds a map[short]original from original Claude request tools. 
diff --git a/internal/translator/openai/claude/openai_claude_response.go b/internal/translator/openai/claude/openai_claude_response.go index 1629545d..b6e0d005 100644 --- a/internal/translator/openai/claude/openai_claude_response.go +++ b/internal/translator/openai/claude/openai_claude_response.go @@ -289,21 +289,17 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI // Only process if usage has actual values (not null) if param.FinishReason != "" { usage := root.Get("usage") - var inputTokens, outputTokens int64 + var inputTokens, outputTokens, cachedTokens int64 if usage.Exists() && usage.Type != gjson.Null { - // Check if usage has actual token counts - promptTokens := usage.Get("prompt_tokens") - completionTokens := usage.Get("completion_tokens") - - if promptTokens.Exists() && completionTokens.Exists() { - inputTokens = promptTokens.Int() - outputTokens = completionTokens.Int() - } + inputTokens, outputTokens, cachedTokens = extractOpenAIUsage(usage) // Send message_delta with usage messageDeltaJSON := `{"type":"message_delta","delta":{"stop_reason":"","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}` messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "delta.stop_reason", mapOpenAIFinishReasonToAnthropic(param.FinishReason)) messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.input_tokens", inputTokens) messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.output_tokens", outputTokens) + if cachedTokens > 0 { + messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.cache_read_input_tokens", cachedTokens) + } results = append(results, "event: message_delta\ndata: "+messageDeltaJSON+"\n\n") param.MessageDeltaSent = true @@ -423,13 +419,12 @@ func convertOpenAINonStreamingToAnthropic(rawJSON []byte) []string { // Set usage information if usage := root.Get("usage"); usage.Exists() { - out, _ = sjson.Set(out, "usage.input_tokens", usage.Get("prompt_tokens").Int()) - out, _ = sjson.Set(out, "usage.output_tokens", usage.Get("completion_tokens").Int()) - reasoningTokens := int64(0) - if v := usage.Get("completion_tokens_details.reasoning_tokens"); v.Exists() { - reasoningTokens = v.Int() + inputTokens, outputTokens, cachedTokens := extractOpenAIUsage(usage) + out, _ = sjson.Set(out, "usage.input_tokens", inputTokens) + out, _ = sjson.Set(out, "usage.output_tokens", outputTokens) + if cachedTokens > 0 { + out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens) } - out, _ = sjson.Set(out, "usage.reasoning_tokens", reasoningTokens) } return []string{out} @@ -674,8 +669,12 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina } if respUsage := root.Get("usage"); respUsage.Exists() { - out, _ = sjson.Set(out, "usage.input_tokens", respUsage.Get("prompt_tokens").Int()) - out, _ = sjson.Set(out, "usage.output_tokens", respUsage.Get("completion_tokens").Int()) + inputTokens, outputTokens, cachedTokens := extractOpenAIUsage(respUsage) + out, _ = sjson.Set(out, "usage.input_tokens", inputTokens) + out, _ = sjson.Set(out, "usage.output_tokens", outputTokens) + if cachedTokens > 0 { + out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens) + } } if !stopReasonSet { @@ -692,3 +691,23 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina func ClaudeTokenCount(ctx context.Context, count int64) string { return fmt.Sprintf(`{"input_tokens":%d}`, count) } + +func extractOpenAIUsage(usage gjson.Result) (int64, int64, int64) { + if !usage.Exists() || usage.Type == 
gjson.Null { + return 0, 0, 0 + } + + inputTokens := usage.Get("prompt_tokens").Int() + outputTokens := usage.Get("completion_tokens").Int() + cachedTokens := usage.Get("prompt_tokens_details.cached_tokens").Int() + + if cachedTokens > 0 { + if inputTokens >= cachedTokens { + inputTokens -= cachedTokens + } else { + inputTokens = 0 + } + } + + return inputTokens, outputTokens, cachedTokens +} From 52e46ced1bd33a3fd8ff8f7cd136216b734bd8fc Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Mon, 19 Jan 2026 11:33:27 +0800 Subject: [PATCH 60/65] fix(translator): avoid forcing RFC 8259 system prompt --- internal/translator/openai/claude/openai_claude_request.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/translator/openai/claude/openai_claude_request.go b/internal/translator/openai/claude/openai_claude_request.go index 3817b77b..c268ec62 100644 --- a/internal/translator/openai/claude/openai_claude_request.go +++ b/internal/translator/openai/claude/openai_claude_request.go @@ -88,7 +88,7 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream var messagesJSON = "[]" // Handle system message first - systemMsgJSON := `{"role":"system","content":[{"type":"text","text":"Use ANY tool, the parameters MUST accord with RFC 8259 (The JavaScript Object Notation (JSON) Data Interchange Format), the keys and value MUST be enclosed in double quotes."}]}` + systemMsgJSON := `{"role":"system","content":[]}` if system := root.Get("system"); system.Exists() { if system.Type == gjson.String { if system.String() != "" { From c421d653e75e3eb161d6f1d96578c40510e1fbb8 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Mon, 19 Jan 2026 15:50:35 +0800 Subject: [PATCH 61/65] refactor(claude): move max_tokens constraint enforcement to Apply method --- internal/runtime/executor/claude_executor.go | 82 -------------------- internal/thinking/provider/claude/apply.go | 45 +++++++++++ 2 files changed, 45 insertions(+), 82 deletions(-) diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 17c5a143..b6d5418a 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -17,7 +17,6 @@ import ( claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" - "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" @@ -119,9 +118,6 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) - // Ensure max_tokens > thinking.budget_tokens when thinking is enabled - body = ensureMaxTokensForThinking(baseModel, body) - // Extract betas from body and convert to header var extraBetas []string extraBetas, body = extractAndRemoveBetas(body) @@ -250,9 +246,6 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) - // Ensure max_tokens > thinking.budget_tokens when thinking is enabled - body = 
ensureMaxTokensForThinking(baseModel, body) - // Extract betas from body and convert to header var extraBetas []string extraBetas, body = extractAndRemoveBetas(body) @@ -541,81 +534,6 @@ func disableThinkingIfToolChoiceForced(body []byte) []byte { return body } -// ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled. -// Anthropic API requires this constraint; violating it returns a 400 error. -// This function should be called after all thinking configuration is finalized. -// It looks up the model's MaxCompletionTokens from the registry to use as the cap. -func ensureMaxTokensForThinking(modelName string, body []byte) []byte { - thinkingType := gjson.GetBytes(body, "thinking.type").String() - if thinkingType != "enabled" { - return body - } - - budgetTokens := gjson.GetBytes(body, "thinking.budget_tokens").Int() - if budgetTokens <= 0 { - return body - } - - maxTokens := gjson.GetBytes(body, "max_tokens").Int() - - // Look up the model's max completion tokens from the registry - maxCompletionTokens := 0 - if modelInfo := registry.LookupModelInfo(modelName); modelInfo != nil { - maxCompletionTokens = modelInfo.MaxCompletionTokens - } - - // Fall back to budget + buffer if registry lookup fails or returns 0 - const fallbackBuffer = 4000 - requiredMaxTokens := budgetTokens + fallbackBuffer - if maxCompletionTokens > 0 { - requiredMaxTokens = int64(maxCompletionTokens) - } - - if maxTokens < requiredMaxTokens { - body, _ = sjson.SetBytes(body, "max_tokens", requiredMaxTokens) - } - return body -} - -func (e *ClaudeExecutor) resolveClaudeConfig(auth *cliproxyauth.Auth) *config.ClaudeKey { - if auth == nil || e.cfg == nil { - return nil - } - var attrKey, attrBase string - if auth.Attributes != nil { - attrKey = strings.TrimSpace(auth.Attributes["api_key"]) - attrBase = strings.TrimSpace(auth.Attributes["base_url"]) - } - for i := range e.cfg.ClaudeKey { - entry := &e.cfg.ClaudeKey[i] - cfgKey := strings.TrimSpace(entry.APIKey) - cfgBase := strings.TrimSpace(entry.BaseURL) - if attrKey != "" && attrBase != "" { - if strings.EqualFold(cfgKey, attrKey) && strings.EqualFold(cfgBase, attrBase) { - return entry - } - continue - } - if attrKey != "" && strings.EqualFold(cfgKey, attrKey) { - if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) { - return entry - } - } - if attrKey == "" && attrBase != "" && strings.EqualFold(cfgBase, attrBase) { - return entry - } - } - if attrKey != "" { - for i := range e.cfg.ClaudeKey { - entry := &e.cfg.ClaudeKey[i] - if strings.EqualFold(strings.TrimSpace(entry.APIKey), attrKey) { - return entry - } - } - } - return nil -} - type compositeReadCloser struct { io.Reader closers []func() error diff --git a/internal/thinking/provider/claude/apply.go b/internal/thinking/provider/claude/apply.go index b7833072..babc2f76 100644 --- a/internal/thinking/provider/claude/apply.go +++ b/internal/thinking/provider/claude/apply.go @@ -80,9 +80,54 @@ func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo * result, _ := sjson.SetBytes(body, "thinking.type", "enabled") result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget) + + // Ensure max_tokens > thinking.budget_tokens (Anthropic API constraint) + result = a.normalizeClaudeBudget(result, config.Budget, modelInfo) return result, nil } +// normalizeClaudeBudget applies Claude-specific constraints to ensure max_tokens > budget_tokens. +// Anthropic API requires this constraint; violating it returns a 400 error. 
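+// For example, thinking.budget_tokens=10000 with max_tokens=8000 would be
+// rejected upstream, so the budget must be capped or max_tokens raised
+// before the request is sent.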
+func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo *registry.ModelInfo) []byte { + if budgetTokens <= 0 { + return body + } + + effectiveMax, setDefaultMax := a.effectiveMaxTokens(body, modelInfo) + if effectiveMax > 0 && effectiveMax > budgetTokens { + if setDefaultMax { + body, _ = sjson.SetBytes(body, "max_tokens", effectiveMax) + } + return body + } + + // Fall back to budget + buffer if no effective max or max <= budget + const fallbackBuffer = 4000 + requiredMaxTokens := budgetTokens + fallbackBuffer + if effectiveMax > 0 && effectiveMax > requiredMaxTokens { + requiredMaxTokens = effectiveMax + } + + currentMax := gjson.GetBytes(body, "max_tokens").Int() + if currentMax < int64(requiredMaxTokens) { + body, _ = sjson.SetBytes(body, "max_tokens", requiredMaxTokens) + } + return body +} + +// effectiveMaxTokens returns the max tokens to cap thinking: +// prefer request-provided max_tokens; otherwise fall back to model default. +// The boolean indicates whether the value came from the model default (and thus should be written back). +func (a *Applier) effectiveMaxTokens(body []byte, modelInfo *registry.ModelInfo) (max int, fromModel bool) { + if maxTok := gjson.GetBytes(body, "max_tokens"); maxTok.Exists() && maxTok.Int() > 0 { + return int(maxTok.Int()), false + } + if modelInfo != nil && modelInfo.MaxCompletionTokens > 0 { + return modelInfo.MaxCompletionTokens, true + } + return 0, false +} + func applyCompatibleClaude(body []byte, config thinking.ThinkingConfig) ([]byte, error) { if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto { return body, nil From 239a28793c3b0229a0cefe7673c4e72c54c3288e Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Mon, 19 Jan 2026 16:32:20 +0800 Subject: [PATCH 62/65] feat(claude): clamp thinking budget to max_tokens constraints --- internal/thinking/provider/claude/apply.go | 38 ++++++++++++++-------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/internal/thinking/provider/claude/apply.go b/internal/thinking/provider/claude/apply.go index babc2f76..3c74d514 100644 --- a/internal/thinking/provider/claude/apply.go +++ b/internal/thinking/provider/claude/apply.go @@ -93,25 +93,37 @@ func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo return body } + // Ensure the request satisfies Claude constraints: + // 1) Determine effective max_tokens (request overrides model default) + // 2) If budget_tokens >= max_tokens, reduce budget_tokens to max_tokens-1 + // 3) If the adjusted budget falls below the model minimum, leave the request unchanged + // 4) If max_tokens came from model default, write it back into the request + effectiveMax, setDefaultMax := a.effectiveMaxTokens(body, modelInfo) - if effectiveMax > 0 && effectiveMax > budgetTokens { - if setDefaultMax { - body, _ = sjson.SetBytes(body, "max_tokens", effectiveMax) - } + if setDefaultMax && effectiveMax > 0 { + body, _ = sjson.SetBytes(body, "max_tokens", effectiveMax) + } + + // Compute the budget we would apply after enforcing budget_tokens < max_tokens. 
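+	// Example (hypothetical numbers): effectiveMax=32000 with a requested
+	// budget of 64000 yields adjustedBudget=31999; if the model minimum were
+	// 1024 and effectiveMax only 512, the clamp would undershoot that
+	// minimum, and the request is returned unchanged below.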
+ adjustedBudget := budgetTokens + if effectiveMax > 0 && adjustedBudget >= effectiveMax { + adjustedBudget = effectiveMax - 1 + } + + minBudget := 0 + if modelInfo != nil && modelInfo.Thinking != nil { + minBudget = modelInfo.Thinking.Min + } + if minBudget > 0 && adjustedBudget > 0 && adjustedBudget < minBudget { + // If enforcing the max_tokens constraint would push the budget below the model minimum, + // leave the request unchanged. return body } - // Fall back to budget + buffer if no effective max or max <= budget - const fallbackBuffer = 4000 - requiredMaxTokens := budgetTokens + fallbackBuffer - if effectiveMax > 0 && effectiveMax > requiredMaxTokens { - requiredMaxTokens = effectiveMax + if adjustedBudget != budgetTokens { + body, _ = sjson.SetBytes(body, "thinking.budget_tokens", adjustedBudget) } - currentMax := gjson.GetBytes(body, "max_tokens").Int() - if currentMax < int64(requiredMaxTokens) { - body, _ = sjson.SetBytes(body, "max_tokens", requiredMaxTokens) - } return body } From c175821cc4a3960e633f6de1e880e43f79309e75 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Mon, 19 Jan 2026 18:07:22 +0800 Subject: [PATCH 63/65] feat(registry): expand antigravity model config Remove static Name mapping and add entries for claude-sonnet-4-5, tab_flash_lite_preview, and gpt-oss-120b-medium configs --- internal/registry/model_definitions.go | 19 ++++++++++--------- .../runtime/executor/antigravity_executor.go | 3 --- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index 77669e4b..080c2726 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -765,21 +765,23 @@ func GetIFlowModels() []*ModelInfo { type AntigravityModelConfig struct { Thinking *ThinkingSupport MaxCompletionTokens int - Name string } // GetAntigravityModelConfig returns static configuration for antigravity models. // Keys use upstream model names returned by the Antigravity models endpoint. 
func GetAntigravityModelConfig() map[string]*AntigravityModelConfig { return map[string]*AntigravityModelConfig{ - "gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"}, - "gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"}, - "rev19-uic3-1p": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/rev19-uic3-1p"}, - "gemini-3-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-high"}, - "gemini-3-pro-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image"}, - "gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash"}, + "gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}}, + "gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}}, + "rev19-uic3-1p": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}}, + "gemini-3-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}}, + "gemini-3-pro-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}}, + "gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}}, "claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000}, "claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000}, + "claude-sonnet-4-5": {MaxCompletionTokens: 64000}, + "gpt-oss-120b-medium": {}, + "tab_flash_lite_preview": {}, } } @@ -809,10 +811,9 @@ func LookupStaticModelInfo(modelID string) *ModelInfo { } // Check Antigravity static config - if cfg := GetAntigravityModelConfig()[modelID]; cfg != nil && cfg.Thinking != nil { + if cfg := GetAntigravityModelConfig()[modelID]; cfg != nil { return &ModelInfo{ ID: modelID, - Name: cfg.Name, Thinking: cfg.Thinking, MaxCompletionTokens: cfg.MaxCompletionTokens, } diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 99392188..602ed628 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -1005,9 +1005,6 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c } modelCfg := modelConfig[modelID] modelName := modelID - if modelCfg != nil && modelCfg.Name != "" { - modelName = modelCfg.Name - } modelInfo := ®istry.ModelInfo{ ID: modelID, Name: modelName, From 1d2fe55310024844ff12aba79b5b2b21f14b7b45 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Mon, 19 Jan 2026 19:49:39 +0800 Subject: [PATCH 64/65] fix(executor): stop rewriting thinkingLevel for gemini --- 
internal/runtime/executor/antigravity_executor.go | 7 ------- test/thinking_conversion_test.go | 4 ++++ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 602ed628..df26e376 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -1407,13 +1407,6 @@ func geminiToAntigravity(modelName string, payload []byte, projectID string) []b template, _ = sjson.Delete(template, "request.safetySettings") template, _ = sjson.Set(template, "request.toolConfig.functionCallingConfig.mode", "VALIDATED") - if !strings.HasPrefix(modelName, "gemini-3-") { - if thinkingLevel := gjson.Get(template, "request.generationConfig.thinkingConfig.thinkingLevel"); thinkingLevel.Exists() { - template, _ = sjson.Delete(template, "request.generationConfig.thinkingConfig.thinkingLevel") - template, _ = sjson.Set(template, "request.generationConfig.thinkingConfig.thinkingBudget", -1) - } - } - if strings.Contains(modelName, "claude") { gjson.Get(template, "request.tools").ForEach(func(key, tool gjson.Result) bool { tool.Get("functionDeclarations").ForEach(func(funKey, funcDecl gjson.Result) bool { diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go index 8f527193..4a7df29a 100644 --- a/test/thinking_conversion_test.go +++ b/test/thinking_conversion_test.go @@ -20,6 +20,7 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" "github.com/tidwall/gjson" + "github.com/tidwall/sjson" ) // thinkingTestCase represents a common test case structure for both suffix and body tests. @@ -2707,6 +2708,9 @@ func runThinkingTests(t *testing.T, cases []thinkingTestCase) { []byte(tc.inputJSON), true, ) + if applyTo == "claude" { + body, _ = sjson.SetBytes(body, "max_tokens", 200000) + } body, err := thinking.ApplyThinking(body, tc.model, tc.from, applyTo) From 8734d4cb9080706ffc4e980452a5cc44ce5097af Mon Sep 17 00:00:00 2001 From: dinhkarate Date: Tue, 20 Jan 2026 01:26:37 +0700 Subject: [PATCH 65/65] feat(vertex): add Imagen image generation model support Add support for Imagen 3.0 and 4.0 image generation models in Vertex AI: - Add 5 Imagen model definitions (4.0, 4.0-ultra, 4.0-fast, 3.0, 3.0-fast) - Implement :predict action routing for Imagen models - Convert Imagen request/response format to match Gemini structure like gemini-3-pro-image - Transform prompts to Imagen's instances/parameters format - Convert base64 image responses to Gemini-compatible inline data --- internal/registry/model_definitions.go | 61 +++++ .../executor/gemini_vertex_executor.go | 222 +++++++++++++++--- 2 files changed, 256 insertions(+), 27 deletions(-) diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index 080c2726..1d29bda2 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -287,6 +287,67 @@ func GetGeminiVertexModels() []*ModelInfo { SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, }, + // Imagen image generation models - use :predict action + { + ID: "imagen-4.0-generate-001", + Object: "model", + Created: 1750000000, + OwnedBy: "google", + Type: "gemini", + Name: 
"models/imagen-4.0-generate-001", + Version: "4.0", + DisplayName: "Imagen 4.0 Generate", + Description: "Imagen 4.0 image generation model", + SupportedGenerationMethods: []string{"predict"}, + }, + { + ID: "imagen-4.0-ultra-generate-001", + Object: "model", + Created: 1750000000, + OwnedBy: "google", + Type: "gemini", + Name: "models/imagen-4.0-ultra-generate-001", + Version: "4.0", + DisplayName: "Imagen 4.0 Ultra Generate", + Description: "Imagen 4.0 Ultra high-quality image generation model", + SupportedGenerationMethods: []string{"predict"}, + }, + { + ID: "imagen-3.0-generate-002", + Object: "model", + Created: 1740000000, + OwnedBy: "google", + Type: "gemini", + Name: "models/imagen-3.0-generate-002", + Version: "3.0", + DisplayName: "Imagen 3.0 Generate", + Description: "Imagen 3.0 image generation model", + SupportedGenerationMethods: []string{"predict"}, + }, + { + ID: "imagen-3.0-fast-generate-001", + Object: "model", + Created: 1740000000, + OwnedBy: "google", + Type: "gemini", + Name: "models/imagen-3.0-fast-generate-001", + Version: "3.0", + DisplayName: "Imagen 3.0 Fast Generate", + Description: "Imagen 3.0 fast image generation model", + SupportedGenerationMethods: []string{"predict"}, + }, + { + ID: "imagen-4.0-fast-generate-001", + Object: "model", + Created: 1750000000, + OwnedBy: "google", + Type: "gemini", + Name: "models/imagen-4.0-fast-generate-001", + Version: "4.0", + DisplayName: "Imagen 4.0 Fast Generate", + Description: "Imagen 4.0 fast image generation model", + SupportedGenerationMethods: []string{"predict"}, + }, } } diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go index 20e59b3f..1184c07e 100644 --- a/internal/runtime/executor/gemini_vertex_executor.go +++ b/internal/runtime/executor/gemini_vertex_executor.go @@ -12,6 +12,7 @@ import ( "io" "net/http" "strings" + "time" vertexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/vertex" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" @@ -31,6 +32,143 @@ const ( vertexAPIVersion = "v1" ) +// isImagenModel checks if the model name is an Imagen image generation model. +// Imagen models use the :predict action instead of :generateContent. +func isImagenModel(model string) bool { + lowerModel := strings.ToLower(model) + return strings.Contains(lowerModel, "imagen") +} + +// getVertexAction returns the appropriate action for the given model. +// Imagen models use "predict", while Gemini models use "generateContent". +func getVertexAction(model string, isStream bool) string { + if isImagenModel(model) { + return "predict" + } + if isStream { + return "streamGenerateContent" + } + return "generateContent" +} + +// convertImagenToGeminiResponse converts Imagen API response to Gemini format +// so it can be processed by the standard translation pipeline. +// This ensures Imagen models return responses in the same format as gemini-3-pro-image-preview. 
+func convertImagenToGeminiResponse(data []byte, model string) []byte { + predictions := gjson.GetBytes(data, "predictions") + if !predictions.Exists() || !predictions.IsArray() { + return data + } + + // Build Gemini-compatible response with inlineData + parts := make([]map[string]any, 0) + for _, pred := range predictions.Array() { + imageData := pred.Get("bytesBase64Encoded").String() + mimeType := pred.Get("mimeType").String() + if mimeType == "" { + mimeType = "image/png" + } + if imageData != "" { + parts = append(parts, map[string]any{ + "inlineData": map[string]any{ + "mimeType": mimeType, + "data": imageData, + }, + }) + } + } + + // Generate unique response ID using timestamp + responseId := fmt.Sprintf("imagen-%d", time.Now().UnixNano()) + + response := map[string]any{ + "candidates": []map[string]any{{ + "content": map[string]any{ + "parts": parts, + "role": "model", + }, + "finishReason": "STOP", + }}, + "responseId": responseId, + "modelVersion": model, + // Imagen API doesn't return token counts, set to 0 for tracking purposes + "usageMetadata": map[string]any{ + "promptTokenCount": 0, + "candidatesTokenCount": 0, + "totalTokenCount": 0, + }, + } + + result, err := json.Marshal(response) + if err != nil { + return data + } + return result +} + +// convertToImagenRequest converts a Gemini-style request to Imagen API format. +// Imagen API uses a different structure: instances[].prompt instead of contents[]. +func convertToImagenRequest(payload []byte) ([]byte, error) { + // Extract prompt from Gemini-style contents + prompt := "" + + // Try to get prompt from contents[0].parts[0].text + contentsText := gjson.GetBytes(payload, "contents.0.parts.0.text") + if contentsText.Exists() { + prompt = contentsText.String() + } + + // If no contents, try messages format (OpenAI-compatible) + if prompt == "" { + messagesText := gjson.GetBytes(payload, "messages.#.content") + if messagesText.Exists() && messagesText.IsArray() { + for _, msg := range messagesText.Array() { + if msg.String() != "" { + prompt = msg.String() + break + } + } + } + } + + // If still no prompt, try direct prompt field + if prompt == "" { + directPrompt := gjson.GetBytes(payload, "prompt") + if directPrompt.Exists() { + prompt = directPrompt.String() + } + } + + if prompt == "" { + return nil, fmt.Errorf("imagen: no prompt found in request") + } + + // Build Imagen API request + imagenReq := map[string]any{ + "instances": []map[string]any{ + { + "prompt": prompt, + }, + }, + "parameters": map[string]any{ + "sampleCount": 1, + }, + } + + // Extract optional parameters + if aspectRatio := gjson.GetBytes(payload, "aspectRatio"); aspectRatio.Exists() { + imagenReq["parameters"].(map[string]any)["aspectRatio"] = aspectRatio.String() + } + if sampleCount := gjson.GetBytes(payload, "sampleCount"); sampleCount.Exists() { + imagenReq["parameters"].(map[string]any)["sampleCount"] = int(sampleCount.Int()) + } + if negativePrompt := gjson.GetBytes(payload, "negativePrompt"); negativePrompt.Exists() { + imagenReq["instances"].([]map[string]any)[0]["negativePrompt"] = negativePrompt.String() + } + + return json.Marshal(imagenReq) +} + // GeminiVertexExecutor sends requests to Vertex AI Gemini endpoints using service account credentials. 
type GeminiVertexExecutor struct {
	cfg *config.Config
@@ -160,26 +298,38 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

-	from := opts.SourceFormat
-	to := sdktranslator.FromString("gemini")
+	var body []byte

-	originalPayload := bytes.Clone(req.Payload)
-	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
-	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	// Handle Imagen models with special request format
+	if isImagenModel(baseModel) {
+		imagenBody, errImagen := convertToImagenRequest(req.Payload)
+		if errImagen != nil {
+			return resp, errImagen
+		}
+		body = imagenBody
+	} else {
+		// Standard Gemini translation flow
+		from := opts.SourceFormat
+		to := sdktranslator.FromString("gemini")

-	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
-	if err != nil {
-		return resp, err
+		originalPayload := bytes.Clone(req.Payload)
+		if len(opts.OriginalRequest) > 0 {
+			originalPayload = bytes.Clone(opts.OriginalRequest)
+		}
+		originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
+		body = sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+
+		body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
+		if err != nil {
+			return resp, err
+		}
+
+		body = fixGeminiImageAspectRatio(baseModel, body)
+		body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+		body, _ = sjson.SetBytes(body, "model", baseModel)
 	}

-	body = fixGeminiImageAspectRatio(baseModel, body)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
-	body, _ = sjson.SetBytes(body, "model", baseModel)
-
-	action := "generateContent"
+	action := getVertexAction(baseModel, false)
 	if req.Metadata != nil {
 		if a, _ := req.Metadata["action"].(string); a == "countTokens" {
 			action = "countTokens"
@@ -249,6 +399,16 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	}
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	reporter.publish(ctx, parseGeminiUsage(data))
+
+	// For Imagen models, convert response to Gemini format before translation
+	// This ensures Imagen responses use the same format as gemini-3-pro-image-preview
+	if isImagenModel(baseModel) {
+		data = convertImagenToGeminiResponse(data, baseModel)
+	}
+
+	// Standard Gemini translation (works for both Gemini and converted Imagen responses)
+	from := opts.SourceFormat
+	to := sdktranslator.FromString("gemini")
 	var param any
 	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
@@ -281,7 +441,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

-	action := "generateContent"
+	action := getVertexAction(baseModel, false)
 	if req.Metadata != nil {
 		if a, _ := req.Metadata["action"].(string); a == "countTokens" {
 			action = "countTokens"
@@ -384,12 +544,16 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	body = 
applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) body, _ = sjson.SetBytes(body, "model", baseModel) + action := getVertexAction(baseModel, true) baseURL := vertexBaseURL(location) - url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, "streamGenerateContent") - if opts.Alt == "" { - url = url + "?alt=sse" - } else { - url = url + fmt.Sprintf("?$alt=%s", opts.Alt) + url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, action) + // Imagen models don't support streaming, skip SSE params + if !isImagenModel(baseModel) { + if opts.Alt == "" { + url = url + "?alt=sse" + } else { + url = url + fmt.Sprintf("?$alt=%s", opts.Alt) + } } body, _ = sjson.DeleteBytes(body, "session_id") @@ -503,15 +667,19 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) body, _ = sjson.SetBytes(body, "model", baseModel) + action := getVertexAction(baseModel, true) // For API key auth, use simpler URL format without project/location if baseURL == "" { baseURL = "https://generativelanguage.googleapis.com" } - url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, "streamGenerateContent") - if opts.Alt == "" { - url = url + "?alt=sse" - } else { - url = url + fmt.Sprintf("?$alt=%s", opts.Alt) + url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, action) + // Imagen models don't support streaming, skip SSE params + if !isImagenModel(baseModel) { + if opts.Alt == "" { + url = url + "?alt=sse" + } else { + url = url + fmt.Sprintf("?$alt=%s", opts.Alt) + } } body, _ = sjson.DeleteBytes(body, "session_id")
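
For context on the routing above: Imagen traffic always lands on the non-streaming :predict action, even on the streaming code path, which is why the SSE query parameters are skipped. A standalone sketch of the resulting URLs (the project, location, and helper name are placeholders, not part of this patch):

package main

import (
	"fmt"
	"strings"
)

// vertexAction mirrors getVertexAction from the diff: Imagen models always
// route to :predict (there is no streaming variant), while Gemini models use
// generateContent or streamGenerateContent.
func vertexAction(model string, stream bool) string {
	if strings.Contains(strings.ToLower(model), "imagen") {
		return "predict"
	}
	if stream {
		return "streamGenerateContent"
	}
	return "generateContent"
}

func main() {
	base := "https://us-central1-aiplatform.googleapis.com/v1" // placeholder endpoint
	for _, m := range []string{"imagen-4.0-generate-001", "gemini-2.5-flash"} {
		action := vertexAction(m, true) // caller asked for a stream
		url := fmt.Sprintf("%s/projects/%s/locations/%s/publishers/google/models/%s:%s",
			base, "my-project", "us-central1", m, action)
		if action != "predict" {
			url += "?alt=sse" // SSE applies only to streaming Gemini calls
		}
		fmt.Println(url)
	}
}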