Compare commits


30 Commits

Author SHA1 Message Date
Luis Pater
5a75ef8ffd Merge pull request #536 from AoaoMH/feature/auth-model-check
feat: using Client Model Infos;
2025-12-15 00:29:33 +08:00
Test
07279f8746 feat: using Client Model Infos; 2025-12-15 00:13:05 +08:00
Luis Pater
71f788b13a fix(registry): remove unused ThinkingSupport from DeepSeek-R1 model 2025-12-14 21:30:17 +08:00
Luis Pater
59c62dc580 fix(registry): correct DeepSeek-V3.2 experimental model ID 2025-12-14 21:27:43 +08:00
Luis Pater
d5310a3300 Merge pull request #531 from AoaoMH/feature/auth-model-check
feat: add API endpoint to query models for auth credentials
2025-12-14 16:46:43 +08:00
Luis Pater
f0a3eb574e fix(registry): update DeepSeek model definitions with new IDs and descriptions 2025-12-14 16:17:11 +08:00
Test
bb15855443 feat: add API endpoint to query models for auth credentials 2025-12-14 15:16:26 +08:00
Luis Pater
14ce6aebd1 Merge pull request #449 from sususu98/fix/gemini-cli-429-retry-delay-parsing
fix(gemini-cli): enhance 429 retry delay parsing
2025-12-14 14:04:14 +08:00
Luis Pater
2fe83723f2 Merge pull request #515 from teeverc/fix/response-rewriter-streaming-flush
fix(amp): flush response buffer after each streaming chunk write
2025-12-14 13:26:05 +08:00
teeverc
cd8c86c6fb refactor: only flush stream response on successful write 2025-12-13 13:32:54 -08:00
teeverc
52d5fd1a67 fix: streaming for amp cli 2025-12-13 13:17:53 -08:00
Luis Pater
b6ad243e9e Merge pull request #498 from teeverc/fix/claude-streaming-flush
fix(claude): flush Claude SSE chunks immediately
2025-12-13 23:58:34 +08:00
Luis Pater
660aabc437 fix(executor): add allowCompat support for reasoning effort normalization
Introduced `allowCompat` parameter to improve compatibility handling for reasoning effort in payloads across OpenAI and similar models.
2025-12-13 04:06:02 +08:00
Luis Pater
566120e8d5 Merge pull request #505 from router-for-me/think
fix(thinking): map budgets to effort levels
2025-12-12 22:17:11 +08:00
Luis Pater
f3f0f1717d Merge branch 'dev' into think 2025-12-12 22:16:44 +08:00
Luis Pater
7621ec609e Merge pull request #501 from huynguyen03dev/fix/openai-compat-model-alias-resolution
fix(openai-compat): prevent model alias from being overwritten
2025-12-12 21:58:15 +08:00
Luis Pater
9f511f0024 fix(executor): improve model compatibility handling for OpenAI-compatibility
Enhances payload handling by introducing OpenAI-compatibility checks and refining how reasoning metadata is resolved, ensuring broader model support.
2025-12-12 21:57:25 +08:00
hkfires
374faa2640 fix(thinking): map budgets to effort levels
Ensure thinking settings translate correctly across providers:
- Only apply reasoning_effort to level-based models and derive it from numeric
  budget suffixes when present
- Strip effort string fields for budget-based models and skip Claude/Gemini
  budget resolution for level-based or unsupported models
- Default Gemini include_thoughts when a nonzero budget override is set
- Add cross-protocol conversion and budget range tests
2025-12-12 21:33:20 +08:00
Luis Pater
1c52a89535 Merge pull request #502 from router-for-me/iflow
fix(auth): prevent duplicate iflow BXAuth tokens
2025-12-12 20:03:37 +08:00
hkfires
e7cedbee6e fix(auth): prevent duplicate iflow BXAuth tokens 2025-12-12 19:57:19 +08:00
Luis Pater
b8194e717c Merge pull request #500 from router-for-me/think
fix(codex): raise default reasoning effort to medium
2025-12-12 18:35:26 +08:00
huynguyen03.dev
15c3cc3a50 fix(openai-compat): prevent model alias from being overwritten by ResolveOriginalModel
When using OpenAI-compatible providers with model aliases (e.g., glm-4.6-zai -> glm-4.6),
the alias resolution was correctly applied but then immediately overwritten by
ResolveOriginalModel, causing 'Unknown Model' errors from upstream APIs.

This fix skips the ResolveOriginalModel override when a model alias has already
been resolved, ensuring the correct model name is sent to the upstream provider.

Co-authored-by: Amp <amp@ampcode.com>
2025-12-12 17:20:24 +07:00
hkfires
d131435e25 fix(codex): raise default reasoning effort to medium 2025-12-12 18:18:48 +08:00
Luis Pater
6e43669498 Fixed: #440
feat(watcher): normalize auth file paths and implement debounce for remove events
2025-12-12 16:50:56 +08:00
teeverc
5ab3032335 Update sdk/api/handlers/claude/code_handlers.go
thank you gemini

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2025-12-12 00:26:01 -08:00
teeverc
1215c635a0 fix: flush Claude SSE chunks immediately to match OpenAI behavior
- Write each SSE chunk directly to c.Writer and flush immediately
- Remove buffered writer and ticker-based flushing that caused delayed output
- Add 500ms timeout case for consistency with OpenAI/Gemini handlers
- Clean up unused bufio import

This fixes the 'not streaming' issue where small responses were held
in the buffer until timeout/threshold was reached.

Amp-Thread-ID: https://ampcode.com/threads/T-019b1186-164e-740c-96ab-856f64ee6bee
Co-authored-by: Amp <amp@ampcode.com>
2025-12-12 00:14:19 -08:00
Luis Pater
fc054db51a Merge pull request #494 from ben-vargas/fix-gpt-reasoning-none
fix(models): add "none" reasoning effort level to gpt-5.2
2025-12-12 08:53:19 +08:00
Luis Pater
6e2306a5f2 refactor(handlers): improve request logging and payload handling 2025-12-12 08:52:52 +08:00
Ben Vargas
b09e2115d1 fix(models): add "none" reasoning effort level to gpt-5.2
Per OpenAI API documentation, gpt-5.2 supports reasoning_effort values
of "none", "low", "medium", "high", and "xhigh". The "none" level was
missing from the model definition.

Reference: https://platform.openai.com/docs/api-reference/chat/create#chat_create-reasoning_effort
2025-12-11 15:26:23 -07:00
sususu
07d21463ca fix(gemini-cli): enhance 429 retry delay parsing
Add fallback parsing for quota reset delay when RetryInfo is not present:
- Try ErrorInfo.metadata.quotaResetDelay (e.g., "373.801628ms")
- Parse from error.message "Your quota will reset after Xs."

This ensures proper cooldown timing for rate-limited requests.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-11 09:34:39 +08:00
27 changed files with 1086 additions and 138 deletions

View File

@@ -26,6 +26,7 @@ import (
"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
sdkAuth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth"
coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
@@ -266,6 +267,54 @@ func (h *Handler) ListAuthFiles(c *gin.Context) {
c.JSON(200, gin.H{"files": files})
}
// GetAuthFileModels returns the models supported by a specific auth file
func (h *Handler) GetAuthFileModels(c *gin.Context) {
name := c.Query("name")
if name == "" {
c.JSON(400, gin.H{"error": "name is required"})
return
}
// Try to find auth ID via authManager
var authID string
if h.authManager != nil {
auths := h.authManager.List()
for _, auth := range auths {
if auth.FileName == name || auth.ID == name {
authID = auth.ID
break
}
}
}
if authID == "" {
authID = name // fallback to filename as ID
}
// Get models from registry
reg := registry.GetGlobalRegistry()
models := reg.GetModelsForClient(authID)
result := make([]gin.H, 0, len(models))
for _, m := range models {
entry := gin.H{
"id": m.ID,
}
if m.DisplayName != "" {
entry["display_name"] = m.DisplayName
}
if m.Type != "" {
entry["type"] = m.Type
}
if m.OwnedBy != "" {
entry["owned_by"] = m.OwnedBy
}
result = append(result, entry)
}
c.JSON(200, gin.H{"models": result})
}
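For orientation, a minimal client sketch for the new endpoint. Only the /auth-files/models route and the name query parameter come from this diff; the host, management mount path, and bearer-style key header are assumptions to adapt to your deployment.

package main

import (
    "fmt"
    "io"
    "net/http"
)

func main() {
    // Hypothetical host, mount path, and management key.
    url := "http://localhost:8317/v0/management/auth-files/models?name=iflow-user.json"
    req, err := http.NewRequest(http.MethodGet, url, nil)
    if err != nil {
        panic(err)
    }
    req.Header.Set("Authorization", "Bearer <management-key>") // assumed auth scheme
    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    body, _ := io.ReadAll(resp.Body)
    fmt.Println(resp.Status, string(body)) // e.g. {"models":[{"id":"glm-4.6","display_name":"GLM-4.6"}]}
}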
// List auth files from disk when the auth manager is unavailable.
func (h *Handler) listAuthFilesFromDisk(c *gin.Context) {
entries, err := os.ReadDir(h.cfg.AuthDir)
@@ -1722,6 +1771,17 @@ func (h *Handler) RequestIFlowCookieToken(c *gin.Context) {
return
}
// Check for duplicate BXAuth before authentication
bxAuth := iflowauth.ExtractBXAuth(cookieValue)
if existingFile, err := iflowauth.CheckDuplicateBXAuth(h.cfg.AuthDir, bxAuth); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"status": "error", "error": "failed to check duplicate"})
return
} else if existingFile != "" {
existingFileName := filepath.Base(existingFile)
c.JSON(http.StatusConflict, gin.H{"status": "error", "error": "duplicate BXAuth found", "existing_file": existingFileName})
return
}
authSvc := iflowauth.NewIFlowAuth(h.cfg)
tokenData, errAuth := authSvc.AuthenticateWithCookie(ctx, cookieValue)
if errAuth != nil {
@@ -1744,11 +1804,12 @@ func (h *Handler) RequestIFlowCookieToken(c *gin.Context) {
}
tokenStorage.Email = email
+ timestamp := time.Now().Unix()
record := &coreauth.Auth{
- ID: fmt.Sprintf("iflow-%s.json", fileName),
+ ID: fmt.Sprintf("iflow-%s-%d.json", fileName, timestamp),
Provider: "iflow",
- FileName: fmt.Sprintf("iflow-%s.json", fileName),
+ FileName: fmt.Sprintf("iflow-%s-%d.json", fileName, timestamp),
Storage: tokenStorage,
Metadata: map[string]any{
"email": email,

View File

@@ -39,7 +39,13 @@ func (rw *ResponseRewriter) Write(data []byte) (int, error) {
}
if rw.isStreaming {
- return rw.ResponseWriter.Write(rw.rewriteStreamChunk(data))
+ n, err := rw.ResponseWriter.Write(rw.rewriteStreamChunk(data))
+ if err == nil {
+ if flusher, ok := rw.ResponseWriter.(http.Flusher); ok {
+ flusher.Flush()
+ }
+ }
+ return n, err
}
return rw.body.Write(data)
}

View File

@@ -568,6 +568,7 @@ func (s *Server) registerManagementRoutes() {
mgmt.DELETE("/oauth-excluded-models", s.mgmt.DeleteOAuthExcludedModels)
mgmt.GET("/auth-files", s.mgmt.ListAuthFiles)
mgmt.GET("/auth-files/models", s.mgmt.GetAuthFileModels)
mgmt.GET("/auth-files/download", s.mgmt.DownloadAuthFile)
mgmt.POST("/auth-files", s.mgmt.UploadAuthFile)
mgmt.DELETE("/auth-files", s.mgmt.DeleteAuthFile)

View File

@@ -1,7 +1,10 @@
package iflow
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
)
@@ -36,3 +39,61 @@ func SanitizeIFlowFileName(raw string) string {
}
return strings.TrimSpace(result.String())
}
// ExtractBXAuth extracts the BXAuth value from a cookie string.
func ExtractBXAuth(cookie string) string {
parts := strings.Split(cookie, ";")
for _, part := range parts {
part = strings.TrimSpace(part)
if strings.HasPrefix(part, "BXAuth=") {
return strings.TrimPrefix(part, "BXAuth=")
}
}
return ""
}
// CheckDuplicateBXAuth checks if the given BXAuth value already exists in any iflow auth file.
// Returns the path of the existing file if found, empty string otherwise.
func CheckDuplicateBXAuth(authDir, bxAuth string) (string, error) {
if bxAuth == "" {
return "", nil
}
entries, err := os.ReadDir(authDir)
if err != nil {
if os.IsNotExist(err) {
return "", nil
}
return "", fmt.Errorf("read auth dir failed: %w", err)
}
for _, entry := range entries {
if entry.IsDir() {
continue
}
name := entry.Name()
if !strings.HasPrefix(name, "iflow-") || !strings.HasSuffix(name, ".json") {
continue
}
filePath := filepath.Join(authDir, name)
data, err := os.ReadFile(filePath)
if err != nil {
continue
}
var tokenData struct {
Cookie string `json:"cookie"`
}
if err := json.Unmarshal(data, &tokenData); err != nil {
continue
}
existingBXAuth := ExtractBXAuth(tokenData.Cookie)
if existingBXAuth != "" && existingBXAuth == bxAuth {
return filePath, nil
}
}
return "", nil
}
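A small repo-internal test sketch of how the two helpers compose; the test name is hypothetical and it exercises only behavior visible in this diff.

package iflow

import "testing"

func TestBXAuthDuplicateSketch(t *testing.T) {
    cookie := "session=1; BXAuth=abc123; theme=dark"
    if got := ExtractBXAuth(cookie); got != "abc123" {
        t.Fatalf("ExtractBXAuth = %q, want %q", got, "abc123")
    }
    // An empty auth dir holds no duplicates.
    if existing, err := CheckDuplicateBXAuth(t.TempDir(), "abc123"); err != nil || existing != "" {
        t.Fatalf("unexpected duplicate result: %q, %v", existing, err)
    }
}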

View File

@@ -494,11 +494,18 @@ func (ia *IFlowAuth) CreateCookieTokenStorage(data *IFlowTokenData) *IFlowTokenS
return nil
}
// Only save the BXAuth field from the cookie
bxAuth := ExtractBXAuth(data.Cookie)
cookieToSave := ""
if bxAuth != "" {
cookieToSave = "BXAuth=" + bxAuth + ";"
}
return &IFlowTokenStorage{
APIKey: data.APIKey,
Email: data.Email,
Expire: data.Expire,
- Cookie: data.Cookie,
+ Cookie: cookieToSave,
LastRefresh: time.Now().Format(time.RFC3339),
Type: "iflow",
}

View File

@@ -5,7 +5,9 @@ import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
"time"
"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/iflow"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
@@ -37,6 +39,16 @@ func DoIFlowCookieAuth(cfg *config.Config, options *LoginOptions) {
return
}
// Check for duplicate BXAuth before authentication
bxAuth := iflow.ExtractBXAuth(cookie)
if existingFile, err := iflow.CheckDuplicateBXAuth(cfg.AuthDir, bxAuth); err != nil {
fmt.Printf("Failed to check duplicate: %v\n", err)
return
} else if existingFile != "" {
fmt.Printf("Duplicate BXAuth found, authentication already exists: %s\n", filepath.Base(existingFile))
return
}
// Authenticate with cookie
auth := iflow.NewIFlowAuth(cfg)
ctx := context.Background()
@@ -82,5 +94,5 @@ func promptForCookie(promptFn func(string) (string, error)) (string, error) {
// getAuthFilePath returns the auth file path for the given provider and email
func getAuthFilePath(cfg *config.Config, provider, email string) string {
fileName := iflow.SanitizeIFlowFileName(email)
return fmt.Sprintf("%s/%s-%s.json", cfg.AuthDir, provider, fileName)
return fmt.Sprintf("%s/%s-%s-%d.json", cfg.AuthDir, provider, fileName, time.Now().Unix())
}

View File

@@ -580,7 +580,7 @@ func GetOpenAIModels() []*ModelInfo {
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
},
}
}
@@ -648,10 +648,11 @@ func GetIFlowModels() []*ModelInfo {
{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400},
{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2", Created: 1764576000},
{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Chat", Created: 1764576000},
{ID: "deepseek-v3.2-reasoner", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Reasoner", Created: 1764576000},
{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},

View File

@@ -90,6 +90,9 @@ type ModelRegistry struct {
models map[string]*ModelRegistration
// clientModels maps client ID to the models it provides
clientModels map[string][]string
// clientModelInfos maps client ID to a map of model ID -> ModelInfo
// This preserves the original model info provided by each client
clientModelInfos map[string]map[string]*ModelInfo
// clientProviders maps client ID to its provider identifier
clientProviders map[string]string
// mutex ensures thread-safe access to the registry
@@ -104,10 +107,11 @@ var registryOnce sync.Once
func GetGlobalRegistry() *ModelRegistry {
registryOnce.Do(func() {
globalRegistry = &ModelRegistry{
- models: make(map[string]*ModelRegistration),
- clientModels: make(map[string][]string),
- clientProviders: make(map[string]string),
- mutex: &sync.RWMutex{},
+ models: make(map[string]*ModelRegistration),
+ clientModels: make(map[string][]string),
+ clientModelInfos: make(map[string]map[string]*ModelInfo),
+ clientProviders: make(map[string]string),
+ mutex: &sync.RWMutex{},
}
})
return globalRegistry
@@ -144,6 +148,7 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
// No models supplied; unregister existing client state if present.
r.unregisterClientInternal(clientID)
delete(r.clientModels, clientID)
delete(r.clientModelInfos, clientID)
delete(r.clientProviders, clientID)
misc.LogCredentialSeparator()
return
@@ -152,7 +157,7 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
now := time.Now()
oldModels, hadExisting := r.clientModels[clientID]
- oldProvider, _ := r.clientProviders[clientID]
+ oldProvider := r.clientProviders[clientID]
providerChanged := oldProvider != provider
if !hadExisting {
// Pure addition path.
@@ -161,6 +166,12 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
r.addModelRegistration(modelID, provider, model, now)
}
r.clientModels[clientID] = append([]string(nil), rawModelIDs...)
// Store client's own model infos
clientInfos := make(map[string]*ModelInfo, len(newModels))
for id, m := range newModels {
clientInfos[id] = cloneModelInfo(m)
}
r.clientModelInfos[clientID] = clientInfos
if provider != "" {
r.clientProviders[clientID] = provider
} else {
@@ -287,6 +298,12 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
if len(rawModelIDs) > 0 {
r.clientModels[clientID] = append([]string(nil), rawModelIDs...)
}
// Update client's own model infos
clientInfos := make(map[string]*ModelInfo, len(newModels))
for id, m := range newModels {
clientInfos[id] = cloneModelInfo(m)
}
r.clientModelInfos[clientID] = clientInfos
if provider != "" {
r.clientProviders[clientID] = provider
} else {
@@ -436,6 +453,7 @@ func (r *ModelRegistry) unregisterClientInternal(clientID string) {
}
delete(r.clientModels, clientID)
delete(r.clientModelInfos, clientID)
if hasProvider {
delete(r.clientProviders, clientID)
}
@@ -871,3 +889,44 @@ func (r *ModelRegistry) GetFirstAvailableModel(handlerType string) (string, erro
return "", fmt.Errorf("no available clients for any model in handler type: %s", handlerType)
}
// GetModelsForClient returns the models registered for a specific client.
// Parameters:
// - clientID: The client identifier (typically auth file name or auth ID)
//
// Returns:
// - []*ModelInfo: List of models registered for this client, nil if client not found
func (r *ModelRegistry) GetModelsForClient(clientID string) []*ModelInfo {
r.mutex.RLock()
defer r.mutex.RUnlock()
modelIDs, exists := r.clientModels[clientID]
if !exists || len(modelIDs) == 0 {
return nil
}
// Try to use client-specific model infos first
clientInfos := r.clientModelInfos[clientID]
seen := make(map[string]struct{})
result := make([]*ModelInfo, 0, len(modelIDs))
for _, modelID := range modelIDs {
if _, dup := seen[modelID]; dup {
continue
}
seen[modelID] = struct{}{}
// Prefer client's own model info to preserve original type/owned_by
if clientInfos != nil {
if info, ok := clientInfos[modelID]; ok && info != nil {
result = append(result, info)
continue
}
}
// Fallback to global registry (for backwards compatibility)
if reg, ok := r.models[modelID]; ok && reg.Info != nil {
result = append(result, reg.Info)
}
}
return result
}
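A repo-internal sketch of the round trip this enables; the RegisterClient call shape follows the test file later in this diff, and the client ID is illustrative.

package registry

import "fmt"

// Register a client with its own model infos, then read back the
// per-client view that GetAuthFileModels ultimately serves.
func ExampleModelRegistry_GetModelsForClient() {
    reg := GetGlobalRegistry()
    reg.RegisterClient("iflow-user.json", "iflow", GetIFlowModels())
    defer reg.UnregisterClient("iflow-user.json")
    for _, m := range reg.GetModelsForClient("iflow-user.json") {
        fmt.Println(m.ID, m.DisplayName)
    }
}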

View File

@@ -54,8 +54,8 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
from := opts.SourceFormat
to := sdktranslator.FromString("codex")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
body = normalizeThinkingConfig(body, upstreamModel)
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
body = normalizeThinkingConfig(body, upstreamModel, false)
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
return resp, errValidate
}
@@ -152,8 +152,8 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
to := sdktranslator.FromString("codex")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
body = normalizeThinkingConfig(body, upstreamModel)
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
body = normalizeThinkingConfig(body, upstreamModel, false)
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
return nil, errValidate
}
@@ -254,7 +254,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
modelForCounting := req.Model
- body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
+ body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
body, _ = sjson.SetBytes(body, "model", upstreamModel)
body, _ = sjson.DeleteBytes(body, "previous_response_id")
body, _ = sjson.SetBytes(body, "stream", false)

View File

@@ -11,6 +11,8 @@ import (
"fmt"
"io"
"net/http"
"regexp"
"strconv"
"strings"
"time"
@@ -784,20 +786,45 @@ func parseRetryDelay(errorBody []byte) (*time.Duration, error) {
// Try to parse the retryDelay from the error response
// Format: error.details[].retryDelay where @type == "type.googleapis.com/google.rpc.RetryInfo"
details := gjson.GetBytes(errorBody, "error.details")
- if !details.Exists() || !details.IsArray() {
- return nil, fmt.Errorf("no error.details found")
+ if details.Exists() && details.IsArray() {
+ for _, detail := range details.Array() {
+ typeVal := detail.Get("@type").String()
+ if typeVal == "type.googleapis.com/google.rpc.RetryInfo" {
+ retryDelay := detail.Get("retryDelay").String()
+ if retryDelay != "" {
+ // Parse duration string like "0.847655010s"
+ duration, err := time.ParseDuration(retryDelay)
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse duration")
+ }
+ return &duration, nil
+ }
+ }
+ }
+ // Fallback: try ErrorInfo.metadata.quotaResetDelay (e.g., "373.801628ms")
+ for _, detail := range details.Array() {
+ typeVal := detail.Get("@type").String()
+ if typeVal == "type.googleapis.com/google.rpc.ErrorInfo" {
+ quotaResetDelay := detail.Get("metadata.quotaResetDelay").String()
+ if quotaResetDelay != "" {
+ duration, err := time.ParseDuration(quotaResetDelay)
+ if err == nil {
+ return &duration, nil
+ }
+ }
+ }
+ }
+ }
- for _, detail := range details.Array() {
- typeVal := detail.Get("@type").String()
- if typeVal == "type.googleapis.com/google.rpc.RetryInfo" {
- retryDelay := detail.Get("retryDelay").String()
- if retryDelay != "" {
- // Parse duration string like "0.847655010s"
- duration, err := time.ParseDuration(retryDelay)
- if err != nil {
- return nil, fmt.Errorf("failed to parse duration")
- }
+ // Fallback: parse from error.message "Your quota will reset after Xs."
+ message := gjson.GetBytes(errorBody, "error.message").String()
+ if message != "" {
+ re := regexp.MustCompile(`after\s+(\d+)s\.?`)
+ if matches := re.FindStringSubmatch(message); len(matches) > 1 {
+ seconds, err := strconv.Atoi(matches[1])
+ if err == nil {
+ duration := time.Duration(seconds) * time.Second
+ return &duration, nil
+ }
+ }
+ }
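A self-contained sketch of the same fallback order against a fabricated 429 body (values invented for illustration), using only gjson, regexp, strconv, and time as the hunk above does.

package main

import (
    "fmt"
    "regexp"
    "strconv"
    "time"

    "github.com/tidwall/gjson"
)

func main() {
    body := []byte(`{"error":{"message":"Your quota will reset after 37s.","details":[{"@type":"type.googleapis.com/google.rpc.ErrorInfo","metadata":{"quotaResetDelay":"373.801628ms"}}]}}`)

    // Fallback 1: ErrorInfo.metadata.quotaResetDelay is a Go-style duration string.
    for _, d := range gjson.GetBytes(body, "error.details").Array() {
        if d.Get("@type").String() == "type.googleapis.com/google.rpc.ErrorInfo" {
            if dur, err := time.ParseDuration(d.Get("metadata.quotaResetDelay").String()); err == nil {
                fmt.Println("quotaResetDelay:", dur) // 373.801628ms
            }
        }
    }

    // Fallback 2: scrape whole seconds out of error.message.
    re := regexp.MustCompile(`after\s+(\d+)s\.?`)
    if m := re.FindStringSubmatch(gjson.GetBytes(body, "error.message").String()); len(m) > 1 {
        if secs, err := strconv.Atoi(m[1]); err == nil {
            fmt.Println("message fallback:", time.Duration(secs)*time.Second) // 37s
        }
    }
}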

View File

@@ -57,12 +57,12 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
from := opts.SourceFormat
to := sdktranslator.FromString("openai")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel != "" {
body, _ = sjson.SetBytes(body, "model", upstreamModel)
}
- body = normalizeThinkingConfig(body, upstreamModel)
+ body = normalizeThinkingConfig(body, upstreamModel, false)
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
return resp, errValidate
}
@@ -148,12 +148,12 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
to := sdktranslator.FromString("openai")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
- body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
+ body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel != "" {
body, _ = sjson.SetBytes(body, "model", upstreamModel)
}
- body = normalizeThinkingConfig(body, upstreamModel)
+ body = normalizeThinkingConfig(body, upstreamModel, false)
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
return nil, errValidate
}
@@ -219,7 +219,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
}()
scanner := bufio.NewScanner(httpResp.Body)
- scanner.Buffer(nil, 52_428_800) // 50MB
+ scanner.Buffer(nil, 52_428_800) // 50MB
var param any
for scanner.Scan() {
line := scanner.Bytes()

View File

@@ -54,16 +54,18 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
from := opts.SourceFormat
to := sdktranslator.FromString("openai")
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), opts.Stream)
- if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
+ modelOverride := e.resolveUpstreamModel(req.Model, auth)
+ if modelOverride != "" {
translated = e.overrideModel(translated, modelOverride)
}
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
- translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
+ allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
+ translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel != "" {
if upstreamModel != "" && modelOverride == "" {
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
}
- translated = normalizeThinkingConfig(translated, upstreamModel)
+ translated = normalizeThinkingConfig(translated, upstreamModel, allowCompat)
if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
return resp, errValidate
}
@@ -148,16 +150,18 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
from := opts.SourceFormat
to := sdktranslator.FromString("openai")
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
- if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
+ modelOverride := e.resolveUpstreamModel(req.Model, auth)
+ if modelOverride != "" {
translated = e.overrideModel(translated, modelOverride)
}
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
- translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
+ allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
+ translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel != "" {
if upstreamModel != "" && modelOverride == "" {
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
}
- translated = normalizeThinkingConfig(translated, upstreamModel)
+ translated = normalizeThinkingConfig(translated, upstreamModel, allowCompat)
if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
return nil, errValidate
}
@@ -323,6 +327,27 @@ func (e *OpenAICompatExecutor) resolveUpstreamModel(alias string, auth *cliproxy
return ""
}
func (e *OpenAICompatExecutor) allowCompatReasoningEffort(model string, auth *cliproxyauth.Auth) bool {
trimmed := strings.TrimSpace(model)
if trimmed == "" || e == nil || e.cfg == nil {
return false
}
compat := e.resolveCompatConfig(auth)
if compat == nil || len(compat.Models) == 0 {
return false
}
for i := range compat.Models {
entry := compat.Models[i]
if strings.EqualFold(strings.TrimSpace(entry.Alias), trimmed) {
return true
}
if strings.EqualFold(strings.TrimSpace(entry.Name), trimmed) {
return true
}
}
return false
}
func (e *OpenAICompatExecutor) resolveCompatConfig(auth *cliproxyauth.Auth) *config.OpenAICompatibility {
if auth == nil || e.cfg == nil {
return nil

View File

@@ -48,19 +48,35 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str
// applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
// Metadata values take precedence over any existing field when the model supports thinking, intentionally
// overwriting caller-provided values to honor suffix/default metadata priority.
- func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte {
+ func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string, allowCompat bool) []byte {
if len(metadata) == 0 {
return payload
}
- if !util.ModelSupportsThinking(model) {
- return payload
- }
if field == "" {
return payload
}
+ baseModel := util.ResolveOriginalModel(model, metadata)
+ if baseModel == "" {
+ baseModel = model
+ }
+ if !util.ModelSupportsThinking(baseModel) && !allowCompat {
+ return payload
+ }
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
- if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
- return updated
+ if util.ModelUsesThinkingLevels(baseModel) || allowCompat {
+ if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
+ return updated
+ }
+ }
}
+ // Fallback: numeric thinking_budget suffix for level-based (OpenAI-style) models.
+ if util.ModelUsesThinkingLevels(baseModel) || allowCompat {
+ if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
+ if effort, ok := util.OpenAIThinkingBudgetToEffort(baseModel, *budget); ok && effort != "" {
+ if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
+ return updated
+ }
+ }
+ }
+ }
return payload
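Worked precedence for a level-based model, as a comment sketch; the suffix forms are illustrative, since the exact syntax comes from NormalizeThinkingModel, which this diff does not show.

// 1. metadata reasoning_effort (e.g. "high") wins and is written to the field;
// 2. otherwise a numeric thinking_budget from the model suffix is mapped via
//    OpenAIThinkingBudgetToEffort (e.g. 16384 -> "high");
// 3. otherwise the payload is returned unchanged.
// Budget-based or unsupported models without allowCompat get nothing written.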
@@ -219,31 +235,40 @@ func matchModelPattern(pattern, model string) bool {
// normalizeThinkingConfig normalizes thinking-related fields in the payload
// based on model capabilities. For models without thinking support, it strips
// reasoning fields. For models with level-based thinking, it validates and
- // normalizes the reasoning effort level.
- func normalizeThinkingConfig(payload []byte, model string) []byte {
+ // normalizes the reasoning effort level. For models with numeric budget thinking,
+ // it strips the effort string fields.
+ func normalizeThinkingConfig(payload []byte, model string, allowCompat bool) []byte {
if len(payload) == 0 || model == "" {
return payload
}
if !util.ModelSupportsThinking(model) {
- return stripThinkingFields(payload)
+ if allowCompat {
+ return payload
+ }
+ return stripThinkingFields(payload, false)
}
if util.ModelUsesThinkingLevels(model) {
return normalizeReasoningEffortLevel(payload, model)
}
- return payload
+ // Model supports thinking but uses numeric budgets, not levels.
+ // Strip effort string fields since they are not applicable.
+ return stripThinkingFields(payload, true)
}
// stripThinkingFields removes thinking-related fields from the payload for
- // models that do not support thinking.
- func stripThinkingFields(payload []byte) []byte {
+ // models that do not support thinking. If effortOnly is true, only removes
+ // effort string fields (for models using numeric budgets).
+ func stripThinkingFields(payload []byte, effortOnly bool) []byte {
fieldsToRemove := []string{
"reasoning",
"reasoning_effort",
"reasoning.effort",
}
if !effortOnly {
fieldsToRemove = append([]string{"reasoning"}, fieldsToRemove...)
}
out := payload
for _, field := range fieldsToRemove {
if gjson.GetBytes(out, field).Exists() {

View File

@@ -51,12 +51,12 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
from := opts.SourceFormat
to := sdktranslator.FromString("openai")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel != "" {
body, _ = sjson.SetBytes(body, "model", upstreamModel)
}
- body = normalizeThinkingConfig(body, upstreamModel)
+ body = normalizeThinkingConfig(body, upstreamModel, false)
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
return resp, errValidate
}
@@ -131,12 +131,12 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
to := sdktranslator.FromString("openai")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
- body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
+ body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel != "" {
body, _ = sjson.SetBytes(body, "model", upstreamModel)
}
- body = normalizeThinkingConfig(body, upstreamModel)
+ body = normalizeThinkingConfig(body, upstreamModel, false)
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
return nil, errValidate
}

View File

@@ -214,7 +214,7 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
// Add additional configuration parameters for the Codex API.
template, _ = sjson.Set(template, "parallel_tool_calls", true)
template, _ = sjson.Set(template, "reasoning.effort", "low")
template, _ = sjson.Set(template, "reasoning.effort", "medium")
template, _ = sjson.Set(template, "reasoning.summary", "auto")
template, _ = sjson.Set(template, "stream", true)
template, _ = sjson.Set(template, "store", false)

View File

@@ -245,7 +245,7 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
// Fixed flags aligning with Codex expectations
out, _ = sjson.Set(out, "parallel_tool_calls", true)
out, _ = sjson.Set(out, "reasoning.effort", "low")
out, _ = sjson.Set(out, "reasoning.effort", "medium")
out, _ = sjson.Set(out, "reasoning.summary", "auto")
out, _ = sjson.Set(out, "stream", true)
out, _ = sjson.Set(out, "store", false)

View File

@@ -60,7 +60,7 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
if v := gjson.GetBytes(rawJSON, "reasoning_effort"); v.Exists() {
out, _ = sjson.Set(out, "reasoning.effort", v.Value())
} else {
out, _ = sjson.Set(out, "reasoning.effort", "low")
out, _ = sjson.Set(out, "reasoning.effort", "medium")
}
out, _ = sjson.Set(out, "parallel_tool_calls", true)
out, _ = sjson.Set(out, "reasoning.summary", "auto")

View File

@@ -28,6 +28,9 @@ func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte {
// It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget.
// Returns the normalized budget (nil if thinking should not be enabled) and whether it matched.
func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) {
if !ModelSupportsThinking(modelName) {
return nil, false
}
budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata)
if !matched {
return nil, false

View File

@@ -25,9 +25,15 @@ func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool)
updated = rewritten
}
}
- if includeThoughts != nil {
+ // Default to including thoughts when a budget override is present but no explicit include flag is provided.
+ incl := includeThoughts
+ if incl == nil && budget != nil && *budget != 0 {
+ defaultInclude := true
+ incl = &defaultInclude
+ }
+ if incl != nil {
valuePath := "generationConfig.thinkingConfig.include_thoughts"
- rewritten, err := sjson.SetBytes(updated, valuePath, *includeThoughts)
+ rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
if err == nil {
updated = rewritten
}
@@ -47,9 +53,15 @@ func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *boo
updated = rewritten
}
}
- if includeThoughts != nil {
+ // Default to including thoughts when a budget override is present but no explicit include flag is provided.
+ incl := includeThoughts
+ if incl == nil && budget != nil && *budget != 0 {
+ defaultInclude := true
+ incl = &defaultInclude
+ }
+ if incl != nil {
valuePath := "request.generationConfig.thinkingConfig.include_thoughts"
- rewritten, err := sjson.SetBytes(updated, valuePath, *includeThoughts)
+ rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
if err == nil {
updated = rewritten
}

View File

@@ -0,0 +1,34 @@
package util
// OpenAIThinkingBudgetToEffort maps a numeric thinking budget (tokens)
// into an OpenAI-style reasoning effort level for level-based models.
//
// Ranges:
// - 0 -> "none"
// - 1..1024 -> "low"
// - 1025..8192 -> "medium"
// - 8193..24576 -> "high"
// - 24577.. -> highest supported level for the model (defaults to "xhigh")
//
// Negative values (except the dynamic -1 handled elsewhere) are treated as unsupported.
func OpenAIThinkingBudgetToEffort(model string, budget int) (string, bool) {
switch {
case budget < 0:
return "", false
case budget == 0:
return "none", true
case budget > 0 && budget <= 1024:
return "low", true
case budget <= 8192:
return "medium", true
case budget <= 24576:
return "high", true
case budget > 24576:
if levels := GetModelThinkingLevels(model); len(levels) > 0 {
return levels[len(levels)-1], true
}
return "xhigh", true
default:
return "", false
}
}
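Sample points of the mapping above, assuming a model registered with levels up to "xhigh" (model name illustrative):

// OpenAIThinkingBudgetToEffort("gpt-5.2", 0)     -> "none",   true
// OpenAIThinkingBudgetToEffort("gpt-5.2", 512)   -> "low",    true
// OpenAIThinkingBudgetToEffort("gpt-5.2", 8000)  -> "medium", true
// OpenAIThinkingBudgetToEffort("gpt-5.2", 24576) -> "high",   true
// OpenAIThinkingBudgetToEffort("gpt-5.2", 40000) -> highest registered level, else "xhigh"
// OpenAIThinkingBudgetToEffort("gpt-5.2", -5)    -> "",       false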

View File

@@ -25,33 +25,33 @@ func ModelSupportsThinking(model string) bool {
// or min (0 if zero is allowed and mid <= 0).
func NormalizeThinkingBudget(model string, budget int) int {
if budget == -1 { // dynamic
- if found, min, max, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found {
+ if found, minBudget, maxBudget, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found {
if dynamicAllowed {
return -1
}
- mid := (min + max) / 2
+ mid := (minBudget + maxBudget) / 2
if mid <= 0 && zeroAllowed {
return 0
}
if mid <= 0 {
- return min
+ return minBudget
}
return mid
}
return -1
}
- if found, min, max, zeroAllowed, _ := thinkingRangeFromRegistry(model); found {
+ if found, minBudget, maxBudget, zeroAllowed, _ := thinkingRangeFromRegistry(model); found {
if budget == 0 {
if zeroAllowed {
return 0
}
- return min
+ return minBudget
}
- if budget < min {
- return min
+ if budget < minBudget {
+ return minBudget
}
- if budget > max {
- return max
+ if budget > maxBudget {
+ return maxBudget
}
}
return budget
}
@@ -105,3 +105,16 @@ func NormalizeReasoningEffortLevel(model, effort string) (string, bool) {
}
return "", false
}
// IsOpenAICompatibilityModel reports whether the model is registered as an OpenAI-compatibility model.
// These models may not advertise Thinking metadata in the registry.
func IsOpenAICompatibilityModel(model string) bool {
if model == "" {
return false
}
info := registry.GetGlobalRegistry().GetModelInfo(model)
if info == nil {
return false
}
return strings.EqualFold(strings.TrimSpace(info.Type), "openai-compatibility")
}

View File

@@ -163,6 +163,11 @@ func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (*
if !matched {
return nil, nil, false
}
// Level-based models (OpenAI-style) do not accept numeric thinking budgets in
// Claude/Gemini-style protocols, so we don't derive budgets for them here.
if ModelUsesThinkingLevels(model) {
return nil, nil, false
}
if budget == nil && effort != nil {
if derived, ok := ThinkingEffortToBudget(model, *effort); ok {

View File

@@ -14,6 +14,7 @@ import (
"os"
"path/filepath"
"reflect"
"runtime"
"sort"
"strings"
"sync"
@@ -61,6 +62,7 @@ type Watcher struct {
reloadCallback func(*config.Config)
watcher *fsnotify.Watcher
lastAuthHashes map[string]string
lastRemoveTimes map[string]time.Time
lastConfigHash string
authQueue chan<- AuthUpdate
currentAuths map[string]*coreauth.Auth
@@ -127,8 +129,9 @@ type AuthUpdate struct {
const (
// replaceCheckDelay is a short delay to allow atomic replace (rename) to settle
// before deciding whether a Remove event indicates a real deletion.
- replaceCheckDelay = 50 * time.Millisecond
- configReloadDebounce = 150 * time.Millisecond
+ replaceCheckDelay = 50 * time.Millisecond
+ configReloadDebounce = 150 * time.Millisecond
+ authRemoveDebounceWindow = 1 * time.Second
)
// NewWatcher creates a new file watcher instance
@@ -721,8 +724,9 @@ func (w *Watcher) authFileUnchanged(path string) (bool, error) {
sum := sha256.Sum256(data)
curHash := hex.EncodeToString(sum[:])
+ normalized := w.normalizeAuthPath(path)
w.clientsMutex.RLock()
- prevHash, ok := w.lastAuthHashes[path]
+ prevHash, ok := w.lastAuthHashes[normalized]
w.clientsMutex.RUnlock()
if ok && prevHash == curHash {
return true, nil
@@ -731,19 +735,63 @@ func (w *Watcher) authFileUnchanged(path string) (bool, error) {
}
func (w *Watcher) isKnownAuthFile(path string) bool {
+ normalized := w.normalizeAuthPath(path)
w.clientsMutex.RLock()
defer w.clientsMutex.RUnlock()
- _, ok := w.lastAuthHashes[path]
+ _, ok := w.lastAuthHashes[normalized]
return ok
}
func (w *Watcher) normalizeAuthPath(path string) string {
trimmed := strings.TrimSpace(path)
if trimmed == "" {
return ""
}
cleaned := filepath.Clean(trimmed)
if runtime.GOOS == "windows" {
cleaned = strings.TrimPrefix(cleaned, `\\?\`)
cleaned = strings.ToLower(cleaned)
}
return cleaned
}
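Illustrative inputs and outputs for the normalization (paths hypothetical):

// Windows: `  \\?\C:\Auth\A.JSON ` -> trim, Clean, strip the \\?\ prefix, lower-case -> `c:\auth\a.json`
// Unix:    " /home/me/auth//a.json " -> trim and Clean only, case preserved -> "/home/me/auth/a.json"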
func (w *Watcher) shouldDebounceRemove(normalizedPath string, now time.Time) bool {
if normalizedPath == "" {
return false
}
w.clientsMutex.Lock()
if w.lastRemoveTimes == nil {
w.lastRemoveTimes = make(map[string]time.Time)
}
if last, ok := w.lastRemoveTimes[normalizedPath]; ok {
if now.Sub(last) < authRemoveDebounceWindow {
w.clientsMutex.Unlock()
return true
}
}
w.lastRemoveTimes[normalizedPath] = now
if len(w.lastRemoveTimes) > 128 {
cutoff := now.Add(-2 * authRemoveDebounceWindow)
for p, t := range w.lastRemoveTimes {
if t.Before(cutoff) {
delete(w.lastRemoveTimes, p)
}
}
}
w.clientsMutex.Unlock()
return false
}
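Timeline sketch of the debounce semantics above, with authRemoveDebounceWindow = 1s:

// t=0.00s Remove a.json -> processed, timestamp recorded
// t=0.30s Remove a.json -> debounced (inside the 1s window)
// t=1.50s Remove a.json -> processed again, timestamp refreshed
// Past 128 tracked paths, entries older than twice the window are pruned.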
// handleEvent processes individual file system events
func (w *Watcher) handleEvent(event fsnotify.Event) {
// Filter only relevant events: config file or auth-dir JSON files.
configOps := fsnotify.Write | fsnotify.Create | fsnotify.Rename
- isConfigEvent := event.Name == w.configPath && event.Op&configOps != 0
+ normalizedName := w.normalizeAuthPath(event.Name)
+ normalizedConfigPath := w.normalizeAuthPath(w.configPath)
+ normalizedAuthDir := w.normalizeAuthPath(w.authDir)
+ isConfigEvent := normalizedName == normalizedConfigPath && event.Op&configOps != 0
authOps := fsnotify.Create | fsnotify.Write | fsnotify.Remove | fsnotify.Rename
- isAuthJSON := strings.HasPrefix(event.Name, w.authDir) && strings.HasSuffix(event.Name, ".json") && event.Op&authOps != 0
+ isAuthJSON := strings.HasPrefix(normalizedName, normalizedAuthDir) && strings.HasSuffix(normalizedName, ".json") && event.Op&authOps != 0
if !isConfigEvent && !isAuthJSON {
// Ignore unrelated files (e.g., cookie snapshots *.cookie) and other noise.
return
@@ -761,6 +809,10 @@ func (w *Watcher) handleEvent(event fsnotify.Event) {
// Handle auth directory changes incrementally (.json only)
if event.Op&(fsnotify.Remove|fsnotify.Rename) != 0 {
if w.shouldDebounceRemove(normalizedName, now) {
log.Debugf("debouncing remove event for %s", filepath.Base(event.Name))
return
}
// Atomic replace on some platforms may surface as Rename (or Remove) before the new file is ready.
// Wait briefly; if the path exists again, treat as an update instead of removal.
time.Sleep(replaceCheckDelay)
@@ -978,7 +1030,8 @@ func (w *Watcher) reloadClients(rescanAuth bool, affectedOAuthProviders []string
if !info.IsDir() && strings.HasSuffix(strings.ToLower(info.Name()), ".json") {
if data, errReadFile := os.ReadFile(path); errReadFile == nil && len(data) > 0 {
sum := sha256.Sum256(data)
- w.lastAuthHashes[path] = hex.EncodeToString(sum[:])
+ normalizedPath := w.normalizeAuthPath(path)
+ w.lastAuthHashes[normalizedPath] = hex.EncodeToString(sum[:])
}
}
return nil
@@ -1025,6 +1078,7 @@ func (w *Watcher) addOrUpdateClient(path string) {
sum := sha256.Sum256(data)
curHash := hex.EncodeToString(sum[:])
normalized := w.normalizeAuthPath(path)
w.clientsMutex.Lock()
@@ -1034,14 +1088,14 @@ func (w *Watcher) addOrUpdateClient(path string) {
w.clientsMutex.Unlock()
return
}
- if prev, ok := w.lastAuthHashes[path]; ok && prev == curHash {
+ if prev, ok := w.lastAuthHashes[normalized]; ok && prev == curHash {
log.Debugf("auth file unchanged (hash match), skipping reload: %s", filepath.Base(path))
w.clientsMutex.Unlock()
return
}
// Update hash cache
- w.lastAuthHashes[path] = curHash
+ w.lastAuthHashes[normalized] = curHash
w.clientsMutex.Unlock() // Unlock before the callback
@@ -1056,10 +1110,11 @@ func (w *Watcher) addOrUpdateClient(path string) {
// removeClient handles the removal of a single client.
func (w *Watcher) removeClient(path string) {
+ normalized := w.normalizeAuthPath(path)
w.clientsMutex.Lock()
cfg := w.config
- delete(w.lastAuthHashes, path)
+ delete(w.lastAuthHashes, normalized)
w.clientsMutex.Unlock() // Release the lock before the callback

View File

@@ -7,7 +7,6 @@
package claude
import (
"bufio"
"bytes"
"compress/gzip"
"context"
@@ -219,52 +218,24 @@ func (h *ClaudeCodeAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON [
}
func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
- // v6.1: Intelligent Buffered Streamer strategy
- // Enhanced buffering with larger buffer size (16KB) and longer flush interval (120ms).
- // Smart flush only when buffer is sufficiently filled (≥50%), dramatically reducing
- // flush frequency from ~12.5Hz to ~5-8Hz while maintaining low latency.
- writer := bufio.NewWriterSize(c.Writer, 16*1024) // 4KB → 16KB
- ticker := time.NewTicker(120 * time.Millisecond) // 80ms → 120ms
- defer ticker.Stop()
- var chunkIdx int
+ // OpenAI-style stream forwarding: write each SSE chunk and flush immediately.
+ // This guarantees clients see incremental output even for small responses.
for {
select {
case <-c.Request.Context().Done():
- // Context cancelled, flush any remaining data before exit
- _ = writer.Flush()
cancel(c.Request.Context().Err())
return
- case <-ticker.C:
- // Smart flush: only flush when buffer has sufficient data (≥50% full)
- // This reduces flush frequency while ensuring data flows naturally
- buffered := writer.Buffered()
- if buffered >= 8*1024 { // At least 8KB (50% of 16KB buffer)
- if err := writer.Flush(); err != nil {
- // Error flushing, cancel and return
- cancel(err)
- return
- }
- flusher.Flush() // Also flush the underlying http.ResponseWriter
- }
case chunk, ok := <-data:
if !ok {
- // Stream ended, flush remaining data
- _ = writer.Flush()
flusher.Flush()
cancel(nil)
return
}
// Forward the complete SSE event block directly (already formatted by the translator).
// The translator returns a complete SSE-compliant event block, including event:, data:, and separators.
// The handler just needs to forward it without reassembly.
if len(chunk) > 0 {
- _, _ = writer.Write(chunk)
+ _, _ = c.Writer.Write(chunk)
+ flusher.Flush()
}
- chunkIdx++
case errMsg, ok := <-errs:
if !ok {
@@ -276,21 +247,20 @@ func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.
status = errMsg.StatusCode
}
c.Status(status)
// An error occurred: emit as a proper SSE error event
errorBytes, _ := json.Marshal(h.toClaudeError(errMsg))
_, _ = writer.WriteString("event: error\n")
_, _ = writer.WriteString("data: ")
_, _ = writer.Write(errorBytes)
_, _ = writer.WriteString("\n\n")
_ = writer.Flush()
_, _ = fmt.Fprintf(c.Writer, "event: error\ndata: %s\n\n", errorBytes)
flusher.Flush()
}
var execErr error
if errMsg != nil {
execErr = errMsg.Error
}
cancel(execErr)
return
+ case <-time.After(500 * time.Millisecond):
}
}
}

View File

@@ -116,19 +116,29 @@ func (h *BaseAPIHandler) GetContextWithCancel(handler interfaces.APIHandler, c *
newCtx = context.WithValue(newCtx, "gin", c)
newCtx = context.WithValue(newCtx, "handler", handler)
return newCtx, func(params ...interface{}) {
- if h.Cfg.RequestLog {
- if len(params) == 1 {
- data := params[0]
- switch data.(type) {
- case []byte:
- appendAPIResponse(c, data.([]byte))
- case error:
- appendAPIResponse(c, []byte(data.(error).Error()))
- case string:
- appendAPIResponse(c, []byte(data.(string)))
- case bool:
- case nil:
+ if h.Cfg.RequestLog && len(params) == 1 {
+ var payload []byte
+ switch data := params[0].(type) {
+ case []byte:
+ payload = data
+ case error:
+ if data != nil {
+ payload = []byte(data.Error())
+ }
+ case string:
+ payload = []byte(data)
+ }
+ if len(payload) > 0 {
+ if existing, exists := c.Get("API_RESPONSE"); exists {
+ if existingBytes, ok := existing.([]byte); ok && len(existingBytes) > 0 {
+ trimmedPayload := bytes.TrimSpace(payload)
+ if len(trimmedPayload) > 0 && bytes.Contains(existingBytes, trimmedPayload) {
+ cancel()
+ return
+ }
+ }
+ }
+ appendAPIResponse(c, payload)
+ }
+ }

View File

@@ -107,7 +107,7 @@ func (a *IFlowAuthenticator) Login(ctx context.Context, cfg *config.Config, opts
return nil, fmt.Errorf("iflow authentication failed: missing account identifier")
}
fileName := fmt.Sprintf("iflow-%s.json", email)
fileName := fmt.Sprintf("iflow-%s-%d.json", email, time.Now().Unix())
metadata := map[string]any{
"email": email,
"api_key": tokenStorage.APIKey,

View File

@@ -0,0 +1,561 @@
package test
import (
"fmt"
"net/http"
"strings"
"testing"
"time"
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
// statusErr mirrors executor.statusErr to keep validation behavior aligned.
type statusErr struct {
code int
msg string
}
func (e statusErr) Error() string { return e.msg }
// registerCoreModels loads representative models across providers into the registry
// so NormalizeThinkingBudget and level validation use real ranges.
func registerCoreModels(t *testing.T) func() {
t.Helper()
reg := registry.GetGlobalRegistry()
uid := fmt.Sprintf("thinking-core-%d", time.Now().UnixNano())
reg.RegisterClient(uid+"-gemini", "gemini", registry.GetGeminiModels())
reg.RegisterClient(uid+"-claude", "claude", registry.GetClaudeModels())
reg.RegisterClient(uid+"-openai", "codex", registry.GetOpenAIModels())
reg.RegisterClient(uid+"-qwen", "qwen", registry.GetQwenModels())
return func() {
reg.UnregisterClient(uid + "-gemini")
reg.UnregisterClient(uid + "-claude")
reg.UnregisterClient(uid + "-openai")
reg.UnregisterClient(uid + "-qwen")
}
}
func buildRawPayload(fromProtocol, modelWithSuffix string) []byte {
switch fromProtocol {
case "gemini":
return []byte(fmt.Sprintf(`{"model":"%s","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, modelWithSuffix))
case "openai-response":
return []byte(fmt.Sprintf(`{"model":"%s","input":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`, modelWithSuffix))
default: // openai / claude and other chat-style payloads
return []byte(fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, modelWithSuffix))
}
}
// applyThinkingMetadataLocal mirrors executor.applyThinkingMetadata.
func applyThinkingMetadataLocal(payload []byte, metadata map[string]any, model string) []byte {
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
if !ok || (budgetOverride == nil && includeOverride == nil) {
return payload
}
if !util.ModelSupportsThinking(model) {
return payload
}
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(model, *budgetOverride)
budgetOverride = &norm
}
return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
}
// applyReasoningEffortMetadataLocal mirrors executor.applyReasoningEffortMetadata.
func applyReasoningEffortMetadataLocal(payload []byte, metadata map[string]any, model, field string) []byte {
if len(metadata) == 0 {
return payload
}
if !util.ModelSupportsThinking(model) {
return payload
}
if field == "" {
return payload
}
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
return updated
}
}
if util.ModelUsesThinkingLevels(model) {
if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
if effort, ok := util.OpenAIThinkingBudgetToEffort(model, *budget); ok && effort != "" {
if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
return updated
}
}
}
}
return payload
}
// normalizeThinkingConfigLocal mirrors executor.normalizeThinkingConfig.
func normalizeThinkingConfigLocal(payload []byte, model string) []byte {
if len(payload) == 0 || model == "" {
return payload
}
if !util.ModelSupportsThinking(model) {
return stripThinkingFieldsLocal(payload, false)
}
if util.ModelUsesThinkingLevels(model) {
return normalizeReasoningEffortLevelLocal(payload, model)
}
// Model supports thinking but uses numeric budgets, not levels.
// Strip effort string fields since they are not applicable.
return stripThinkingFieldsLocal(payload, true)
}
// stripThinkingFieldsLocal mirrors executor.stripThinkingFields.
func stripThinkingFieldsLocal(payload []byte, effortOnly bool) []byte {
fieldsToRemove := []string{
"reasoning_effort",
"reasoning.effort",
}
if !effortOnly {
fieldsToRemove = append([]string{"reasoning"}, fieldsToRemove...)
}
out := payload
for _, field := range fieldsToRemove {
if gjson.GetBytes(out, field).Exists() {
out, _ = sjson.DeleteBytes(out, field)
}
}
return out
}
// normalizeReasoningEffortLevelLocal mirrors executor.normalizeReasoningEffortLevel.
func normalizeReasoningEffortLevelLocal(payload []byte, model string) []byte {
out := payload
if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() {
if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
out, _ = sjson.SetBytes(out, "reasoning_effort", normalized)
}
}
if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() {
if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
out, _ = sjson.SetBytes(out, "reasoning.effort", normalized)
}
}
return out
}
// validateThinkingConfigLocal mirrors executor.validateThinkingConfig.
func validateThinkingConfigLocal(payload []byte, model string) error {
if len(payload) == 0 || model == "" {
return nil
}
if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) {
return nil
}
levels := util.GetModelThinkingLevels(model)
checkField := func(path string) error {
if effort := gjson.GetBytes(payload, path); effort.Exists() {
if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok {
return statusErr{
code: http.StatusBadRequest,
msg: fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")),
}
}
}
return nil
}
if err := checkField("reasoning_effort"); err != nil {
return err
}
if err := checkField("reasoning.effort"); err != nil {
return err
}
return nil
}
// normalizeCodexPayload mirrors codex_executor's reasoning + streaming tweaks.
func normalizeCodexPayload(body []byte, upstreamModel string) ([]byte, error) {
body = normalizeThinkingConfigLocal(body, upstreamModel)
if err := validateThinkingConfigLocal(body, upstreamModel); err != nil {
return body, err
}
body, _ = sjson.SetBytes(body, "model", upstreamModel)
body, _ = sjson.SetBytes(body, "stream", true)
body, _ = sjson.DeleteBytes(body, "previous_response_id")
return body, nil
}
// buildBodyForProtocol runs a minimal request through the same translation and
// thinking pipeline used in executors for the given target protocol.
func buildBodyForProtocol(t *testing.T, fromProtocol, toProtocol, modelWithSuffix string) ([]byte, error) {
t.Helper()
normalizedModel, metadata := util.NormalizeThinkingModel(modelWithSuffix)
upstreamModel := util.ResolveOriginalModel(normalizedModel, metadata)
raw := buildRawPayload(fromProtocol, modelWithSuffix)
stream := fromProtocol != toProtocol
body := sdktranslator.TranslateRequest(
sdktranslator.FromString(fromProtocol),
sdktranslator.FromString(toProtocol),
normalizedModel,
raw,
stream,
)
var err error
switch toProtocol {
case "gemini":
body = applyThinkingMetadataLocal(body, metadata, normalizedModel)
body = util.ApplyDefaultThinkingIfNeeded(normalizedModel, body)
body = util.NormalizeGeminiThinkingBudget(normalizedModel, body)
body = util.StripThinkingConfigIfUnsupported(normalizedModel, body)
case "claude":
if budget, ok := util.ResolveClaudeThinkingConfig(normalizedModel, metadata); ok {
body = util.ApplyClaudeThinkingConfig(body, budget)
}
case "openai":
body = applyReasoningEffortMetadataLocal(body, metadata, normalizedModel, "reasoning_effort")
body = normalizeThinkingConfigLocal(body, upstreamModel)
err = validateThinkingConfigLocal(body, upstreamModel)
case "codex": // OpenAI responses / codex
body = applyReasoningEffortMetadataLocal(body, metadata, normalizedModel, "reasoning.effort")
// Mirror CodexExecutor final normalization and model override so tests log the final body.
body, err = normalizeCodexPayload(body, upstreamModel)
default:
// Other target protocols need no extra thinking handling here.
}
// Mirror executor behavior: final payload uses the upstream (base) model name.
if upstreamModel != "" {
body, _ = sjson.SetBytes(body, "model", upstreamModel)
}
// For tests we only keep model + thinking-related fields to avoid noise.
body = filterThinkingBody(toProtocol, body, upstreamModel, normalizedModel)
return body, err
}
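// Typical use in a test body (sketch; the exact thinkingBudget depends on the
// registered model limits):
//
//	body, err := buildBodyForProtocol(t, "claude", "gemini", "gemini-2.5-pro(8192)")
//	// body holds only the model plus generationConfig.thinkingConfig, e.g. a
//	// thinkingBudget derived from the (8192) suffix.
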
// filterThinkingBody projects the translated payload down to only model and
// thinking-related fields for the given target protocol.
func filterThinkingBody(toProtocol string, body []byte, upstreamModel, normalizedModel string) []byte {
if len(body) == 0 {
return body
}
out := []byte(`{}`)
// Preserve model if present, otherwise fall back to upstream/normalized model.
if m := gjson.GetBytes(body, "model"); m.Exists() {
out, _ = sjson.SetBytes(out, "model", m.Value())
} else if upstreamModel != "" {
out, _ = sjson.SetBytes(out, "model", upstreamModel)
} else if normalizedModel != "" {
out, _ = sjson.SetBytes(out, "model", normalizedModel)
}
switch toProtocol {
case "gemini":
if tc := gjson.GetBytes(body, "generationConfig.thinkingConfig"); tc.Exists() {
out, _ = sjson.SetRawBytes(out, "generationConfig.thinkingConfig", []byte(tc.Raw))
}
case "claude":
if tcfg := gjson.GetBytes(body, "thinking"); tcfg.Exists() {
out, _ = sjson.SetRawBytes(out, "thinking", []byte(tcfg.Raw))
}
case "openai":
if re := gjson.GetBytes(body, "reasoning_effort"); re.Exists() {
out, _ = sjson.SetBytes(out, "reasoning_effort", re.Value())
}
case "codex":
if re := gjson.GetBytes(body, "reasoning.effort"); re.Exists() {
out, _ = sjson.SetBytes(out, "reasoning.effort", re.Value())
}
}
return out
}
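// Example (illustrative): everything except the model and the protocol's
// thinking fields is dropped.
//
//	in := []byte(`{"model":"m","messages":[],"thinking":{"type":"enabled","budget_tokens":1024}}`)
//	filterThinkingBody("claude", in, "", "") // => {"model":"m","thinking":{"type":"enabled","budget_tokens":1024}}
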
func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) {
cleanup := registerCoreModels(t)
defer cleanup()
models := []string{
"gpt-5", // supports levels (low/medium/high)
"gemini-2.5-pro", // supports numeric budget
"qwen3-coder-flash", // no thinking support
}
fromProtocols := []string{"openai", "claude", "gemini", "openai-response"}
toProtocols := []string{"gemini", "claude", "openai", "codex"}
type scenario struct {
name string
modelSuffix string
expectFn func(info *registry.ModelInfo) (present bool, budget int64)
}
buildBudgetFn := func(raw int) func(info *registry.ModelInfo) (bool, int64) {
return func(info *registry.ModelInfo) (bool, int64) {
if info == nil || info.Thinking == nil {
return false, 0
}
return true, int64(util.NormalizeThinkingBudget(info.ID, raw))
}
}
levelBudgetFn := func(level string) func(info *registry.ModelInfo) (bool, int64) {
return func(info *registry.ModelInfo) (bool, int64) {
if info == nil || info.Thinking == nil {
return false, 0
}
if b, ok := util.ThinkingEffortToBudget(info.ID, level); ok {
return true, int64(b)
}
return false, 0
}
}
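// Hedged reading of the helpers above: on a budget-based model,
// buildBudgetFn(24577) expects NormalizeThinkingBudget to pull the raw value
// back into the model's [Min, Max] range, while levelBudgetFn("high") expects
// whatever budget ThinkingEffortToBudget assigns to the "high" level.
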
for _, model := range models {
info := registry.GetGlobalRegistry().GetModelInfo(model)
min, max := 0, 0
if info != nil && info.Thinking != nil {
min = info.Thinking.Min
max = info.Thinking.Max
}
for _, from := range fromProtocols {
// Scenario selection follows protocol semantics:
// - OpenAI-style protocols (openai/openai-response) express thinking as levels.
// - Claude/Gemini-style protocols express thinking as numeric budgets.
cases := []scenario{
{name: "no-suffix", modelSuffix: model, expectFn: func(_ *registry.ModelInfo) (bool, int64) { return false, 0 }},
}
if from == "openai" || from == "openai-response" {
cases = append(cases,
scenario{name: "level-low", modelSuffix: fmt.Sprintf("%s(low)", model), expectFn: levelBudgetFn("low")},
scenario{name: "level-high", modelSuffix: fmt.Sprintf("%s(high)", model), expectFn: levelBudgetFn("high")},
scenario{name: "level-auto", modelSuffix: fmt.Sprintf("%s(auto)", model), expectFn: levelBudgetFn("auto")},
)
} else { // claude or gemini
if util.ModelUsesThinkingLevels(model) {
// Numeric budgets for level-based models are mapped into levels when needed.
cases = append(cases,
scenario{name: "numeric-0", modelSuffix: fmt.Sprintf("%s(0)", model), expectFn: buildBudgetFn(0)},
scenario{name: "numeric-1024", modelSuffix: fmt.Sprintf("%s(1024)", model), expectFn: buildBudgetFn(1024)},
scenario{name: "numeric-1025", modelSuffix: fmt.Sprintf("%s(1025)", model), expectFn: buildBudgetFn(1025)},
scenario{name: "numeric-8192", modelSuffix: fmt.Sprintf("%s(8192)", model), expectFn: buildBudgetFn(8192)},
scenario{name: "numeric-8193", modelSuffix: fmt.Sprintf("%s(8193)", model), expectFn: buildBudgetFn(8193)},
scenario{name: "numeric-24576", modelSuffix: fmt.Sprintf("%s(24576)", model), expectFn: buildBudgetFn(24576)},
scenario{name: "numeric-24577", modelSuffix: fmt.Sprintf("%s(24577)", model), expectFn: buildBudgetFn(24577)},
)
} else {
cases = append(cases,
scenario{name: "numeric-below-min", modelSuffix: fmt.Sprintf("%s(%d)", model, min-10), expectFn: buildBudgetFn(min - 10)},
scenario{name: "numeric-above-max", modelSuffix: fmt.Sprintf("%s(%d)", model, max+10), expectFn: buildBudgetFn(max + 10)},
)
}
}
for _, to := range toProtocols {
if from == to {
continue
}
t.Logf("─────────────────────────────────────────────────────────────────────────────────")
t.Logf(" %s -> %s | model: %s", from, to, model)
t.Logf("─────────────────────────────────────────────────────────────────────────────────")
for _, cs := range cases {
from := from
to := to
cs := cs
testName := fmt.Sprintf("%s->%s/%s/%s", from, to, model, cs.name)
t.Run(testName, func(t *testing.T) {
normalizedModel, metadata := util.NormalizeThinkingModel(cs.modelSuffix)
expectPresent, expectValue, expectErr := func() (bool, string, bool) {
switch to {
case "gemini":
budget, include, ok := util.ResolveThinkingConfigFromMetadata(normalizedModel, metadata)
if !ok || !util.ModelSupportsThinking(normalizedModel) {
return false, "", false
}
if include != nil && !*include {
return false, "", false
}
if budget == nil {
return false, "", false
}
norm := util.NormalizeThinkingBudget(normalizedModel, *budget)
return true, fmt.Sprintf("%d", norm), false
case "claude":
if !util.ModelSupportsThinking(normalizedModel) {
return false, "", false
}
budget, ok := util.ResolveClaudeThinkingConfig(normalizedModel, metadata)
if !ok || budget == nil {
return false, "", false
}
return true, fmt.Sprintf("%d", *budget), false
case "openai":
if !util.ModelSupportsThinking(normalizedModel) {
return false, "", false
}
if !util.ModelUsesThinkingLevels(normalizedModel) {
// Budget-based (non-level) models don't accept effort strings on the openai protocol.
return false, "", false
}
effort, ok := util.ReasoningEffortFromMetadata(metadata)
if !ok || strings.TrimSpace(effort) == "" {
if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
if mapped, okMap := util.OpenAIThinkingBudgetToEffort(normalizedModel, *budget); okMap {
effort = mapped
ok = true
}
}
}
if !ok || strings.TrimSpace(effort) == "" {
return false, "", false
}
effort = strings.ToLower(strings.TrimSpace(effort))
if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, effort); okLevel {
return true, normalized, false
}
return false, "", true // validation would fail
case "codex":
if !util.ModelSupportsThinking(normalizedModel) {
return false, "", false
}
if !util.ModelUsesThinkingLevels(normalizedModel) {
// Budget-based models don't accept effort strings on the codex
// protocol either, regardless of the source protocol.
return false, "", false
}
effort, ok := util.ReasoningEffortFromMetadata(metadata)
if ok && strings.TrimSpace(effort) != "" {
effort = strings.ToLower(strings.TrimSpace(effort))
if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, effort); okLevel {
return true, normalized, false
}
return false, "", true
}
if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
if mapped, okMap := util.OpenAIThinkingBudgetToEffort(normalizedModel, *budget); okMap && mapped != "" {
mapped = strings.ToLower(strings.TrimSpace(mapped))
if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, mapped); okLevel {
return true, normalized, false
}
return false, "", true
}
}
if from != "openai-response" {
// Codex translators default reasoning.effort to "medium" when
// no explicit thinking suffix/metadata is provided.
return true, "medium", false
}
return false, "", false
default:
return false, "", false
}
}()
body, err := buildBodyForProtocol(t, from, to, cs.modelSuffix)
actualPresent, actualValue := func() (bool, string) {
path := ""
switch to {
case "gemini":
path = "generationConfig.thinkingConfig.thinkingBudget"
case "claude":
path = "thinking.budget_tokens"
case "openai":
path = "reasoning_effort"
case "codex":
path = "reasoning.effort"
}
if path == "" {
return false, ""
}
val := gjson.GetBytes(body, path)
if to == "codex" && !val.Exists() {
reasoning := gjson.GetBytes(body, "reasoning")
if reasoning.Exists() {
val = reasoning.Get("effort")
}
}
if !val.Exists() {
return false, ""
}
if val.Type == gjson.Number {
return true, fmt.Sprintf("%d", val.Int())
}
return true, val.String()
}()
t.Logf("from=%s to=%s model=%s suffix=%s present(expect=%v got=%v) value(expect=%s got=%s) err(expect=%v got=%v) body=%s",
from, to, model, cs.modelSuffix, expectPresent, actualPresent, expectValue, actualValue, expectErr, err != nil, string(body))
if expectErr {
if err == nil {
t.Fatalf("expected validation error but got none, body=%s", string(body))
}
return
}
if err != nil {
t.Fatalf("unexpected error: %v body=%s", err, string(body))
}
if expectPresent != actualPresent {
t.Fatalf("presence mismatch: expect %v got %v body=%s", expectPresent, actualPresent, string(body))
}
if expectPresent && expectValue != actualValue {
t.Fatalf("value mismatch: expect %s got %s body=%s", expectValue, actualValue, string(body))
}
})
}
}
}
}
}
func TestOpenAIThinkingBudgetToEffortRanges(t *testing.T) {
cleanup := registerCoreModels(t)
defer cleanup()
cases := []struct {
name string
model string
budget int
want string
ok bool
}{
{name: "zero-none", model: "gpt-5", budget: 0, want: "none", ok: true},
{name: "low-min", model: "gpt-5", budget: 1, want: "low", ok: true},
{name: "low-max", model: "gpt-5", budget: 1024, want: "low", ok: true},
{name: "medium-min", model: "gpt-5", budget: 1025, want: "medium", ok: true},
{name: "medium-max", model: "gpt-5", budget: 8192, want: "medium", ok: true},
{name: "high-min", model: "gpt-5", budget: 8193, want: "high", ok: true},
{name: "high-max", model: "gpt-5", budget: 24576, want: "high", ok: true},
{name: "over-max-clamps-to-highest", model: "gpt-5", budget: 24577, want: "high", ok: true},
{name: "over-max-xhigh-model", model: "gpt-5.2", budget: 50000, want: "xhigh", ok: true},
{name: "negative-unsupported", model: "gpt-5", budget: -5, want: "", ok: false},
}
for _, cs := range cases {
cs := cs
t.Run(cs.name, func(t *testing.T) {
got, ok := util.OpenAIThinkingBudgetToEffort(cs.model, cs.budget)
if ok != cs.ok {
t.Fatalf("ok mismatch for model=%s budget=%d: expect %v got %v", cs.model, cs.budget, cs.ok, ok)
}
if got != cs.want {
t.Fatalf("value mismatch for model=%s budget=%d: expect %q got %q", cs.model, cs.budget, cs.want, got)
}
})
}
}