fix(translator): handle empty item type and string content in OpenAI response parser

Merge pull request #547 from router-for-me/amp
feat(amp): require API key authentication for management routes
2026-02-06 06:20:51 +08:00 · 2025-12-15 20:35:52 +08:00 · 2025-12-15 16:14:49 +08:00 · 2025-12-15 15:59:16 +08:00 · 2025-12-15 13:24:53 +08:00 · 2025-12-15 13:14:55 +08:00
49 changed files with 1623 additions and 339 deletions
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -136,8 +136,8 @@ ws-auth: false
 #   upstream-url: "https://ampcode.com"
 #   # Optional: Override API key for Amp upstream (otherwise uses env or file)
 #   upstream-api-key: ""
-#   # Restrict Amp management routes (/api/auth, /api/user, etc.) to localhost only (recommended)
-#   restrict-management-to-localhost: true
+#   # Restrict Amp management routes (/api/auth, /api/user, etc.) to localhost only (default: false)
+#   restrict-management-to-localhost: false
 #   # Force model mappings to run before checking local API keys (default: false)
 #   force-model-mappings: false
 #   # Amp Model Mappings
--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
@@ -26,6 +26,7 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	sdkAuth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth"
 	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
@@ -266,6 +267,54 @@ func (h *Handler) ListAuthFiles(c *gin.Context) {
 	c.JSON(200, gin.H{"files": files})
 }

+// GetAuthFileModels returns the models supported by a specific auth file
+func (h *Handler) GetAuthFileModels(c *gin.Context) {
+	name := c.Query("name")
+	if name == "" {
+		c.JSON(400, gin.H{"error": "name is required"})
+		return
+	}
+
+	// Try to find auth ID via authManager
+	var authID string
+	if h.authManager != nil {
+		auths := h.authManager.List()
+		for _, auth := range auths {
+			if auth.FileName == name || auth.ID == name {
+				authID = auth.ID
+				break
+			}
+		}
+	}
+
+	if authID == "" {
+		authID = name // fallback to filename as ID
+	}
+
+	// Get models from registry
+	reg := registry.GetGlobalRegistry()
+	models := reg.GetModelsForClient(authID)
+
+	result := make([]gin.H, 0, len(models))
+	for _, m := range models {
+		entry := gin.H{
+			"id": m.ID,
+		}
+		if m.DisplayName != "" {
+			entry["display_name"] = m.DisplayName
+		}
+		if m.Type != "" {
+			entry["type"] = m.Type
+		}
+		if m.OwnedBy != "" {
+			entry["owned_by"] = m.OwnedBy
+		}
+		result = append(result, entry)
+	}
+
+	c.JSON(200, gin.H{"models": result})
+}
+
 // List auth files from disk when the auth manager is unavailable.
 func (h *Handler) listAuthFilesFromDisk(c *gin.Context) {
 	entries, err := os.ReadDir(h.cfg.AuthDir)
@@ -1722,6 +1771,17 @@ func (h *Handler) RequestIFlowCookieToken(c *gin.Context) {
 		return
 	}

+	// Check for duplicate BXAuth before authentication
+	bxAuth := iflowauth.ExtractBXAuth(cookieValue)
+	if existingFile, err := iflowauth.CheckDuplicateBXAuth(h.cfg.AuthDir, bxAuth); err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"status": "error", "error": "failed to check duplicate"})
+		return
+	} else if existingFile != "" {
+		existingFileName := filepath.Base(existingFile)
+		c.JSON(http.StatusConflict, gin.H{"status": "error", "error": "duplicate BXAuth found", "existing_file": existingFileName})
+		return
+	}
+
 	authSvc := iflowauth.NewIFlowAuth(h.cfg)
 	tokenData, errAuth := authSvc.AuthenticateWithCookie(ctx, cookieValue)
 	if errAuth != nil {
@@ -1744,11 +1804,12 @@ func (h *Handler) RequestIFlowCookieToken(c *gin.Context) {
 	}

 	tokenStorage.Email = email
+	timestamp := time.Now().Unix()

 	record := &coreauth.Auth{
-		ID:       fmt.Sprintf("iflow-%s.json", fileName),
+		ID:       fmt.Sprintf("iflow-%s-%d.json", fileName, timestamp),
 		Provider: "iflow",
-		FileName: fmt.Sprintf("iflow-%s.json", fileName),
+		FileName: fmt.Sprintf("iflow-%s-%d.json", fileName, timestamp),
 		Storage:  tokenStorage,
 		Metadata: map[string]any{
 			"email":        email,
--- a/internal/api/modules/amp/amp.go
+++ b/internal/api/modules/amp/amp.go
@@ -137,7 +137,8 @@ func (m *AmpModule) Register(ctx modules.Context) error {
 		m.registerProviderAliases(ctx.Engine, ctx.BaseHandler, auth)

 		// Register management proxy routes once; middleware will gate access when upstream is unavailable.
-		m.registerManagementRoutes(ctx.Engine, ctx.BaseHandler)
+		// Pass auth middleware to require valid API key for all management routes.
+		m.registerManagementRoutes(ctx.Engine, ctx.BaseHandler, auth)

 		// If no upstream URL, skip proxy routes but provider aliases are still available
 		if upstreamURL == "" {
@@ -187,9 +188,6 @@ func (m *AmpModule) OnConfigUpdated(cfg *config.Config) error {

 	if oldSettings != nil && oldSettings.RestrictManagementToLocalhost != newSettings.RestrictManagementToLocalhost {
 		m.setRestrictToLocalhost(newSettings.RestrictManagementToLocalhost)
-		if !newSettings.RestrictManagementToLocalhost {
-			log.Warnf("amp management routes now accessible from any IP - this is insecure!")
-		}
 	}

 	newUpstreamURL := strings.TrimSpace(newSettings.UpstreamURL)
--- a/internal/api/modules/amp/fallback_handlers.go
+++ b/internal/api/modules/amp/fallback_handlers.go
@@ -64,7 +64,7 @@ func logAmpRouting(routeType AmpRouteType, requestedModel, resolvedModel, provid
 		fields["cost"] = "amp_credits"
 		fields["source"] = "ampcode.com"
 		fields["model_id"] = requestedModel // Explicit model_id for easy config reference
-		log.WithFields(fields).Warnf("forwarding to ampcode.com (uses amp credits) - model_id: %s | To use local proxy, add to config: amp-model-mappings: [{from: \"%s\", to: \"<your-local-model>\"}]", requestedModel, requestedModel)
+		log.WithFields(fields).Warnf("forwarding to ampcode.com (uses amp credits) - model_id: %s | To use local provider, add to config: ampcode.model-mappings: [{from: \"%s\", to: \"<your-local-model>\"}]", requestedModel, requestedModel)

 	case RouteTypeNoProvider:
 		fields["cost"] = "none"
--- a/internal/api/modules/amp/proxy.go
+++ b/internal/api/modules/amp/proxy.go
@@ -41,6 +41,11 @@ func createReverseProxy(upstreamURL string, secretSource SecretSource) (*httputi
 		originalDirector(req)
 		req.Host = parsed.Host

+		// Remove client's Authorization header - it was only used for CLI Proxy API authentication
+		// We will set our own Authorization using the configured upstream-api-key
+		req.Header.Del("Authorization")
+		req.Header.Del("X-Api-Key")
+
 		// Preserve correlation headers for debugging
 		if req.Header.Get("X-Request-ID") == "" {
 			// Could generate one here if needed
@@ -50,7 +55,7 @@ func createReverseProxy(upstreamURL string, secretSource SecretSource) (*httputi
 		// Users going through ampcode.com proxy are paying for the service and should get all features
 		// including 1M context window (context-1m-2025-08-07)

-		// Inject API key from secret source (precedence: config > env > file)
+		// Inject API key from secret source (only uses upstream-api-key from config)
 		if key, err := secretSource.Get(req.Context()); err == nil && key != "" {
 			req.Header.Set("X-Api-Key", key)
 			req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", key))
--- a/internal/api/modules/amp/response_rewriter.go
+++ b/internal/api/modules/amp/response_rewriter.go
@@ -39,7 +39,13 @@ func (rw *ResponseRewriter) Write(data []byte) (int, error) {
 	}

 	if rw.isStreaming {
-		return rw.ResponseWriter.Write(rw.rewriteStreamChunk(data))
+		n, err := rw.ResponseWriter.Write(rw.rewriteStreamChunk(data))
+		if err == nil {
+			if flusher, ok := rw.ResponseWriter.(http.Flusher); ok {
+				flusher.Flush()
+			}
+		}
+		return n, err
 	}
 	return rw.body.Write(data)
 }
--- a/internal/api/modules/amp/routes.go
+++ b/internal/api/modules/amp/routes.go
@@ -98,7 +98,8 @@ func (m *AmpModule) managementAvailabilityMiddleware() gin.HandlerFunc {
 // registerManagementRoutes registers Amp management proxy routes
 // These routes proxy through to the Amp control plane for OAuth, user management, etc.
 // Uses dynamic middleware and proxy getter for hot-reload support.
-func (m *AmpModule) registerManagementRoutes(engine *gin.Engine, baseHandler *handlers.BaseAPIHandler) {
+// The auth middleware validates Authorization header against configured API keys.
+func (m *AmpModule) registerManagementRoutes(engine *gin.Engine, baseHandler *handlers.BaseAPIHandler, auth gin.HandlerFunc) {
 	ampAPI := engine.Group("/api")

 	// Always disable CORS for management routes to prevent browser-based attacks
@@ -107,8 +108,9 @@ func (m *AmpModule) registerManagementRoutes(engine *gin.Engine, baseHandler *ha
 	// Apply dynamic localhost-only restriction (hot-reloadable via m.IsRestrictedToLocalhost())
 	ampAPI.Use(m.localhostOnlyMiddleware())

-	if !m.IsRestrictedToLocalhost() {
-		log.Warn("amp management routes are NOT restricted to localhost - this is insecure!")
+	// Apply authentication middleware - requires valid API key in Authorization header
+	if auth != nil {
+		ampAPI.Use(auth)
 	}

 	// Dynamic proxy handler that uses m.getProxy() for hot-reload support
@@ -154,6 +156,9 @@ func (m *AmpModule) registerManagementRoutes(engine *gin.Engine, baseHandler *ha
 	// Root-level routes that AMP CLI expects without /api prefix
 	// These need the same security middleware as the /api/* routes (dynamic for hot-reload)
 	rootMiddleware := []gin.HandlerFunc{m.managementAvailabilityMiddleware(), noCORSMiddleware(), m.localhostOnlyMiddleware()}
+	if auth != nil {
+		rootMiddleware = append(rootMiddleware, auth)
+	}
 	engine.GET("/threads/*path", append(rootMiddleware, proxyHandler)...)
 	engine.GET("/threads.rss", append(rootMiddleware, proxyHandler)...)
 	engine.GET("/news.rss", append(rootMiddleware, proxyHandler)...)
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -568,6 +568,7 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.DELETE("/oauth-excluded-models", s.mgmt.DeleteOAuthExcludedModels)

 		mgmt.GET("/auth-files", s.mgmt.ListAuthFiles)
+		mgmt.GET("/auth-files/models", s.mgmt.GetAuthFileModels)
 		mgmt.GET("/auth-files/download", s.mgmt.DownloadAuthFile)
 		mgmt.POST("/auth-files", s.mgmt.UploadAuthFile)
 		mgmt.DELETE("/auth-files", s.mgmt.DeleteAuthFile)
--- a/internal/auth/iflow/cookie_helpers.go
+++ b/internal/auth/iflow/cookie_helpers.go
@@ -1,7 +1,10 @@
 package iflow

 import (
+	"encoding/json"
 	"fmt"
+	"os"
+	"path/filepath"
 	"strings"
 )

@@ -36,3 +39,61 @@ func SanitizeIFlowFileName(raw string) string {
 	}
 	return strings.TrimSpace(result.String())
 }
+
+// ExtractBXAuth extracts the BXAuth value from a cookie string.
+func ExtractBXAuth(cookie string) string {
+	parts := strings.Split(cookie, ";")
+	for _, part := range parts {
+		part = strings.TrimSpace(part)
+		if strings.HasPrefix(part, "BXAuth=") {
+			return strings.TrimPrefix(part, "BXAuth=")
+		}
+	}
+	return ""
+}
+
+// CheckDuplicateBXAuth checks if the given BXAuth value already exists in any iflow auth file.
+// Returns the path of the existing file if found, empty string otherwise.
+func CheckDuplicateBXAuth(authDir, bxAuth string) (string, error) {
+	if bxAuth == "" {
+		return "", nil
+	}
+
+	entries, err := os.ReadDir(authDir)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return "", nil
+		}
+		return "", fmt.Errorf("read auth dir failed: %w", err)
+	}
+
+	for _, entry := range entries {
+		if entry.IsDir() {
+			continue
+		}
+		name := entry.Name()
+		if !strings.HasPrefix(name, "iflow-") || !strings.HasSuffix(name, ".json") {
+			continue
+		}
+
+		filePath := filepath.Join(authDir, name)
+		data, err := os.ReadFile(filePath)
+		if err != nil {
+			continue
+		}
+
+		var tokenData struct {
+			Cookie string `json:"cookie"`
+		}
+		if err := json.Unmarshal(data, &tokenData); err != nil {
+			continue
+		}
+
+		existingBXAuth := ExtractBXAuth(tokenData.Cookie)
+		if existingBXAuth != "" && existingBXAuth == bxAuth {
+			return filePath, nil
+		}
+	}
+
+	return "", nil
+}
--- a/internal/auth/iflow/iflow_auth.go
+++ b/internal/auth/iflow/iflow_auth.go
@@ -494,11 +494,18 @@ func (ia *IFlowAuth) CreateCookieTokenStorage(data *IFlowTokenData) *IFlowTokenS
 		return nil
 	}

+	// Only save the BXAuth field from the cookie
+	bxAuth := ExtractBXAuth(data.Cookie)
+	cookieToSave := ""
+	if bxAuth != "" {
+		cookieToSave = "BXAuth=" + bxAuth + ";"
+	}
+
 	return &IFlowTokenStorage{
 		APIKey:      data.APIKey,
 		Email:       data.Email,
 		Expire:      data.Expire,
-		Cookie:      data.Cookie,
+		Cookie:      cookieToSave,
 		LastRefresh: time.Now().Format(time.RFC3339),
 		Type:        "iflow",
 	}
--- a/internal/cmd/iflow_cookie.go
+++ b/internal/cmd/iflow_cookie.go
@@ -5,7 +5,9 @@ import (
 	"context"
 	"fmt"
 	"os"
+	"path/filepath"
 	"strings"
+	"time"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/iflow"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
@@ -37,6 +39,16 @@ func DoIFlowCookieAuth(cfg *config.Config, options *LoginOptions) {
 		return
 	}

+	// Check for duplicate BXAuth before authentication
+	bxAuth := iflow.ExtractBXAuth(cookie)
+	if existingFile, err := iflow.CheckDuplicateBXAuth(cfg.AuthDir, bxAuth); err != nil {
+		fmt.Printf("Failed to check duplicate: %v\n", err)
+		return
+	} else if existingFile != "" {
+		fmt.Printf("Duplicate BXAuth found, authentication already exists: %s\n", filepath.Base(existingFile))
+		return
+	}
+
 	// Authenticate with cookie
 	auth := iflow.NewIFlowAuth(cfg)
 	ctx := context.Background()
@@ -82,5 +94,5 @@ func promptForCookie(promptFn func(string) (string, error)) (string, error) {
 // getAuthFilePath returns the auth file path for the given provider and email
 func getAuthFilePath(cfg *config.Config, provider, email string) string {
 	fileName := iflow.SanitizeIFlowFileName(email)
-	return fmt.Sprintf("%s/%s-%s.json", cfg.AuthDir, provider, fileName)
+	return fmt.Sprintf("%s/%s-%s-%d.json", cfg.AuthDir, provider, fileName, time.Now().Unix())
 }
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -139,7 +139,7 @@ type AmpCode struct {

 	// RestrictManagementToLocalhost restricts Amp management routes (/api/user, /api/threads, etc.)
 	// to only accept connections from localhost (127.0.0.1, ::1). When true, prevents drive-by
-	// browser attacks and remote access to management endpoints. Default: true (recommended).
+	// browser attacks and remote access to management endpoints. Default: false (API key auth is sufficient).
 	RestrictManagementToLocalhost bool `yaml:"restrict-management-to-localhost" json:"restrict-management-to-localhost"`

 	// ModelMappings defines model name mappings for Amp CLI requests.
@@ -327,7 +327,7 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 	cfg.LoggingToFile = false
 	cfg.UsageStatisticsEnabled = false
 	cfg.DisableCooling = false
-	cfg.AmpCode.RestrictManagementToLocalhost = true // Default to secure: only localhost access
+	cfg.AmpCode.RestrictManagementToLocalhost = false // Default to false: API key auth is sufficient
 	if err = yaml.Unmarshal(data, &cfg); err != nil {
 		if optional {
 			// In cloud deploy mode, if YAML parsing fails, return empty config instead of error.
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -580,7 +580,7 @@ func GetOpenAIModels() []*ModelInfo {
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
+			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
 		},
 	}
 }
@@ -648,10 +648,11 @@ func GetIFlowModels() []*ModelInfo {
 		{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400},
 		{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
 		{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
-		{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2", Created: 1764576000},
+		{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Chat", Created: 1764576000},
+		{ID: "deepseek-v3.2-reasoner", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Reasoner", Created: 1764576000},
 		{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
 		{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
-		{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
+		{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
 		{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
 		{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
 		{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -90,6 +90,9 @@ type ModelRegistry struct {
 	models map[string]*ModelRegistration
 	// clientModels maps client ID to the models it provides
 	clientModels map[string][]string
+	// clientModelInfos maps client ID to a map of model ID -> ModelInfo
+	// This preserves the original model info provided by each client
+	clientModelInfos map[string]map[string]*ModelInfo
 	// clientProviders maps client ID to its provider identifier
 	clientProviders map[string]string
 	// mutex ensures thread-safe access to the registry
@@ -104,10 +107,11 @@ var registryOnce sync.Once
 func GetGlobalRegistry() *ModelRegistry {
 	registryOnce.Do(func() {
 		globalRegistry = &ModelRegistry{
-			models:          make(map[string]*ModelRegistration),
-			clientModels:    make(map[string][]string),
-			clientProviders: make(map[string]string),
-			mutex:           &sync.RWMutex{},
+			models:           make(map[string]*ModelRegistration),
+			clientModels:     make(map[string][]string),
+			clientModelInfos: make(map[string]map[string]*ModelInfo),
+			clientProviders:  make(map[string]string),
+			mutex:            &sync.RWMutex{},
 		}
 	})
 	return globalRegistry
@@ -144,6 +148,7 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
 		// No models supplied; unregister existing client state if present.
 		r.unregisterClientInternal(clientID)
 		delete(r.clientModels, clientID)
+		delete(r.clientModelInfos, clientID)
 		delete(r.clientProviders, clientID)
 		misc.LogCredentialSeparator()
 		return
@@ -152,7 +157,7 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
 	now := time.Now()

 	oldModels, hadExisting := r.clientModels[clientID]
-	oldProvider, _ := r.clientProviders[clientID]
+	oldProvider := r.clientProviders[clientID]
 	providerChanged := oldProvider != provider
 	if !hadExisting {
 		// Pure addition path.
@@ -161,6 +166,12 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
 			r.addModelRegistration(modelID, provider, model, now)
 		}
 		r.clientModels[clientID] = append([]string(nil), rawModelIDs...)
+		// Store client's own model infos
+		clientInfos := make(map[string]*ModelInfo, len(newModels))
+		for id, m := range newModels {
+			clientInfos[id] = cloneModelInfo(m)
+		}
+		r.clientModelInfos[clientID] = clientInfos
 		if provider != "" {
 			r.clientProviders[clientID] = provider
 		} else {
@@ -287,6 +298,12 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
 	if len(rawModelIDs) > 0 {
 		r.clientModels[clientID] = append([]string(nil), rawModelIDs...)
 	}
+	// Update client's own model infos
+	clientInfos := make(map[string]*ModelInfo, len(newModels))
+	for id, m := range newModels {
+		clientInfos[id] = cloneModelInfo(m)
+	}
+	r.clientModelInfos[clientID] = clientInfos
 	if provider != "" {
 		r.clientProviders[clientID] = provider
 	} else {
@@ -436,6 +453,7 @@ func (r *ModelRegistry) unregisterClientInternal(clientID string) {
 	}

 	delete(r.clientModels, clientID)
+	delete(r.clientModelInfos, clientID)
 	if hasProvider {
 		delete(r.clientProviders, clientID)
 	}
@@ -871,3 +889,44 @@ func (r *ModelRegistry) GetFirstAvailableModel(handlerType string) (string, erro

 	return "", fmt.Errorf("no available clients for any model in handler type: %s", handlerType)
 }
+
+// GetModelsForClient returns the models registered for a specific client.
+// Parameters:
+//   - clientID: The client identifier (typically auth file name or auth ID)
+//
+// Returns:
+//   - []*ModelInfo: List of models registered for this client, nil if client not found
+func (r *ModelRegistry) GetModelsForClient(clientID string) []*ModelInfo {
+	r.mutex.RLock()
+	defer r.mutex.RUnlock()
+
+	modelIDs, exists := r.clientModels[clientID]
+	if !exists || len(modelIDs) == 0 {
+		return nil
+	}
+
+	// Try to use client-specific model infos first
+	clientInfos := r.clientModelInfos[clientID]
+
+	seen := make(map[string]struct{})
+	result := make([]*ModelInfo, 0, len(modelIDs))
+	for _, modelID := range modelIDs {
+		if _, dup := seen[modelID]; dup {
+			continue
+		}
+		seen[modelID] = struct{}{}
+
+		// Prefer client's own model info to preserve original type/owned_by
+		if clientInfos != nil {
+			if info, ok := clientInfos[modelID]; ok && info != nil {
+				result = append(result, info)
+				continue
+			}
+		}
+		// Fallback to global registry (for backwards compatibility)
+		if reg, ok := r.models[modelID]; ok && reg.Info != nil {
+			result = append(result, reg.Info)
+		}
+	}
+	return result
+}
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -322,7 +322,7 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
-	payload = applyThinkingMetadata(payload, req.Metadata, req.Model)
+	payload = ApplyThinkingMetadata(payload, req.Metadata, req.Model)
 	payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload)
 	payload = util.ConvertThinkingLevelToBudget(payload)
 	payload = util.NormalizeGeminiThinkingBudget(req.Model, payload)
@@ -384,8 +384,16 @@ func ensureColonSpacedJSON(payload []byte) []byte {

 	for i := 0; i < len(indented); i++ {
 		ch := indented[i]
-		if ch == '"' && (i == 0 || indented[i-1] != '\\') {
-			inString = !inString
+		if ch == '"' {
+			// A quote is escaped only when preceded by an odd number of consecutive backslashes.
+			// For example: "\\\"" keeps the quote inside the string, but "\\\\" closes the string.
+			backslashes := 0
+			for j := i - 1; j >= 0 && indented[j] == '\\'; j-- {
+				backslashes++
+			}
+			if backslashes%2 == 0 {
+				inString = !inString
+			}
 		}

 		if !inString {
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -54,9 +54,9 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
-	body = normalizeThinkingConfig(body, upstreamModel)
-	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
+	body = NormalizeThinkingConfig(body, upstreamModel, false)
+	if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return resp, errValidate
 	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -152,9 +152,9 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)

-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
-	body = normalizeThinkingConfig(body, upstreamModel)
-	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
+	body = NormalizeThinkingConfig(body, upstreamModel, false)
+	if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return nil, errValidate
 	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -254,7 +254,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth

 	modelForCounting := req.Model

-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
+	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
 	body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.SetBytes(body, "stream", false)
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -11,6 +11,8 @@ import (
 	"fmt"
 	"io"
 	"net/http"
+	"regexp"
+	"strconv"
 	"strings"
 	"time"

@@ -784,20 +786,45 @@ func parseRetryDelay(errorBody []byte) (*time.Duration, error) {
 	// Try to parse the retryDelay from the error response
 	// Format: error.details[].retryDelay where @type == "type.googleapis.com/google.rpc.RetryInfo"
 	details := gjson.GetBytes(errorBody, "error.details")
-	if !details.Exists() || !details.IsArray() {
-		return nil, fmt.Errorf("no error.details found")
+	if details.Exists() && details.IsArray() {
+		for _, detail := range details.Array() {
+			typeVal := detail.Get("@type").String()
+			if typeVal == "type.googleapis.com/google.rpc.RetryInfo" {
+				retryDelay := detail.Get("retryDelay").String()
+				if retryDelay != "" {
+					// Parse duration string like "0.847655010s"
+					duration, err := time.ParseDuration(retryDelay)
+					if err != nil {
+						return nil, fmt.Errorf("failed to parse duration")
+					}
+					return &duration, nil
+				}
+			}
+		}
+
+		// Fallback: try ErrorInfo.metadata.quotaResetDelay (e.g., "373.801628ms")
+		for _, detail := range details.Array() {
+			typeVal := detail.Get("@type").String()
+			if typeVal == "type.googleapis.com/google.rpc.ErrorInfo" {
+				quotaResetDelay := detail.Get("metadata.quotaResetDelay").String()
+				if quotaResetDelay != "" {
+					duration, err := time.ParseDuration(quotaResetDelay)
+					if err == nil {
+						return &duration, nil
+					}
+				}
+			}
+		}
 	}

-	for _, detail := range details.Array() {
-		typeVal := detail.Get("@type").String()
-		if typeVal == "type.googleapis.com/google.rpc.RetryInfo" {
-			retryDelay := detail.Get("retryDelay").String()
-			if retryDelay != "" {
-				// Parse duration string like "0.847655010s"
-				duration, err := time.ParseDuration(retryDelay)
-				if err != nil {
-					return nil, fmt.Errorf("failed to parse duration")
-				}
+	// Fallback: parse from error.message "Your quota will reset after Xs."
+	message := gjson.GetBytes(errorBody, "error.message").String()
+	if message != "" {
+		re := regexp.MustCompile(`after\s+(\d+)s\.?`)
+		if matches := re.FindStringSubmatch(message); len(matches) > 1 {
+			seconds, err := strconv.Atoi(matches[1])
+			if err == nil {
+				duration := time.Duration(seconds) * time.Second
 				return &duration, nil
 			}
 		}
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -83,7 +83,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = applyThinkingMetadata(body, req.Metadata, req.Model)
+	body = ApplyThinkingMetadata(body, req.Metadata, req.Model)
 	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
 	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
@@ -178,7 +178,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	body = applyThinkingMetadata(body, req.Metadata, req.Model)
+	body = ApplyThinkingMetadata(body, req.Metadata, req.Model)
 	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
 	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
@@ -290,7 +290,7 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	translatedReq = applyThinkingMetadata(translatedReq, req.Metadata, req.Model)
+	translatedReq = ApplyThinkingMetadata(translatedReq, req.Metadata, req.Model)
 	translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
 	translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -57,13 +57,13 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
+	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	if upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
-	body = normalizeThinkingConfig(body, upstreamModel)
-	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+	body = NormalizeThinkingConfig(body, upstreamModel, false)
+	if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return resp, errValidate
 	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -148,13 +148,13 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)

-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
+	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	if upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
-	body = normalizeThinkingConfig(body, upstreamModel)
-	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+	body = NormalizeThinkingConfig(body, upstreamModel, false)
+	if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return nil, errValidate
 	}
 	// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
@@ -219,7 +219,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 		}()

 		scanner := bufio.NewScanner(httpResp.Body)
-		scanner.Buffer(nil, 52_428_800) // 50MB 
+		scanner.Buffer(nil, 52_428_800) // 50MB
 		var param any
 		for scanner.Scan() {
 			line := scanner.Bytes()
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -54,17 +54,19 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), opts.Stream)
-	if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
+	modelOverride := e.resolveUpstreamModel(req.Model, auth)
+	if modelOverride != "" {
 		translated = e.overrideModel(translated, modelOverride)
 	}
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
-	translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
+	allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
+	translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
-	if upstreamModel != "" {
+	if upstreamModel != "" && modelOverride == "" {
 		translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
 	}
-	translated = normalizeThinkingConfig(translated, upstreamModel)
-	if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
+	translated = NormalizeThinkingConfig(translated, upstreamModel, allowCompat)
+	if errValidate := ValidateThinkingConfig(translated, upstreamModel); errValidate != nil {
 		return resp, errValidate
 	}

@@ -148,17 +150,19 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
+	modelOverride := e.resolveUpstreamModel(req.Model, auth)
+	if modelOverride != "" {
 		translated = e.overrideModel(translated, modelOverride)
 	}
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
-	translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
+	allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
+	translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
-	if upstreamModel != "" {
+	if upstreamModel != "" && modelOverride == "" {
 		translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
 	}
-	translated = normalizeThinkingConfig(translated, upstreamModel)
-	if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
+	translated = NormalizeThinkingConfig(translated, upstreamModel, allowCompat)
+	if errValidate := ValidateThinkingConfig(translated, upstreamModel); errValidate != nil {
 		return nil, errValidate
 	}

@@ -323,6 +327,27 @@ func (e *OpenAICompatExecutor) resolveUpstreamModel(alias string, auth *cliproxy
 	return ""
 }

+func (e *OpenAICompatExecutor) allowCompatReasoningEffort(model string, auth *cliproxyauth.Auth) bool {
+	trimmed := strings.TrimSpace(model)
+	if trimmed == "" || e == nil || e.cfg == nil {
+		return false
+	}
+	compat := e.resolveCompatConfig(auth)
+	if compat == nil || len(compat.Models) == 0 {
+		return false
+	}
+	for i := range compat.Models {
+		entry := compat.Models[i]
+		if strings.EqualFold(strings.TrimSpace(entry.Alias), trimmed) {
+			return true
+		}
+		if strings.EqualFold(strings.TrimSpace(entry.Name), trimmed) {
+			return true
+		}
+	}
+	return false
+}
+
 func (e *OpenAICompatExecutor) resolveCompatConfig(auth *cliproxyauth.Auth) *config.OpenAICompatibility {
 	if auth == nil || e.cfg == nil {
 		return nil
--- a/internal/runtime/executor/payload_helpers.go
+++ b/internal/runtime/executor/payload_helpers.go
@@ -11,9 +11,9 @@ import (
 	"github.com/tidwall/sjson"
 )

-// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192))
+// ApplyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192))
 // for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
-func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
+func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
 	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
 	if !ok || (budgetOverride == nil && includeOverride == nil) {
 		return payload
@@ -45,22 +45,44 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str
 	return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
 }

-// applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
+// ApplyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
 // Metadata values take precedence over any existing field when the model supports thinking, intentionally
 // overwriting caller-provided values to honor suffix/default metadata priority.
-func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte {
+func ApplyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string, allowCompat bool) []byte {
 	if len(metadata) == 0 {
 		return payload
 	}
-	if !util.ModelSupportsThinking(model) {
-		return payload
-	}
 	if field == "" {
 		return payload
 	}
+	baseModel := util.ResolveOriginalModel(model, metadata)
+	if baseModel == "" {
+		baseModel = model
+	}
+	if !util.ModelSupportsThinking(baseModel) && !allowCompat {
+		return payload
+	}
 	if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
-		if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
-			return updated
+		if util.ModelUsesThinkingLevels(baseModel) || allowCompat {
+			if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
+				return updated
+			}
+		}
+	}
+	// Fallback: numeric thinking_budget suffix for level-based (OpenAI-style) models.
+	if util.ModelUsesThinkingLevels(baseModel) || allowCompat {
+		if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
+			if effort, ok := util.OpenAIThinkingBudgetToEffort(baseModel, *budget); ok && effort != "" {
+				if *budget == 0 && effort == "none" && util.ModelUsesThinkingLevels(baseModel) {
+					if _, supported := util.NormalizeReasoningEffortLevel(baseModel, effort); !supported {
+						return StripThinkingFields(payload, false)
+					}
+				}
+
+				if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
+					return updated
+				}
+			}
 		}
 	}
 	return payload
@@ -216,34 +238,43 @@ func matchModelPattern(pattern, model string) bool {
 	return pi == len(pattern)
 }

-// normalizeThinkingConfig normalizes thinking-related fields in the payload
+// NormalizeThinkingConfig normalizes thinking-related fields in the payload
 // based on model capabilities. For models without thinking support, it strips
 // reasoning fields. For models with level-based thinking, it validates and
-// normalizes the reasoning effort level.
-func normalizeThinkingConfig(payload []byte, model string) []byte {
+// normalizes the reasoning effort level. For models with numeric budget thinking,
+// it strips the effort string fields.
+func NormalizeThinkingConfig(payload []byte, model string, allowCompat bool) []byte {
 	if len(payload) == 0 || model == "" {
 		return payload
 	}

 	if !util.ModelSupportsThinking(model) {
-		return stripThinkingFields(payload)
+		if allowCompat {
+			return payload
+		}
+		return StripThinkingFields(payload, false)
 	}

 	if util.ModelUsesThinkingLevels(model) {
-		return normalizeReasoningEffortLevel(payload, model)
+		return NormalizeReasoningEffortLevel(payload, model)
 	}

-	return payload
+	// Model supports thinking but uses numeric budgets, not levels.
+	// Strip effort string fields since they are not applicable.
+	return StripThinkingFields(payload, true)
 }

-// stripThinkingFields removes thinking-related fields from the payload for
-// models that do not support thinking.
-func stripThinkingFields(payload []byte) []byte {
+// StripThinkingFields removes thinking-related fields from the payload for
+// models that do not support thinking. If effortOnly is true, only removes
+// effort string fields (for models using numeric budgets).
+func StripThinkingFields(payload []byte, effortOnly bool) []byte {
 	fieldsToRemove := []string{
-		"reasoning",
 		"reasoning_effort",
 		"reasoning.effort",
 	}
+	if !effortOnly {
+		fieldsToRemove = append([]string{"reasoning"}, fieldsToRemove...)
+	}
 	out := payload
 	for _, field := range fieldsToRemove {
 		if gjson.GetBytes(out, field).Exists() {
@@ -253,9 +284,9 @@ func stripThinkingFields(payload []byte) []byte {
 	return out
 }

-// normalizeReasoningEffortLevel validates and normalizes the reasoning_effort
+// NormalizeReasoningEffortLevel validates and normalizes the reasoning_effort
 // or reasoning.effort field for level-based thinking models.
-func normalizeReasoningEffortLevel(payload []byte, model string) []byte {
+func NormalizeReasoningEffortLevel(payload []byte, model string) []byte {
 	out := payload

 	if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() {
@@ -273,10 +304,10 @@ func normalizeReasoningEffortLevel(payload []byte, model string) []byte {
 	return out
 }

-// validateThinkingConfig checks for unsupported reasoning levels on level-based models.
+// ValidateThinkingConfig checks for unsupported reasoning levels on level-based models.
 // Returns a statusErr with 400 when an unsupported level is supplied to avoid silently
 // downgrading requests.
-func validateThinkingConfig(payload []byte, model string) error {
+func ValidateThinkingConfig(payload []byte, model string) error {
 	if len(payload) == 0 || model == "" {
 		return nil
 	}
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -51,13 +51,13 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
+	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	if upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
-	body = normalizeThinkingConfig(body, upstreamModel)
-	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+	body = NormalizeThinkingConfig(body, upstreamModel, false)
+	if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return resp, errValidate
 	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -131,13 +131,13 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)

-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
+	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	if upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
-	body = normalizeThinkingConfig(body, upstreamModel)
-	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+	body = NormalizeThinkingConfig(body, upstreamModel, false)
+	if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return nil, errValidate
 	}
 	toolsResult := gjson.GetBytes(body, "tools")
--- a/internal/translator/antigravity/gemini/antigravity_gemini_request.go
+++ b/internal/translator/antigravity/gemini/antigravity_gemini_request.go
@@ -122,6 +122,38 @@ type FunctionCallGroup struct {
 	ResponsesNeeded int
 }

+// parseFunctionResponse attempts to unmarshal a function response part.
+// Falls back to gjson extraction if standard json.Unmarshal fails.
+func parseFunctionResponse(response gjson.Result) map[string]interface{} {
+	var responseMap map[string]interface{}
+	err := json.Unmarshal([]byte(response.Raw), &responseMap)
+	if err == nil {
+		return responseMap
+	}
+
+	log.Debugf("unmarshal function response failed, using fallback: %v", err)
+	funcResp := response.Get("functionResponse")
+	if funcResp.Exists() {
+		fr := map[string]interface{}{
+			"name": funcResp.Get("name").String(),
+			"response": map[string]interface{}{
+				"result": funcResp.Get("response").String(),
+			},
+		}
+		if id := funcResp.Get("id").String(); id != "" {
+			fr["id"] = id
+		}
+		return map[string]interface{}{"functionResponse": fr}
+	}
+
+	return map[string]interface{}{
+		"functionResponse": map[string]interface{}{
+			"name":     "unknown",
+			"response": map[string]interface{}{"result": response.String()},
+		},
+	}
+}
+
 // fixCLIToolResponse performs sophisticated tool response format conversion and grouping.
 // This function transforms the CLI tool response format by intelligently grouping function calls
 // with their corresponding responses, ensuring proper conversation flow and API compatibility.
@@ -180,13 +212,7 @@ func fixCLIToolResponse(input string) (string, error) {
 					// Create merged function response content
 					var responseParts []interface{}
 					for _, response := range groupResponses {
-						var responseMap map[string]interface{}
-						errUnmarshal := json.Unmarshal([]byte(response.Raw), &responseMap)
-						if errUnmarshal != nil {
-							log.Warnf("failed to unmarshal function response: %v\n", errUnmarshal)
-							continue
-						}
-						responseParts = append(responseParts, responseMap)
+						responseParts = append(responseParts, parseFunctionResponse(response))
 					}

 					if len(responseParts) > 0 {
@@ -265,13 +291,7 @@ func fixCLIToolResponse(input string) (string, error) {

 			var responseParts []interface{}
 			for _, response := range groupResponses {
-				var responseMap map[string]interface{}
-				errUnmarshal := json.Unmarshal([]byte(response.Raw), &responseMap)
-				if errUnmarshal != nil {
-					log.Warnf("failed to unmarshal function response: %v\n", errUnmarshal)
-					continue
-				}
-				responseParts = append(responseParts, responseMap)
+				responseParts = append(responseParts, parseFunctionResponse(response))
 			}

 			if len(responseParts) > 0 {
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
@@ -39,31 +39,13 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 	// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
 	re := gjson.GetBytes(rawJSON, "reasoning_effort")
 	hasOfficialThinking := re.Exists()
-	if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
-		switch re.String() {
-		case "none":
-			out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts")
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0)
-		case "auto":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-		case "low":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-		case "medium":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-		case "high":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 32768)
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-		default:
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-		}
+	if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+		out = util.ApplyReasoningEffortToGeminiCLI(out, re.String())
 	}

 	// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
-	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
+	// Only apply for models that use numeric budgets, not discrete levels.
+	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
 		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
 			var budget int
--- a/internal/translator/claude/gemini/claude_gemini_request.go
+++ b/internal/translator/claude/gemini/claude_gemini_request.go
@@ -114,14 +114,16 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream
 			}
 		}
 		// Include thoughts configuration for reasoning process visibility
-		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
-			if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() {
-				if includeThoughts.Type == gjson.True {
-					out, _ = sjson.Set(out, "thinking.type", "enabled")
-					if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
-						out, _ = sjson.Set(out, "thinking.budget_tokens", thinkingBudget.Int())
-					}
-				}
+		// Only apply for models that support thinking and use numeric budgets, not discrete levels.
+		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+			// Check for thinkingBudget first - if present, enable thinking with budget
+			if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() && thinkingBudget.Int() > 0 {
+				out, _ = sjson.Set(out, "thinking.type", "enabled")
+				normalizedBudget := util.NormalizeThinkingBudget(modelName, int(thinkingBudget.Int()))
+				out, _ = sjson.Set(out, "thinking.budget_tokens", normalizedBudget)
+			} else if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
+				// Fallback to include_thoughts if no budget specified
+				out, _ = sjson.Set(out, "thinking.type", "enabled")
 			}
 		}
 	}
--- a/internal/translator/claude/openai/chat-completions/claude_openai_request.go
+++ b/internal/translator/claude/openai/chat-completions/claude_openai_request.go
@@ -16,6 +16,7 @@ import (
 	"strings"

 	"github.com/google/uuid"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -65,18 +66,23 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream

 	root := gjson.ParseBytes(rawJSON)

-	if v := root.Get("reasoning_effort"); v.Exists() {
-		out, _ = sjson.Set(out, "thinking.type", "enabled")
-
-		switch v.String() {
-		case "none":
-			out, _ = sjson.Set(out, "thinking.type", "disabled")
-		case "low":
-			out, _ = sjson.Set(out, "thinking.budget_tokens", 1024)
-		case "medium":
-			out, _ = sjson.Set(out, "thinking.budget_tokens", 8192)
-		case "high":
-			out, _ = sjson.Set(out, "thinking.budget_tokens", 24576)
+	if v := root.Get("reasoning_effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+		effort := strings.ToLower(strings.TrimSpace(v.String()))
+		if effort != "" {
+			budget, ok := util.ThinkingEffortToBudget(modelName, effort)
+			if ok {
+				switch budget {
+				case 0:
+					out, _ = sjson.Set(out, "thinking.type", "disabled")
+				case -1:
+					out, _ = sjson.Set(out, "thinking.type", "enabled")
+				default:
+					if budget > 0 {
+						out, _ = sjson.Set(out, "thinking.type", "enabled")
+						out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
+					}
+				}
+			}
 		}
 	}

--- a/internal/translator/claude/openai/responses/claude_openai-responses_request.go
+++ b/internal/translator/claude/openai/responses/claude_openai-responses_request.go
@@ -10,6 +10,7 @@ import (
 	"strings"

 	"github.com/google/uuid"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -52,20 +53,23 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte

 	root := gjson.ParseBytes(rawJSON)

-	if v := root.Get("reasoning.effort"); v.Exists() {
-		out, _ = sjson.Set(out, "thinking.type", "enabled")
-
-		switch v.String() {
-		case "none":
-			out, _ = sjson.Set(out, "thinking.type", "disabled")
-		case "minimal":
-			out, _ = sjson.Set(out, "thinking.budget_tokens", 1024)
-		case "low":
-			out, _ = sjson.Set(out, "thinking.budget_tokens", 4096)
-		case "medium":
-			out, _ = sjson.Set(out, "thinking.budget_tokens", 8192)
-		case "high":
-			out, _ = sjson.Set(out, "thinking.budget_tokens", 24576)
+	if v := root.Get("reasoning.effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+		effort := strings.ToLower(strings.TrimSpace(v.String()))
+		if effort != "" {
+			budget, ok := util.ThinkingEffortToBudget(modelName, effort)
+			if ok {
+				switch budget {
+				case 0:
+					out, _ = sjson.Set(out, "thinking.type", "disabled")
+				case -1:
+					out, _ = sjson.Set(out, "thinking.type", "enabled")
+				default:
+					if budget > 0 {
+						out, _ = sjson.Set(out, "thinking.type", "enabled")
+						out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
+					}
+				}
+			}
 		}
 	}

--- a/internal/translator/codex/claude/codex_claude_request.go
+++ b/internal/translator/codex/claude/codex_claude_request.go
@@ -12,6 +12,7 @@ import (
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -214,7 +215,22 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)

 	// Add additional configuration parameters for the Codex API.
 	template, _ = sjson.Set(template, "parallel_tool_calls", true)
-	template, _ = sjson.Set(template, "reasoning.effort", "low")
+
+	// Convert thinking.budget_tokens to reasoning.effort for level-based models
+	reasoningEffort := "medium" // default
+	if thinking := rootResult.Get("thinking"); thinking.Exists() && thinking.IsObject() {
+		if thinking.Get("type").String() == "enabled" {
+			if util.ModelUsesThinkingLevels(modelName) {
+				if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() {
+					budget := int(budgetTokens.Int())
+					if effort, ok := util.OpenAIThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
+						reasoningEffort = effort
+					}
+				}
+			}
+		}
+	}
+	template, _ = sjson.Set(template, "reasoning.effort", reasoningEffort)
 	template, _ = sjson.Set(template, "reasoning.summary", "auto")
 	template, _ = sjson.Set(template, "stream", true)
 	template, _ = sjson.Set(template, "store", false)
--- a/internal/translator/codex/gemini/codex_gemini_request.go
+++ b/internal/translator/codex/gemini/codex_gemini_request.go
@@ -245,7 +245,22 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool)

 	// Fixed flags aligning with Codex expectations
 	out, _ = sjson.Set(out, "parallel_tool_calls", true)
-	out, _ = sjson.Set(out, "reasoning.effort", "low")
+
+	// Convert thinkingBudget to reasoning.effort for level-based models
+	reasoningEffort := "medium" // default
+	if genConfig := root.Get("generationConfig"); genConfig.Exists() {
+		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
+			if util.ModelUsesThinkingLevels(modelName) {
+				if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
+					budget := int(thinkingBudget.Int())
+					if effort, ok := util.OpenAIThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
+						reasoningEffort = effort
+					}
+				}
+			}
+		}
+	}
+	out, _ = sjson.Set(out, "reasoning.effort", reasoningEffort)
 	out, _ = sjson.Set(out, "reasoning.summary", "auto")
 	out, _ = sjson.Set(out, "stream", true)
 	out, _ = sjson.Set(out, "store", false)
--- a/internal/translator/codex/openai/chat-completions/codex_openai_request.go
+++ b/internal/translator/codex/openai/chat-completions/codex_openai_request.go
@@ -60,7 +60,7 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
 	if v := gjson.GetBytes(rawJSON, "reasoning_effort"); v.Exists() {
 		out, _ = sjson.Set(out, "reasoning.effort", v.Value())
 	} else {
-		out, _ = sjson.Set(out, "reasoning.effort", "low")
+		out, _ = sjson.Set(out, "reasoning.effort", "medium")
 	}
 	out, _ = sjson.Set(out, "parallel_tool_calls", true)
 	out, _ = sjson.Set(out, "reasoning.summary", "auto")
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -39,31 +39,13 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 	// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
 	re := gjson.GetBytes(rawJSON, "reasoning_effort")
 	hasOfficialThinking := re.Exists()
-	if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
-		switch re.String() {
-		case "none":
-			out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts")
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0)
-		case "auto":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-		case "low":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-		case "medium":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-		case "high":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 32768)
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-		default:
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-		}
+	if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+		out = util.ApplyReasoningEffortToGeminiCLI(out, re.String())
 	}

 	// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
-	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
+	// Only apply for models that use numeric budgets, not discrete levels.
+	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
 		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
 			var budget int
--- a/internal/translator/gemini/claude/gemini_claude_request.go
+++ b/internal/translator/gemini/claude/gemini_claude_request.go
@@ -154,7 +154,8 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 	}

 	// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when enabled
-	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
+	// Only apply for models that use numeric budgets, not discrete levels.
+	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
 		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
@@ -37,33 +37,17 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)

 	// Reasoning effort -> thinkingBudget/include_thoughts
 	// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
+	// Only convert for models that use numeric budgets (not discrete levels) to avoid
+	// incorrectly applying thinkingBudget for level-based models like gpt-5.
 	re := gjson.GetBytes(rawJSON, "reasoning_effort")
 	hasOfficialThinking := re.Exists()
-	if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
-		switch re.String() {
-		case "none":
-			out, _ = sjson.DeleteBytes(out, "generationConfig.thinkingConfig.include_thoughts")
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
-		case "auto":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		case "low":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		case "medium":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		case "high":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 32768)
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		default:
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		}
+	if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+		out = util.ApplyReasoningEffortToGemini(out, re.String())
 	}

 	// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
-	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
+	// Only apply for models that use numeric budgets, not discrete levels.
+	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
 		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
 			var budget int
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
@@ -389,36 +389,16 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 	}

 	// OpenAI official reasoning fields take precedence
+	// Only convert for models that use numeric budgets (not discrete levels).
 	hasOfficialThinking := root.Get("reasoning.effort").Exists()
-	if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
+	if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
 		reasoningEffort := root.Get("reasoning.effort")
-		switch reasoningEffort.String() {
-		case "none":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", false)
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
-		case "auto":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		case "minimal":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		case "low":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 4096)
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		case "medium":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		case "high":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 32768)
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		default:
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		}
+		out = string(util.ApplyReasoningEffortToGemini([]byte(out), reasoningEffort.String()))
 	}

 	// Cherry Studio extension (applies only when official fields are missing)
-	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
+	// Only apply for models that use numeric budgets, not discrete levels.
+	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
 		if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
 			var budget int
--- a/internal/translator/openai/claude/openai_claude_request.go
+++ b/internal/translator/openai/claude/openai_claude_request.go
@@ -10,6 +10,7 @@ import (
 	"encoding/json"
 	"strings"

+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -60,6 +61,18 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 	// Stream
 	out, _ = sjson.Set(out, "stream", stream)

+	// Thinking: Convert Claude thinking.budget_tokens to OpenAI reasoning_effort
+	if thinking := root.Get("thinking"); thinking.Exists() && thinking.IsObject() {
+		if thinkingType := thinking.Get("type"); thinkingType.Exists() && thinkingType.String() == "enabled" {
+			if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() {
+				budget := int(budgetTokens.Int())
+				if effort, ok := util.OpenAIThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
+					out, _ = sjson.Set(out, "reasoning_effort", effort)
+				}
+			}
+		}
+	}
+
 	// Process messages and system
 	var messagesJSON = "[]"

--- a/internal/translator/openai/gemini/openai_gemini_request.go
+++ b/internal/translator/openai/gemini/openai_gemini_request.go
@@ -13,6 +13,7 @@ import (
 	"math/big"
 	"strings"

+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -76,6 +77,17 @@ func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 				out, _ = sjson.Set(out, "stop", stops)
 			}
 		}
+
+		// Convert thinkingBudget to reasoning_effort
+		// Always perform conversion to support allowCompat models that may not be in registry
+		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
+			if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
+				budget := int(thinkingBudget.Int())
+				if effort, ok := util.OpenAIThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
+					out, _ = sjson.Set(out, "reasoning_effort", effort)
+				}
+			}
+		}
 	}

 	// Stream parameter
--- a/internal/translator/openai/openai/responses/openai_openai-responses_request.go
+++ b/internal/translator/openai/openai/responses/openai_openai-responses_request.go
@@ -2,6 +2,7 @@ package responses

 import (
 	"bytes"
+	"strings"

 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -64,7 +65,7 @@ func ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName string, inpu
 			}

 			switch itemType {
-			case "message":
+			case "message", "":
 				// Handle regular message conversion
 				role := item.Get("role").String()
 				message := `{"role":"","content":""}`
@@ -106,6 +107,8 @@ func ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName string, inpu
 					if len(toolCalls) > 0 {
 						message, _ = sjson.Set(message, "tool_calls", toolCalls)
 					}
+				} else if content.Type == gjson.String {
+					message, _ = sjson.Set(message, "content", content.String())
 				}

 				out, _ = sjson.SetRaw(out, "messages.-1", message)
@@ -189,23 +192,9 @@ func ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName string, inpu
 	}

 	if reasoningEffort := root.Get("reasoning.effort"); reasoningEffort.Exists() {
-		switch reasoningEffort.String() {
-		case "none":
-			out, _ = sjson.Set(out, "reasoning_effort", "none")
-		case "auto":
-			out, _ = sjson.Set(out, "reasoning_effort", "auto")
-		case "minimal":
-			out, _ = sjson.Set(out, "reasoning_effort", "low")
-		case "low":
-			out, _ = sjson.Set(out, "reasoning_effort", "low")
-		case "medium":
-			out, _ = sjson.Set(out, "reasoning_effort", "medium")
-		case "high":
-			out, _ = sjson.Set(out, "reasoning_effort", "high")
-		case "xhigh":
-			out, _ = sjson.Set(out, "reasoning_effort", "xhigh")
-		default:
-			out, _ = sjson.Set(out, "reasoning_effort", "auto")
+		effort := strings.ToLower(strings.TrimSpace(reasoningEffort.String()))
+		if effort != "" {
+			out, _ = sjson.Set(out, "reasoning_effort", effort)
 		}
 	}

--- a/internal/util/claude_thinking.go
+++ b/internal/util/claude_thinking.go
@@ -28,6 +28,9 @@ func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte {
 // It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget.
 // Returns the normalized budget (nil if thinking should not be enabled) and whether it matched.
 func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) {
+	if !ModelSupportsThinking(modelName) {
+		return nil, false
+	}
 	budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata)
 	if !matched {
 		return nil, false
--- a/internal/util/gemini_thinking.go
+++ b/internal/util/gemini_thinking.go
@@ -25,9 +25,15 @@ func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool)
 			updated = rewritten
 		}
 	}
-	if includeThoughts != nil {
+	// Default to including thoughts when a budget override is present but no explicit include flag is provided.
+	incl := includeThoughts
+	if incl == nil && budget != nil && *budget != 0 {
+		defaultInclude := true
+		incl = &defaultInclude
+	}
+	if incl != nil {
 		valuePath := "generationConfig.thinkingConfig.include_thoughts"
-		rewritten, err := sjson.SetBytes(updated, valuePath, *includeThoughts)
+		rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
 		if err == nil {
 			updated = rewritten
 		}
@@ -47,9 +53,15 @@ func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *boo
 			updated = rewritten
 		}
 	}
-	if includeThoughts != nil {
+	// Default to including thoughts when a budget override is present but no explicit include flag is provided.
+	incl := includeThoughts
+	if incl == nil && budget != nil && *budget != 0 {
+		defaultInclude := true
+		incl = &defaultInclude
+	}
+	if incl != nil {
 		valuePath := "request.generationConfig.thinkingConfig.include_thoughts"
-		rewritten, err := sjson.SetBytes(updated, valuePath, *includeThoughts)
+		rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
 		if err == nil {
 			updated = rewritten
 		}
@@ -140,6 +152,71 @@ func NormalizeGeminiCLIThinkingBudget(model string, body []byte) []byte {
 	return updated
 }

+// ReasoningEffortBudgetMapping defines the thinkingBudget values for each reasoning effort level.
+var ReasoningEffortBudgetMapping = map[string]int{
+	"none":    0,
+	"auto":    -1,
+	"minimal": 512,
+	"low":     1024,
+	"medium":  8192,
+	"high":    24576,
+	"xhigh":   32768,
+}
+
+// ApplyReasoningEffortToGemini applies OpenAI reasoning_effort to Gemini thinkingConfig
+// for standard Gemini API format (generationConfig.thinkingConfig path).
+// Returns the modified body with thinkingBudget and include_thoughts set.
+func ApplyReasoningEffortToGemini(body []byte, effort string) []byte {
+	normalized := strings.ToLower(strings.TrimSpace(effort))
+	if normalized == "" {
+		return body
+	}
+
+	budgetPath := "generationConfig.thinkingConfig.thinkingBudget"
+	includePath := "generationConfig.thinkingConfig.include_thoughts"
+
+	if normalized == "none" {
+		body, _ = sjson.DeleteBytes(body, "generationConfig.thinkingConfig")
+		return body
+	}
+
+	budget, ok := ReasoningEffortBudgetMapping[normalized]
+	if !ok {
+		return body
+	}
+
+	body, _ = sjson.SetBytes(body, budgetPath, budget)
+	body, _ = sjson.SetBytes(body, includePath, true)
+	return body
+}
+
+// ApplyReasoningEffortToGeminiCLI applies OpenAI reasoning_effort to Gemini CLI thinkingConfig
+// for Gemini CLI API format (request.generationConfig.thinkingConfig path).
+// Returns the modified body with thinkingBudget and include_thoughts set.
+func ApplyReasoningEffortToGeminiCLI(body []byte, effort string) []byte {
+	normalized := strings.ToLower(strings.TrimSpace(effort))
+	if normalized == "" {
+		return body
+	}
+
+	budgetPath := "request.generationConfig.thinkingConfig.thinkingBudget"
+	includePath := "request.generationConfig.thinkingConfig.include_thoughts"
+
+	if normalized == "none" {
+		body, _ = sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig")
+		return body
+	}
+
+	budget, ok := ReasoningEffortBudgetMapping[normalized]
+	if !ok {
+		return body
+	}
+
+	body, _ = sjson.SetBytes(body, budgetPath, budget)
+	body, _ = sjson.SetBytes(body, includePath, true)
+	return body
+}
+
 // ConvertThinkingLevelToBudget checks for "generationConfig.thinkingConfig.thinkingLevel"
 // and converts it to "thinkingBudget".
 // "high" -> 32768
--- a/internal/util/openai_thinking.go
+++ b/internal/util/openai_thinking.go
@@ -0,0 +1,37 @@
+package util
+
+// OpenAIThinkingBudgetToEffort maps a numeric thinking budget (tokens)
+// into an OpenAI-style reasoning effort level for level-based models.
+//
+// Ranges:
+//   - 0            -> "none"
+//   - -1           -> "auto"
+//   - 1..1024      -> "low"
+//   - 1025..8192   -> "medium"
+//   - 8193..24576  -> "high"
+//   - 24577..      -> highest supported level for the model (defaults to "xhigh")
+//
+// Negative values other than -1 are treated as unsupported.
+func OpenAIThinkingBudgetToEffort(model string, budget int) (string, bool) {
+	switch {
+	case budget == -1:
+		return "auto", true
+	case budget < -1:
+		return "", false
+	case budget == 0:
+		return "none", true
+	case budget > 0 && budget <= 1024:
+		return "low", true
+	case budget <= 8192:
+		return "medium", true
+	case budget <= 24576:
+		return "high", true
+	case budget > 24576:
+		if levels := GetModelThinkingLevels(model); len(levels) > 0 {
+			return levels[len(levels)-1], true
+		}
+		return "xhigh", true
+	default:
+		return "", false
+	}
+}
--- a/internal/util/thinking.go
+++ b/internal/util/thinking.go
@@ -25,33 +25,33 @@ func ModelSupportsThinking(model string) bool {
 // or min (0 if zero is allowed and mid <= 0).
 func NormalizeThinkingBudget(model string, budget int) int {
 	if budget == -1 { // dynamic
-		if found, min, max, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found {
+		if found, minBudget, maxBudget, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found {
 			if dynamicAllowed {
 				return -1
 			}
-			mid := (min + max) / 2
+			mid := (minBudget + maxBudget) / 2
 			if mid <= 0 && zeroAllowed {
 				return 0
 			}
 			if mid <= 0 {
-				return min
+				return minBudget
 			}
 			return mid
 		}
 		return -1
 	}
-	if found, min, max, zeroAllowed, _ := thinkingRangeFromRegistry(model); found {
+	if found, minBudget, maxBudget, zeroAllowed, _ := thinkingRangeFromRegistry(model); found {
 		if budget == 0 {
 			if zeroAllowed {
 				return 0
 			}
-			return min
+			return minBudget
 		}
-		if budget < min {
-			return min
+		if budget < minBudget {
+			return minBudget
 		}
-		if budget > max {
-			return max
+		if budget > maxBudget {
+			return maxBudget
 		}
 		return budget
 	}
@@ -105,3 +105,16 @@ func NormalizeReasoningEffortLevel(model, effort string) (string, bool) {
 	}
 	return "", false
 }
+
+// IsOpenAICompatibilityModel reports whether the model is registered as an OpenAI-compatibility model.
+// These models may not advertise Thinking metadata in the registry.
+func IsOpenAICompatibilityModel(model string) bool {
+	if model == "" {
+		return false
+	}
+	info := registry.GetGlobalRegistry().GetModelInfo(model)
+	if info == nil {
+		return false
+	}
+	return strings.EqualFold(strings.TrimSpace(info.Type), "openai-compatibility")
+}
--- a/internal/util/thinking_suffix.go
+++ b/internal/util/thinking_suffix.go
@@ -163,6 +163,11 @@ func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (*
 	if !matched {
 		return nil, nil, false
 	}
+	// Level-based models (OpenAI-style) do not accept numeric thinking budgets in
+	// Claude/Gemini-style protocols, so we don't derive budgets for them here.
+	if ModelUsesThinkingLevels(model) {
+		return nil, nil, false
+	}

 	if budget == nil && effort != nil {
 		if derived, ok := ThinkingEffortToBudget(model, *effort); ok {
--- a/internal/watcher/watcher.go
+++ b/internal/watcher/watcher.go
@@ -14,6 +14,7 @@ import (
 	"os"
 	"path/filepath"
 	"reflect"
+	"runtime"
 	"sort"
 	"strings"
 	"sync"
@@ -61,6 +62,7 @@ type Watcher struct {
 	reloadCallback    func(*config.Config)
 	watcher           *fsnotify.Watcher
 	lastAuthHashes    map[string]string
+	lastRemoveTimes   map[string]time.Time
 	lastConfigHash    string
 	authQueue         chan<- AuthUpdate
 	currentAuths      map[string]*coreauth.Auth
@@ -127,8 +129,9 @@ type AuthUpdate struct {
 const (
 	// replaceCheckDelay is a short delay to allow atomic replace (rename) to settle
 	// before deciding whether a Remove event indicates a real deletion.
-	replaceCheckDelay    = 50 * time.Millisecond
-	configReloadDebounce = 150 * time.Millisecond
+	replaceCheckDelay        = 50 * time.Millisecond
+	configReloadDebounce     = 150 * time.Millisecond
+	authRemoveDebounceWindow = 1 * time.Second
 )

 // NewWatcher creates a new file watcher instance
@@ -721,8 +724,9 @@ func (w *Watcher) authFileUnchanged(path string) (bool, error) {
 	sum := sha256.Sum256(data)
 	curHash := hex.EncodeToString(sum[:])

+	normalized := w.normalizeAuthPath(path)
 	w.clientsMutex.RLock()
-	prevHash, ok := w.lastAuthHashes[path]
+	prevHash, ok := w.lastAuthHashes[normalized]
 	w.clientsMutex.RUnlock()
 	if ok && prevHash == curHash {
 		return true, nil
@@ -731,19 +735,63 @@ func (w *Watcher) authFileUnchanged(path string) (bool, error) {
 }

 func (w *Watcher) isKnownAuthFile(path string) bool {
+	normalized := w.normalizeAuthPath(path)
 	w.clientsMutex.RLock()
 	defer w.clientsMutex.RUnlock()
-	_, ok := w.lastAuthHashes[path]
+	_, ok := w.lastAuthHashes[normalized]
 	return ok
 }

+func (w *Watcher) normalizeAuthPath(path string) string {
+	trimmed := strings.TrimSpace(path)
+	if trimmed == "" {
+		return ""
+	}
+	cleaned := filepath.Clean(trimmed)
+	if runtime.GOOS == "windows" {
+		cleaned = strings.TrimPrefix(cleaned, `\\?\`)
+		cleaned = strings.ToLower(cleaned)
+	}
+	return cleaned
+}
+
+func (w *Watcher) shouldDebounceRemove(normalizedPath string, now time.Time) bool {
+	if normalizedPath == "" {
+		return false
+	}
+	w.clientsMutex.Lock()
+	if w.lastRemoveTimes == nil {
+		w.lastRemoveTimes = make(map[string]time.Time)
+	}
+	if last, ok := w.lastRemoveTimes[normalizedPath]; ok {
+		if now.Sub(last) < authRemoveDebounceWindow {
+			w.clientsMutex.Unlock()
+			return true
+		}
+	}
+	w.lastRemoveTimes[normalizedPath] = now
+	if len(w.lastRemoveTimes) > 128 {
+		cutoff := now.Add(-2 * authRemoveDebounceWindow)
+		for p, t := range w.lastRemoveTimes {
+			if t.Before(cutoff) {
+				delete(w.lastRemoveTimes, p)
+			}
+		}
+	}
+	w.clientsMutex.Unlock()
+	return false
+}
+
 // handleEvent processes individual file system events
 func (w *Watcher) handleEvent(event fsnotify.Event) {
 	// Filter only relevant events: config file or auth-dir JSON files.
 	configOps := fsnotify.Write | fsnotify.Create | fsnotify.Rename
-	isConfigEvent := event.Name == w.configPath && event.Op&configOps != 0
+	normalizedName := w.normalizeAuthPath(event.Name)
+	normalizedConfigPath := w.normalizeAuthPath(w.configPath)
+	normalizedAuthDir := w.normalizeAuthPath(w.authDir)
+	isConfigEvent := normalizedName == normalizedConfigPath && event.Op&configOps != 0
 	authOps := fsnotify.Create | fsnotify.Write | fsnotify.Remove | fsnotify.Rename
-	isAuthJSON := strings.HasPrefix(event.Name, w.authDir) && strings.HasSuffix(event.Name, ".json") && event.Op&authOps != 0
+	isAuthJSON := strings.HasPrefix(normalizedName, normalizedAuthDir) && strings.HasSuffix(normalizedName, ".json") && event.Op&authOps != 0
 	if !isConfigEvent && !isAuthJSON {
 		// Ignore unrelated files (e.g., cookie snapshots *.cookie) and other noise.
 		return
@@ -761,6 +809,10 @@ func (w *Watcher) handleEvent(event fsnotify.Event) {

 	// Handle auth directory changes incrementally (.json only)
 	if event.Op&(fsnotify.Remove|fsnotify.Rename) != 0 {
+		if w.shouldDebounceRemove(normalizedName, now) {
+			log.Debugf("debouncing remove event for %s", filepath.Base(event.Name))
+			return
+		}
 		// Atomic replace on some platforms may surface as Rename (or Remove) before the new file is ready.
 		// Wait briefly; if the path exists again, treat as an update instead of removal.
 		time.Sleep(replaceCheckDelay)
@@ -978,7 +1030,8 @@ func (w *Watcher) reloadClients(rescanAuth bool, affectedOAuthProviders []string
 				if !info.IsDir() && strings.HasSuffix(strings.ToLower(info.Name()), ".json") {
 					if data, errReadFile := os.ReadFile(path); errReadFile == nil && len(data) > 0 {
 						sum := sha256.Sum256(data)
-						w.lastAuthHashes[path] = hex.EncodeToString(sum[:])
+						normalizedPath := w.normalizeAuthPath(path)
+						w.lastAuthHashes[normalizedPath] = hex.EncodeToString(sum[:])
 					}
 				}
 				return nil
@@ -1025,6 +1078,7 @@ func (w *Watcher) addOrUpdateClient(path string) {

 	sum := sha256.Sum256(data)
 	curHash := hex.EncodeToString(sum[:])
+	normalized := w.normalizeAuthPath(path)

 	w.clientsMutex.Lock()

@@ -1034,14 +1088,14 @@ func (w *Watcher) addOrUpdateClient(path string) {
 		w.clientsMutex.Unlock()
 		return
 	}
-	if prev, ok := w.lastAuthHashes[path]; ok && prev == curHash {
+	if prev, ok := w.lastAuthHashes[normalized]; ok && prev == curHash {
 		log.Debugf("auth file unchanged (hash match), skipping reload: %s", filepath.Base(path))
 		w.clientsMutex.Unlock()
 		return
 	}

 	// Update hash cache
-	w.lastAuthHashes[path] = curHash
+	w.lastAuthHashes[normalized] = curHash

 	w.clientsMutex.Unlock() // Unlock before the callback

@@ -1056,10 +1110,11 @@ func (w *Watcher) addOrUpdateClient(path string) {

 // removeClient handles the removal of a single client.
 func (w *Watcher) removeClient(path string) {
+	normalized := w.normalizeAuthPath(path)
 	w.clientsMutex.Lock()

 	cfg := w.config
-	delete(w.lastAuthHashes, path)
+	delete(w.lastAuthHashes, normalized)

 	w.clientsMutex.Unlock() // Release the lock before the callback

--- a/sdk/api/handlers/claude/code_handlers.go
+++ b/sdk/api/handlers/claude/code_handlers.go
@@ -7,7 +7,6 @@
 package claude

 import (
-	"bufio"
 	"bytes"
 	"compress/gzip"
 	"context"
@@ -219,52 +218,24 @@ func (h *ClaudeCodeAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON [
 }

 func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
-	// v6.1: Intelligent Buffered Streamer strategy
-	// Enhanced buffering with larger buffer size (16KB) and longer flush interval (120ms).
-	// Smart flush only when buffer is sufficiently filled (≥50%), dramatically reducing
-	// flush frequency from ~12.5Hz to ~5-8Hz while maintaining low latency.
-	writer := bufio.NewWriterSize(c.Writer, 16*1024) // 4KB → 16KB
-	ticker := time.NewTicker(120 * time.Millisecond) // 80ms → 120ms
-	defer ticker.Stop()
-
-	var chunkIdx int
-
+	// OpenAI-style stream forwarding: write each SSE chunk and flush immediately.
+	// This guarantees clients see incremental output even for small responses.
 	for {
 		select {
 		case <-c.Request.Context().Done():
-			// Context cancelled, flush any remaining data before exit
-			_ = writer.Flush()
 			cancel(c.Request.Context().Err())
 			return

-		case <-ticker.C:
-			// Smart flush: only flush when buffer has sufficient data (≥50% full)
-			// This reduces flush frequency while ensuring data flows naturally
-			buffered := writer.Buffered()
-			if buffered >= 8*1024 { // At least 8KB (50% of 16KB buffer)
-				if err := writer.Flush(); err != nil {
-					// Error flushing, cancel and return
-					cancel(err)
-					return
-				}
-				flusher.Flush() // Also flush the underlying http.ResponseWriter
-			}
-
 		case chunk, ok := <-data:
 			if !ok {
-				// Stream ended, flush remaining data
-				_ = writer.Flush()
+				flusher.Flush()
 				cancel(nil)
 				return
 			}
-
-			// Forward the complete SSE event block directly (already formatted by the translator).
-			// The translator returns a complete SSE-compliant event block, including event:, data:, and separators.
-			// The handler just needs to forward it without reassembly.
 			if len(chunk) > 0 {
-				_, _ = writer.Write(chunk)
+				_, _ = c.Writer.Write(chunk)
+				flusher.Flush()
 			}
-			chunkIdx++

 		case errMsg, ok := <-errs:
 			if !ok {
@@ -276,21 +247,20 @@ func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.
 					status = errMsg.StatusCode
 				}
 				c.Status(status)
+
 				// An error occurred: emit as a proper SSE error event
 				errorBytes, _ := json.Marshal(h.toClaudeError(errMsg))
-				_, _ = writer.WriteString("event: error\n")
-				_, _ = writer.WriteString("data: ")
-				_, _ = writer.Write(errorBytes)
-				_, _ = writer.WriteString("\n\n")
-				_ = writer.Flush()
+				_, _ = fmt.Fprintf(c.Writer, "event: error\ndata: %s\n\n", errorBytes)
 				flusher.Flush()
 			}
+
 			var execErr error
 			if errMsg != nil {
 				execErr = errMsg.Error
 			}
 			cancel(execErr)
 			return
+		case <-time.After(500 * time.Millisecond):
 		}
 	}
 }
--- a/sdk/api/handlers/handlers.go
+++ b/sdk/api/handlers/handlers.go
@@ -116,19 +116,29 @@ func (h *BaseAPIHandler) GetContextWithCancel(handler interfaces.APIHandler, c *
 	newCtx = context.WithValue(newCtx, "gin", c)
 	newCtx = context.WithValue(newCtx, "handler", handler)
 	return newCtx, func(params ...interface{}) {
-		if h.Cfg.RequestLog {
-			if len(params) == 1 {
-				data := params[0]
-				switch data.(type) {
-				case []byte:
-					appendAPIResponse(c, data.([]byte))
-				case error:
-					appendAPIResponse(c, []byte(data.(error).Error()))
-				case string:
-					appendAPIResponse(c, []byte(data.(string)))
-				case bool:
-				case nil:
+		if h.Cfg.RequestLog && len(params) == 1 {
+			var payload []byte
+			switch data := params[0].(type) {
+			case []byte:
+				payload = data
+			case error:
+				if data != nil {
+					payload = []byte(data.Error())
 				}
+			case string:
+				payload = []byte(data)
+			}
+			if len(payload) > 0 {
+				if existing, exists := c.Get("API_RESPONSE"); exists {
+					if existingBytes, ok := existing.([]byte); ok && len(existingBytes) > 0 {
+						trimmedPayload := bytes.TrimSpace(payload)
+						if len(trimmedPayload) > 0 && bytes.Contains(existingBytes, trimmedPayload) {
+							cancel()
+							return
+						}
+					}
+				}
+				appendAPIResponse(c, payload)
 			}
 		}

--- a/sdk/auth/iflow.go
+++ b/sdk/auth/iflow.go
@@ -107,7 +107,7 @@ func (a *IFlowAuthenticator) Login(ctx context.Context, cfg *config.Config, opts
 		return nil, fmt.Errorf("iflow authentication failed: missing account identifier")
 	}

-	fileName := fmt.Sprintf("iflow-%s.json", email)
+	fileName := fmt.Sprintf("iflow-%s-%d.json", email, time.Now().Unix())
 	metadata := map[string]any{
 		"email":         email,
 		"api_key":       tokenStorage.APIKey,
--- a/sdk/cliproxy/auth/manager.go
+++ b/sdk/cliproxy/auth/manager.go
@@ -375,10 +375,19 @@ func (m *Manager) executeWithProvider(ctx context.Context, provider string, req
 		}

 		accountType, accountInfo := auth.AccountInfo()
+		proxyInfo := auth.ProxyInfo()
 		if accountType == "api_key" {
-			log.Debugf("Use API key %s for model %s", util.HideAPIKey(accountInfo), req.Model)
+			if proxyInfo != "" {
+				log.Debugf("Use API key %s for model %s %s", util.HideAPIKey(accountInfo), req.Model, proxyInfo)
+			} else {
+				log.Debugf("Use API key %s for model %s", util.HideAPIKey(accountInfo), req.Model)
+			}
 		} else if accountType == "oauth" {
-			log.Debugf("Use OAuth %s for model %s", accountInfo, req.Model)
+			if proxyInfo != "" {
+				log.Debugf("Use OAuth %s for model %s %s", accountInfo, req.Model, proxyInfo)
+			} else {
+				log.Debugf("Use OAuth %s for model %s", accountInfo, req.Model)
+			}
 		}

 		tried[auth.ID] = struct{}{}
@@ -423,10 +432,19 @@ func (m *Manager) executeCountWithProvider(ctx context.Context, provider string,
 		}

 		accountType, accountInfo := auth.AccountInfo()
+		proxyInfo := auth.ProxyInfo()
 		if accountType == "api_key" {
-			log.Debugf("Use API key %s for model %s", util.HideAPIKey(accountInfo), req.Model)
+			if proxyInfo != "" {
+				log.Debugf("Use API key %s for model %s %s", util.HideAPIKey(accountInfo), req.Model, proxyInfo)
+			} else {
+				log.Debugf("Use API key %s for model %s", util.HideAPIKey(accountInfo), req.Model)
+			}
 		} else if accountType == "oauth" {
-			log.Debugf("Use OAuth %s for model %s", accountInfo, req.Model)
+			if proxyInfo != "" {
+				log.Debugf("Use OAuth %s for model %s %s", accountInfo, req.Model, proxyInfo)
+			} else {
+				log.Debugf("Use OAuth %s for model %s", accountInfo, req.Model)
+			}
 		}

 		tried[auth.ID] = struct{}{}
@@ -471,10 +489,19 @@ func (m *Manager) executeStreamWithProvider(ctx context.Context, provider string
 		}

 		accountType, accountInfo := auth.AccountInfo()
+		proxyInfo := auth.ProxyInfo()
 		if accountType == "api_key" {
-			log.Debugf("Use API key %s for model %s", util.HideAPIKey(accountInfo), req.Model)
+			if proxyInfo != "" {
+				log.Debugf("Use API key %s for model %s %s", util.HideAPIKey(accountInfo), req.Model, proxyInfo)
+			} else {
+				log.Debugf("Use API key %s for model %s", util.HideAPIKey(accountInfo), req.Model)
+			}
 		} else if accountType == "oauth" {
-			log.Debugf("Use OAuth %s for model %s", accountInfo, req.Model)
+			if proxyInfo != "" {
+				log.Debugf("Use OAuth %s for model %s %s", accountInfo, req.Model, proxyInfo)
+			} else {
+				log.Debugf("Use OAuth %s for model %s", accountInfo, req.Model)
+			}
 		}

 		tried[auth.ID] = struct{}{}
--- a/sdk/cliproxy/auth/types.go
+++ b/sdk/cliproxy/auth/types.go
@@ -157,6 +157,20 @@ func (m *ModelState) Clone() *ModelState {
 	return &copyState
 }

+func (a *Auth) ProxyInfo() string {
+	if a == nil {
+		return ""
+	}
+	proxyStr := strings.TrimSpace(a.ProxyURL)
+	if proxyStr == "" {
+		return ""
+	}
+	if idx := strings.Index(proxyStr, "://"); idx > 0 {
+		return "via " + proxyStr[:idx] + " proxy"
+	}
+	return "via proxy"
+}
+
 func (a *Auth) AccountInfo() (string, string) {
 	if a == nil {
 		return "", ""
--- a/test/thinking_conversion_test.go
+++ b/test/thinking_conversion_test.go
@@ -0,0 +1,760 @@
+package test
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+	"time"
+
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// isOpenAICompatModel returns true if the model is configured as an OpenAI-compatible
+// model that should have reasoning effort passed through even if not in registry.
+// This simulates the allowCompat behavior from OpenAICompatExecutor.
+func isOpenAICompatModel(model string) bool {
+	return model == "openai-compat"
+}
+
+// registerCoreModels loads representative models across providers into the registry
+// so NormalizeThinkingBudget and level validation use real ranges.
+func registerCoreModels(t *testing.T) func() {
+	t.Helper()
+	reg := registry.GetGlobalRegistry()
+	uid := fmt.Sprintf("thinking-core-%d", time.Now().UnixNano())
+	reg.RegisterClient(uid+"-gemini", "gemini", registry.GetGeminiModels())
+	reg.RegisterClient(uid+"-claude", "claude", registry.GetClaudeModels())
+	reg.RegisterClient(uid+"-openai", "codex", registry.GetOpenAIModels())
+	reg.RegisterClient(uid+"-qwen", "qwen", registry.GetQwenModels())
+	// Custom openai-compatible model with forced thinking suffix passthrough.
+	// No Thinking field - simulates an external model added via openai-compat
+	// where the registry has no knowledge of its thinking capabilities.
+	// The allowCompat flag should preserve reasoning effort for such models.
+	customOpenAIModels := []*registry.ModelInfo{
+		{
+			ID:          "openai-compat",
+			Object:      "model",
+			Created:     1700000000,
+			OwnedBy:     "custom-provider",
+			Type:        "openai",
+			DisplayName: "OpenAI Compatible Model",
+			Description: "OpenAI-compatible model with forced thinking suffix support",
+		},
+	}
+	reg.RegisterClient(uid+"-custom-openai", "codex", customOpenAIModels)
+	return func() {
+		reg.UnregisterClient(uid + "-gemini")
+		reg.UnregisterClient(uid + "-claude")
+		reg.UnregisterClient(uid + "-openai")
+		reg.UnregisterClient(uid + "-qwen")
+		reg.UnregisterClient(uid + "-custom-openai")
+	}
+}
+
+var (
+	thinkingTestModels = []string{
+		"gpt-5",           // level-based thinking model
+		"gemini-2.5-pro",  // numeric-budget thinking model
+		"qwen3-code-plus", // no thinking support
+		"openai-compat",   // allowCompat=true (OpenAI-compatible channel)
+	}
+	thinkingTestFromProtocols = []string{"openai", "claude", "gemini", "openai-response"}
+	thinkingTestToProtocols   = []string{"gemini", "claude", "openai", "codex"}
+
+	// Numeric budgets and their level equivalents:
+	// -1 -> auto
+	// 0 -> none
+	// 1..1024 -> low
+	// 1025..8192 -> medium
+	// 8193..24576 -> high
+	// >24576 -> model highest level (right-most in Levels)
+	thinkingNumericSamples = []int{-1, 0, 1023, 1025, 8193, 64000}
+
+	// Levels and their numeric equivalents:
+	// auto -> -1
+	// none -> 0
+	// minimal -> 512
+	// low -> 1024
+	// medium -> 8192
+	// high -> 24576
+	// xhigh -> 32768
+	// invalid -> invalid (no mapping)
+	thinkingLevelSamples = []string{"auto", "none", "minimal", "low", "medium", "high", "xhigh", "invalid"}
+)
+
+func buildRawPayload(fromProtocol, modelWithSuffix string) []byte {
+	switch fromProtocol {
+	case "gemini":
+		return []byte(fmt.Sprintf(`{"model":"%s","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, modelWithSuffix))
+	case "openai-response":
+		return []byte(fmt.Sprintf(`{"model":"%s","input":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`, modelWithSuffix))
+	default: // openai / claude and other chat-style payloads
+		return []byte(fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, modelWithSuffix))
+	}
+}
+
+// normalizeCodexPayload mirrors codex_executor's reasoning + streaming tweaks.
+func normalizeCodexPayload(body []byte, upstreamModel string, allowCompat bool) ([]byte, error) {
+	body = executor.NormalizeThinkingConfig(body, upstreamModel, allowCompat)
+	if err := executor.ValidateThinkingConfig(body, upstreamModel); err != nil {
+		return body, err
+	}
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)
+	body, _ = sjson.SetBytes(body, "stream", true)
+	body, _ = sjson.DeleteBytes(body, "previous_response_id")
+	return body, nil
+}
+
+// buildBodyForProtocol runs a minimal request through the same translation and
+// thinking pipeline used in executors for the given target protocol.
+func buildBodyForProtocol(t *testing.T, fromProtocol, toProtocol, modelWithSuffix string) ([]byte, error) {
+	t.Helper()
+	normalizedModel, metadata := util.NormalizeThinkingModel(modelWithSuffix)
+	upstreamModel := util.ResolveOriginalModel(normalizedModel, metadata)
+	raw := buildRawPayload(fromProtocol, modelWithSuffix)
+	stream := fromProtocol != toProtocol
+
+	body := sdktranslator.TranslateRequest(
+		sdktranslator.FromString(fromProtocol),
+		sdktranslator.FromString(toProtocol),
+		normalizedModel,
+		raw,
+		stream,
+	)
+
+	var err error
+	allowCompat := isOpenAICompatModel(normalizedModel)
+	switch toProtocol {
+	case "gemini":
+		body = executor.ApplyThinkingMetadata(body, metadata, normalizedModel)
+		body = util.ApplyDefaultThinkingIfNeeded(normalizedModel, body)
+		body = util.NormalizeGeminiThinkingBudget(normalizedModel, body)
+		body = util.StripThinkingConfigIfUnsupported(normalizedModel, body)
+	case "claude":
+		if budget, ok := util.ResolveClaudeThinkingConfig(normalizedModel, metadata); ok {
+			body = util.ApplyClaudeThinkingConfig(body, budget)
+		}
+	case "openai":
+		body = executor.ApplyReasoningEffortMetadata(body, metadata, normalizedModel, "reasoning_effort", allowCompat)
+		body = executor.NormalizeThinkingConfig(body, upstreamModel, allowCompat)
+		err = executor.ValidateThinkingConfig(body, upstreamModel)
+	case "codex": // OpenAI responses / codex
+		// Codex does not support allowCompat; always use false.
+		body = executor.ApplyReasoningEffortMetadata(body, metadata, normalizedModel, "reasoning.effort", false)
+		// Mirror CodexExecutor final normalization and model override so tests log the final body.
+		body, err = normalizeCodexPayload(body, upstreamModel, false)
+	default:
+	}
+
+	// Mirror executor behavior: final payload uses the upstream (base) model name.
+	if upstreamModel != "" {
+		body, _ = sjson.SetBytes(body, "model", upstreamModel)
+	}
+
+	// For tests we only keep model + thinking-related fields to avoid noise.
+	body = filterThinkingBody(toProtocol, body, upstreamModel, normalizedModel)
+	return body, err
+}
+
+// filterThinkingBody projects the translated payload down to only model and
+// thinking-related fields for the given target protocol.
+func filterThinkingBody(toProtocol string, body []byte, upstreamModel, normalizedModel string) []byte {
+	if len(body) == 0 {
+		return body
+	}
+	out := []byte(`{}`)
+
+	// Preserve model if present, otherwise fall back to upstream/normalized model.
+	if m := gjson.GetBytes(body, "model"); m.Exists() {
+		out, _ = sjson.SetBytes(out, "model", m.Value())
+	} else if upstreamModel != "" {
+		out, _ = sjson.SetBytes(out, "model", upstreamModel)
+	} else if normalizedModel != "" {
+		out, _ = sjson.SetBytes(out, "model", normalizedModel)
+	}
+
+	switch toProtocol {
+	case "gemini":
+		if tc := gjson.GetBytes(body, "generationConfig.thinkingConfig"); tc.Exists() {
+			out, _ = sjson.SetRawBytes(out, "generationConfig.thinkingConfig", []byte(tc.Raw))
+		}
+	case "claude":
+		if tcfg := gjson.GetBytes(body, "thinking"); tcfg.Exists() {
+			out, _ = sjson.SetRawBytes(out, "thinking", []byte(tcfg.Raw))
+		}
+	case "openai":
+		if re := gjson.GetBytes(body, "reasoning_effort"); re.Exists() {
+			out, _ = sjson.SetBytes(out, "reasoning_effort", re.Value())
+		}
+	case "codex":
+		if re := gjson.GetBytes(body, "reasoning.effort"); re.Exists() {
+			out, _ = sjson.SetBytes(out, "reasoning.effort", re.Value())
+		}
+	}
+	return out
+}
+
+func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) {
+	cleanup := registerCoreModels(t)
+	defer cleanup()
+
+	type scenario struct {
+		name        string
+		modelSuffix string
+	}
+
+	numericName := func(budget int) string {
+		if budget < 0 {
+			return "numeric-neg1"
+		}
+		return fmt.Sprintf("numeric-%d", budget)
+	}
+
+	for _, model := range thinkingTestModels {
+		_ = registry.GetGlobalRegistry().GetModelInfo(model)
+
+		for _, from := range thinkingTestFromProtocols {
+			// Scenario selection follows protocol semantics:
+			// - OpenAI-style protocols (openai/openai-response) express thinking as levels.
+			// - Claude/Gemini-style protocols express thinking as numeric budgets.
+			cases := []scenario{
+				{name: "no-suffix", modelSuffix: model},
+			}
+			if from == "openai" || from == "openai-response" {
+				for _, lvl := range thinkingLevelSamples {
+					cases = append(cases, scenario{
+						name:        "level-" + lvl,
+						modelSuffix: fmt.Sprintf("%s(%s)", model, lvl),
+					})
+				}
+			} else { // claude or gemini
+				for _, budget := range thinkingNumericSamples {
+					budget := budget
+					cases = append(cases, scenario{
+						name:        numericName(budget),
+						modelSuffix: fmt.Sprintf("%s(%d)", model, budget),
+					})
+				}
+			}
+
+			for _, to := range thinkingTestToProtocols {
+				if from == to {
+					continue
+				}
+				t.Logf("─────────────────────────────────────────────────────────────────────────────────")
+				t.Logf("  %s -> %s | model: %s", from, to, model)
+				t.Logf("─────────────────────────────────────────────────────────────────────────────────")
+				for _, cs := range cases {
+					from := from
+					to := to
+					cs := cs
+					testName := fmt.Sprintf("%s->%s/%s/%s", from, to, model, cs.name)
+					t.Run(testName, func(t *testing.T) {
+						normalizedModel, metadata := util.NormalizeThinkingModel(cs.modelSuffix)
+						expectPresent, expectValue, expectErr := func() (bool, string, bool) {
+							switch to {
+							case "gemini":
+								budget, include, ok := util.ResolveThinkingConfigFromMetadata(normalizedModel, metadata)
+								if !ok || !util.ModelSupportsThinking(normalizedModel) {
+									return false, "", false
+								}
+								if include != nil && !*include {
+									return false, "", false
+								}
+								if budget == nil {
+									return false, "", false
+								}
+								norm := util.NormalizeThinkingBudget(normalizedModel, *budget)
+								return true, fmt.Sprintf("%d", norm), false
+							case "claude":
+								if !util.ModelSupportsThinking(normalizedModel) {
+									return false, "", false
+								}
+								budget, ok := util.ResolveClaudeThinkingConfig(normalizedModel, metadata)
+								if !ok || budget == nil {
+									return false, "", false
+								}
+								return true, fmt.Sprintf("%d", *budget), false
+							case "openai":
+								allowCompat := isOpenAICompatModel(normalizedModel)
+								if !util.ModelSupportsThinking(normalizedModel) && !allowCompat {
+									return false, "", false
+								}
+								// For allowCompat models, pass through effort directly without validation
+								if allowCompat {
+									effort, ok := util.ReasoningEffortFromMetadata(metadata)
+									if ok && strings.TrimSpace(effort) != "" {
+										return true, strings.ToLower(strings.TrimSpace(effort)), false
+									}
+									// Check numeric budget fallback for allowCompat
+									if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
+										if mapped, okMap := util.OpenAIThinkingBudgetToEffort(normalizedModel, *budget); okMap && mapped != "" {
+											return true, mapped, false
+										}
+									}
+									return false, "", false
+								}
+								if !util.ModelUsesThinkingLevels(normalizedModel) {
+									// Non-levels models don't support effort strings in openai
+									return false, "", false
+								}
+								effort, ok := util.ReasoningEffortFromMetadata(metadata)
+								if !ok || strings.TrimSpace(effort) == "" {
+									if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
+										if mapped, okMap := util.OpenAIThinkingBudgetToEffort(normalizedModel, *budget); okMap {
+											effort = mapped
+											ok = true
+										}
+									}
+								}
+								if !ok || strings.TrimSpace(effort) == "" {
+									return false, "", false
+								}
+								effort = strings.ToLower(strings.TrimSpace(effort))
+								if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, effort); okLevel {
+									return true, normalized, false
+								}
+								return false, "", true // validation would fail
+							case "codex":
+								// Codex does not support allowCompat; require thinking-capable level models.
+								if !util.ModelSupportsThinking(normalizedModel) || !util.ModelUsesThinkingLevels(normalizedModel) {
+									return false, "", false
+								}
+								effort, ok := util.ReasoningEffortFromMetadata(metadata)
+								if ok && strings.TrimSpace(effort) != "" {
+									effort = strings.ToLower(strings.TrimSpace(effort))
+									if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, effort); okLevel {
+										return true, normalized, false
+									}
+									return false, "", true
+								}
+								if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
+									if mapped, okMap := util.OpenAIThinkingBudgetToEffort(normalizedModel, *budget); okMap && mapped != "" {
+										mapped = strings.ToLower(strings.TrimSpace(mapped))
+										if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, mapped); okLevel {
+											return true, normalized, false
+										}
+										return false, "", true
+									}
+								}
+								if from != "openai-response" {
+									// Codex translators default reasoning.effort to "medium" when
+									// no explicit thinking suffix/metadata is provided.
+									return true, "medium", false
+								}
+								return false, "", false
+							default:
+								return false, "", false
+							}
+						}()
+
+						body, err := buildBodyForProtocol(t, from, to, cs.modelSuffix)
+						actualPresent, actualValue := func() (bool, string) {
+							path := ""
+							switch to {
+							case "gemini":
+								path = "generationConfig.thinkingConfig.thinkingBudget"
+							case "claude":
+								path = "thinking.budget_tokens"
+							case "openai":
+								path = "reasoning_effort"
+							case "codex":
+								path = "reasoning.effort"
+							}
+							if path == "" {
+								return false, ""
+							}
+							val := gjson.GetBytes(body, path)
+							if to == "codex" && !val.Exists() {
+								reasoning := gjson.GetBytes(body, "reasoning")
+								if reasoning.Exists() {
+									val = reasoning.Get("effort")
+								}
+							}
+							if !val.Exists() {
+								return false, ""
+							}
+							if val.Type == gjson.Number {
+								return true, fmt.Sprintf("%d", val.Int())
+							}
+							return true, val.String()
+						}()
+
+						t.Logf("from=%s to=%s model=%s suffix=%s present(expect=%v got=%v) value(expect=%s got=%s) err(expect=%v got=%v) body=%s",
+							from, to, model, cs.modelSuffix, expectPresent, actualPresent, expectValue, actualValue, expectErr, err != nil, string(body))
+
+						if expectErr {
+							if err == nil {
+								t.Fatalf("expected validation error but got none, body=%s", string(body))
+							}
+							return
+						}
+						if err != nil {
+							t.Fatalf("unexpected error: %v body=%s", err, string(body))
+						}
+
+						if expectPresent != actualPresent {
+							t.Fatalf("presence mismatch: expect %v got %v body=%s", expectPresent, actualPresent, string(body))
+						}
+						if expectPresent && expectValue != actualValue {
+							t.Fatalf("value mismatch: expect %s got %s body=%s", expectValue, actualValue, string(body))
+						}
+					})
+				}
+			}
+		}
+	}
+}
+
+// buildRawPayloadWithThinking creates a payload with thinking parameters already in the body.
+// This tests the path where thinking comes from the raw payload, not model suffix.
+func buildRawPayloadWithThinking(fromProtocol, model string, thinkingParam any) []byte {
+	switch fromProtocol {
+	case "gemini":
+		base := fmt.Sprintf(`{"model":"%s","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, model)
+		if budget, ok := thinkingParam.(int); ok {
+			base, _ = sjson.Set(base, "generationConfig.thinkingConfig.thinkingBudget", budget)
+		}
+		return []byte(base)
+	case "openai-response":
+		base := fmt.Sprintf(`{"model":"%s","input":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`, model)
+		if effort, ok := thinkingParam.(string); ok && effort != "" {
+			base, _ = sjson.Set(base, "reasoning.effort", effort)
+		}
+		return []byte(base)
+	case "openai":
+		base := fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model)
+		if effort, ok := thinkingParam.(string); ok && effort != "" {
+			base, _ = sjson.Set(base, "reasoning_effort", effort)
+		}
+		return []byte(base)
+	case "claude":
+		base := fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model)
+		if budget, ok := thinkingParam.(int); ok {
+			base, _ = sjson.Set(base, "thinking.type", "enabled")
+			base, _ = sjson.Set(base, "thinking.budget_tokens", budget)
+		}
+		return []byte(base)
+	default:
+		return []byte(fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model))
+	}
+}
+
+// buildBodyForProtocolWithRawThinking translates payload with raw thinking params.
+func buildBodyForProtocolWithRawThinking(t *testing.T, fromProtocol, toProtocol, model string, thinkingParam any) ([]byte, error) {
+	t.Helper()
+	raw := buildRawPayloadWithThinking(fromProtocol, model, thinkingParam)
+	stream := fromProtocol != toProtocol
+
+	body := sdktranslator.TranslateRequest(
+		sdktranslator.FromString(fromProtocol),
+		sdktranslator.FromString(toProtocol),
+		model,
+		raw,
+		stream,
+	)
+
+	var err error
+	allowCompat := isOpenAICompatModel(model)
+	switch toProtocol {
+	case "gemini":
+		body = util.ApplyDefaultThinkingIfNeeded(model, body)
+		body = util.NormalizeGeminiThinkingBudget(model, body)
+		body = util.StripThinkingConfigIfUnsupported(model, body)
+	case "claude":
+		// For raw payload, Claude thinking is passed through by translator
+		// No additional processing needed as thinking is already in body
+	case "openai":
+		body = executor.NormalizeThinkingConfig(body, model, allowCompat)
+		err = executor.ValidateThinkingConfig(body, model)
+	case "codex":
+		// Codex does not support allowCompat; always use false.
+		body, err = normalizeCodexPayload(body, model, false)
+	}
+
+	body, _ = sjson.SetBytes(body, "model", model)
+	body = filterThinkingBody(toProtocol, body, model, model)
+	return body, err
+}
+
+func TestRawPayloadThinkingConversions(t *testing.T) {
+	cleanup := registerCoreModels(t)
+	defer cleanup()
+
+	type scenario struct {
+		name          string
+		thinkingParam any // int for budget, string for effort level
+	}
+
+	numericName := func(budget int) string {
+		if budget < 0 {
+			return "budget-neg1"
+		}
+		return fmt.Sprintf("budget-%d", budget)
+	}
+
+	for _, model := range thinkingTestModels {
+		supportsThinking := util.ModelSupportsThinking(model)
+		usesLevels := util.ModelUsesThinkingLevels(model)
+		allowCompat := isOpenAICompatModel(model)
+
+		for _, from := range thinkingTestFromProtocols {
+			var cases []scenario
+			switch from {
+			case "openai", "openai-response":
+				cases = []scenario{
+					{name: "no-thinking", thinkingParam: nil},
+				}
+				for _, lvl := range thinkingLevelSamples {
+					cases = append(cases, scenario{
+						name:          "effort-" + lvl,
+						thinkingParam: lvl,
+					})
+				}
+			case "gemini", "claude":
+				cases = []scenario{
+					{name: "no-thinking", thinkingParam: nil},
+				}
+				for _, budget := range thinkingNumericSamples {
+					budget := budget
+					cases = append(cases, scenario{
+						name:          numericName(budget),
+						thinkingParam: budget,
+					})
+				}
+			}
+
+			for _, to := range thinkingTestToProtocols {
+				if from == to {
+					continue
+				}
+				t.Logf("═══════════════════════════════════════════════════════════════════════════════")
+				t.Logf("  RAW PAYLOAD: %s -> %s | model: %s", from, to, model)
+				t.Logf("═══════════════════════════════════════════════════════════════════════════════")
+
+				for _, cs := range cases {
+					from := from
+					to := to
+					cs := cs
+					testName := fmt.Sprintf("raw/%s->%s/%s/%s", from, to, model, cs.name)
+					t.Run(testName, func(t *testing.T) {
+						expectPresent, expectValue, expectErr := func() (bool, string, bool) {
+							if cs.thinkingParam == nil {
+								if to == "codex" && from != "openai-response" && supportsThinking && usesLevels {
+									// Codex translators default reasoning.effort to "medium" for thinking-capable level models
+									return true, "medium", false
+								}
+								return false, "", false
+							}
+
+							switch to {
+							case "gemini":
+								if !supportsThinking || usesLevels {
+									return false, "", false
+								}
+								// Gemini expects numeric budget (only for non-level models)
+								if budget, ok := cs.thinkingParam.(int); ok {
+									norm := util.NormalizeThinkingBudget(model, budget)
+									return true, fmt.Sprintf("%d", norm), false
+								}
+								// Convert effort level to budget for non-level models only
+								if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
+									// "none" disables thinking - no thinkingBudget in output
+									if strings.ToLower(effort) == "none" {
+										return false, "", false
+									}
+									if budget, okB := util.ThinkingEffortToBudget(model, effort); okB {
+										// ThinkingEffortToBudget already returns normalized budget
+										return true, fmt.Sprintf("%d", budget), false
+									}
+									// Invalid effort does not map to a budget
+									return false, "", false
+								}
+								return false, "", false
+							case "claude":
+								if !supportsThinking || usesLevels {
+									return false, "", false
+								}
+								// Claude expects numeric budget (only for non-level models)
+								if budget, ok := cs.thinkingParam.(int); ok && budget > 0 {
+									norm := util.NormalizeThinkingBudget(model, budget)
+									return true, fmt.Sprintf("%d", norm), false
+								}
+								// Convert effort level to budget for non-level models only
+								if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
+									// "none" and "auto" don't produce budget_tokens
+									lower := strings.ToLower(effort)
+									if lower == "none" || lower == "auto" {
+										return false, "", false
+									}
+									if budget, okB := util.ThinkingEffortToBudget(model, effort); okB {
+										// ThinkingEffortToBudget already returns normalized budget
+										return true, fmt.Sprintf("%d", budget), false
+									}
+									// Invalid effort - claude sets thinking.type:enabled but no budget_tokens
+									return false, "", false
+								}
+								return false, "", false
+							case "openai":
+								if allowCompat {
+									if effort, ok := cs.thinkingParam.(string); ok && strings.TrimSpace(effort) != "" {
+										normalized := strings.ToLower(strings.TrimSpace(effort))
+										return true, normalized, false
+									}
+									if budget, ok := cs.thinkingParam.(int); ok {
+										if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" {
+											return true, mapped, false
+										}
+									}
+									return false, "", false
+								}
+								if !supportsThinking || !usesLevels {
+									return false, "", false
+								}
+								if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
+									if normalized, okN := util.NormalizeReasoningEffortLevel(model, effort); okN {
+										return true, normalized, false
+									}
+									return false, "", true // invalid level
+								}
+								if budget, ok := cs.thinkingParam.(int); ok {
+									if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" {
+										// Check if the mapped effort is valid for this model
+										if _, validLevel := util.NormalizeReasoningEffortLevel(model, mapped); !validLevel {
+											return true, mapped, true // expect validation error
+										}
+										return true, mapped, false
+									}
+								}
+								return false, "", false
+							case "codex":
+								// Codex does not support allowCompat; require thinking-capable level models.
+								if !supportsThinking || !usesLevels {
+									return false, "", false
+								}
+								if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
+									if normalized, okN := util.NormalizeReasoningEffortLevel(model, effort); okN {
+										return true, normalized, false
+									}
+									return false, "", true
+								}
+								if budget, ok := cs.thinkingParam.(int); ok {
+									if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" {
+										// Check if the mapped effort is valid for this model
+										if _, validLevel := util.NormalizeReasoningEffortLevel(model, mapped); !validLevel {
+											return true, mapped, true // expect validation error
+										}
+										return true, mapped, false
+									}
+								}
+								if from != "openai-response" {
+									// Codex translators default reasoning.effort to "medium" for thinking-capable models
+									return true, "medium", false
+								}
+								return false, "", false
+							}
+							return false, "", false
+						}()
+
+						body, err := buildBodyForProtocolWithRawThinking(t, from, to, model, cs.thinkingParam)
+						actualPresent, actualValue := func() (bool, string) {
+							path := ""
+							switch to {
+							case "gemini":
+								path = "generationConfig.thinkingConfig.thinkingBudget"
+							case "claude":
+								path = "thinking.budget_tokens"
+							case "openai":
+								path = "reasoning_effort"
+							case "codex":
+								path = "reasoning.effort"
+							}
+							if path == "" {
+								return false, ""
+							}
+							val := gjson.GetBytes(body, path)
+							if to == "codex" && !val.Exists() {
+								reasoning := gjson.GetBytes(body, "reasoning")
+								if reasoning.Exists() {
+									val = reasoning.Get("effort")
+								}
+							}
+							if !val.Exists() {
+								return false, ""
+							}
+							if val.Type == gjson.Number {
+								return true, fmt.Sprintf("%d", val.Int())
+							}
+							return true, val.String()
+						}()
+
+						t.Logf("from=%s to=%s model=%s param=%v present(expect=%v got=%v) value(expect=%s got=%s) err(expect=%v got=%v) body=%s",
+							from, to, model, cs.thinkingParam, expectPresent, actualPresent, expectValue, actualValue, expectErr, err != nil, string(body))
+
+						if expectErr {
+							if err == nil {
+								t.Fatalf("expected validation error but got none, body=%s", string(body))
+							}
+							return
+						}
+						if err != nil {
+							t.Fatalf("unexpected error: %v body=%s", err, string(body))
+						}
+
+						if expectPresent != actualPresent {
+							t.Fatalf("presence mismatch: expect %v got %v body=%s", expectPresent, actualPresent, string(body))
+						}
+						if expectPresent && expectValue != actualValue {
+							t.Fatalf("value mismatch: expect %s got %s body=%s", expectValue, actualValue, string(body))
+						}
+					})
+				}
+			}
+		}
+	}
+}
+
+func TestOpenAIThinkingBudgetToEffortRanges(t *testing.T) {
+	cleanup := registerCoreModels(t)
+	defer cleanup()
+
+	cases := []struct {
+		name   string
+		model  string
+		budget int
+		want   string
+		ok     bool
+	}{
+		{name: "dynamic-auto", model: "gpt-5", budget: -1, want: "auto", ok: true},
+		{name: "zero-none", model: "gpt-5", budget: 0, want: "none", ok: true},
+		{name: "low-min", model: "gpt-5", budget: 1, want: "low", ok: true},
+		{name: "low-max", model: "gpt-5", budget: 1024, want: "low", ok: true},
+		{name: "medium-min", model: "gpt-5", budget: 1025, want: "medium", ok: true},
+		{name: "medium-max", model: "gpt-5", budget: 8192, want: "medium", ok: true},
+		{name: "high-min", model: "gpt-5", budget: 8193, want: "high", ok: true},
+		{name: "high-max", model: "gpt-5", budget: 24576, want: "high", ok: true},
+		{name: "over-max-clamps-to-highest", model: "gpt-5", budget: 64000, want: "high", ok: true},
+		{name: "over-max-xhigh-model", model: "gpt-5.2", budget: 50000, want: "xhigh", ok: true},
+		{name: "negative-unsupported", model: "gpt-5", budget: -5, want: "", ok: false},
+	}
+
+	for _, cs := range cases {
+		cs := cs
+		t.Run(cs.name, func(t *testing.T) {
+			got, ok := util.OpenAIThinkingBudgetToEffort(cs.model, cs.budget)
+			if ok != cs.ok {
+				t.Fatalf("ok mismatch for model=%s budget=%d: expect %v got %v", cs.model, cs.budget, cs.ok, ok)
+			}
+			if got != cs.want {
+				t.Fatalf("value mismatch for model=%s budget=%d: expect %q got %q", cs.model, cs.budget, cs.want, got)
+			}
+		})
+	}
+}
Author	SHA1	Message	Date
Luis Pater	d9a65745df	fix(translator): handle empty item type and string content in OpenAI response parser	2025-12-15 20:35:52 +08:00
Luis Pater	f6720f8dfa	Merge pull request #547 from router-for-me/amp feat(amp): require API key authentication for management routes	2025-12-15 16:14:49 +08:00
Chén Mù	e19ab3a066	Merge pull request #543 from router-for-me/log feat(auth): add proxy information to debug logs	2025-12-15 15:59:16 +08:00
hkfires	8f1dd69e72	feat(amp): require API key authentication for management routes All Amp management endpoints (e.g., /api/user, /threads) are now protected by the standard API key authentication middleware. This ensures that all management operations require a valid API key, significantly improving security. As a result of this change: - The `restrict-management-to-localhost` setting now defaults to `false`. API key authentication provides a stronger and more flexible security control than IP-based restrictions, improving usability in containerized environments. - The reverse proxy logic now strips the client's `Authorization` header after authenticating the initial request. It then injects the configured `upstream-api-key` for the request to the upstream Amp service. BREAKING CHANGE: Amp management endpoints now require a valid API key for authentication. Requests without a valid API key in the `Authorization` header will be rejected with a 401 Unauthorized error.	2025-12-15 13:24:53 +08:00
hkfires	f26da24a2f	feat(auth): add proxy information to debug logs	2025-12-15 13:14:55 +08:00
Luis Pater	8e4fbcaa7d	Merge pull request #533 from router-for-me/think refactor(thinking): centralize reasoning effort mapping and normalize budget values	2025-12-15 10:34:41 +08:00
hkfires	09c339953d	fix(openai): forward reasoning.effort value Drop the hardcoded effort mapping in request conversion so unknown values are preserved instead of being coerced to `auto	2025-12-15 09:16:15 +08:00
hkfires	367a05bdf6	refactor(thinking): export thinking helpers Expose thinking/effort normalization helpers from the executor package so conversion tests use production code and stay aligned with runtime validation behavior.	2025-12-15 09:16:15 +08:00
hkfires	d20b71deb9	fix(thinking): normalize effort mapping Route OpenAI reasoning effort through ThinkingEffortToBudget for Claude translators, preserve "minimal" when translating OpenAI Responses, and treat blank/unknown efforts as no-ops for Gemini thinking configs. Also map budget -1 to "auto" and expand cross-protocol thinking tests.	2025-12-15 09:16:15 +08:00
hkfires	712ce9f781	fix(thinking): drop unsupported none effort When budget 0 maps to "none" for models that use thinking levels but don't support that effort level, strip thinking fields instead of setting an invalid reasoning_effort value. Tests now expect removal for this edge case.	2025-12-15 09:16:14 +08:00
hkfires	a4a3274a55	test(thinking): expand conversion edge case coverage	2025-12-15 09:16:14 +08:00
hkfires	716aa71f6e	fix(thinking): centralize reasoning_effort mapping Move OpenAI `reasoning_effort` -> Gemini `thinkingConfig` budget logic into shared helpers used by Gemini, Gemini CLI, and antigravity translators. Normalize Claude thinking handling by preferring positive budgets, applying budget token normalization, and gating by model support. Always convert Gemini `thinkingBudget` back to OpenAI `reasoning_effort` to support allowCompat models, and update tests for normalization behavior.	2025-12-15 09:16:14 +08:00
hkfires	e8976f9898	fix(thinking): map budgets to effort for level models	2025-12-15 09:16:14 +08:00
hkfires	8496cc2444	test(thinking): cover openai-compat reasoning passthrough	2025-12-15 09:16:14 +08:00
hkfires	5ef2d59e05	fix(thinking): gate reasoning effort by model support Only map OpenAI reasoning effort to Claude thinking for models that support thinking and use budget tokens (not level-based thinking). Also add "xhigh" effort mapping and adjust minimal/low budgets, with new raw-payload conversion tests across protocols and models.	2025-12-15 09:16:14 +08:00
Chén Mù	07bb89ae80	Merge pull request #542 from router-for-me/aistudio	2025-12-15 09:13:25 +08:00
hkfires	27a5ad8ec2	Fixed: #534 fix(aistudio): correct JSON string boundary detection for backslash sequences	2025-12-15 09:00:14 +08:00
Luis Pater	707b07c5f5	Merge pull request #537 from sukakcoding/fix/function-response-fallback fix: handle malformed json in function response parsing	2025-12-15 03:31:09 +08:00
sukakcoding	4a764afd76	refactor: extract parseFunctionResponse helper to reduce duplication	2025-12-15 01:05:36 +08:00
sukakcoding	ecf49d574b	fix: handle malformed json in function response parsing	2025-12-15 00:59:46 +08:00
Luis Pater	5a75ef8ffd	Merge pull request #536 from AoaoMH/feature/auth-model-check feat: using Client Model Infos;	2025-12-15 00:29:33 +08:00
Test	07279f8746	feat: using Client Model Infos;	2025-12-15 00:13:05 +08:00
Luis Pater	71f788b13a	fix(registry): remove unused `ThinkingSupport` from DeepSeek-R1 model	2025-12-14 21:30:17 +08:00
Luis Pater	59c62dc580	fix(registry): correct DeepSeek-V3.2 experimental model ID	2025-12-14 21:27:43 +08:00
Luis Pater	d5310a3300	Merge pull request #531 from AoaoMH/feature/auth-model-check feat: add API endpoint to query models for auth credentials	2025-12-14 16:46:43 +08:00
Luis Pater	f0a3eb574e	fix(registry): update DeepSeek model definitions with new IDs and descriptions	2025-12-14 16:17:11 +08:00
Test	bb15855443	feat: add API endpoint to query models for auth credentials	2025-12-14 15:16:26 +08:00
Luis Pater	14ce6aebd1	Merge pull request #449 from sususu98/fix/gemini-cli-429-retry-delay-parsing fix(gemini-cli): enhance 429 retry delay parsing	2025-12-14 14:04:14 +08:00
Luis Pater	2fe83723f2	Merge pull request #515 from teeverc/fix/response-rewriter-streaming-flush fix(amp): flush response buffer after each streaming chunk write	2025-12-14 13:26:05 +08:00
teeverc	cd8c86c6fb	refactor: only flush stream response on successful write	2025-12-13 13:32:54 -08:00
teeverc	52d5fd1a67	fix: streaming for amp cli	2025-12-13 13:17:53 -08:00
Luis Pater	b6ad243e9e	Merge pull request #498 from teeverc/fix/claude-streaming-flush fix(claude): flush Claude SSE chunks immediately	2025-12-13 23:58:34 +08:00
Luis Pater	660aabc437	fix(executor): add `allowCompat` support for reasoning effort normalization Introduced `allowCompat` parameter to improve compatibility handling for reasoning effort in payloads across OpenAI and similar models.	2025-12-13 04:06:02 +08:00
Luis Pater	566120e8d5	Merge pull request #505 from router-for-me/think fix(thinking): map budgets to effort levels	2025-12-12 22:17:11 +08:00
Luis Pater	f3f0f1717d	Merge branch 'dev' into think	2025-12-12 22:16:44 +08:00
Luis Pater	7621ec609e	Merge pull request #501 from huynguyen03dev/fix/openai-compat-model-alias-resolution fix(openai-compat): prevent model alias from being overwritten	2025-12-12 21:58:15 +08:00
Luis Pater	9f511f0024	fix(executor): improve model compatibility handling for OpenAI-compatibility Enhances payload handling by introducing OpenAI-compatibility checks and refining how reasoning metadata is resolved, ensuring broader model support.	2025-12-12 21:57:25 +08:00
hkfires	374faa2640	fix(thinking): map budgets to effort levels Ensure thinking settings translate correctly across providers: - Only apply reasoning_effort to level-based models and derive it from numeric budget suffixes when present - Strip effort string fields for budget-based models and skip Claude/Gemini budget resolution for level-based or unsupported models - Default Gemini include_thoughts when a nonzero budget override is set - Add cross-protocol conversion and budget range tests	2025-12-12 21:33:20 +08:00
Luis Pater	1c52a89535	Merge pull request #502 from router-for-me/iflow fix(auth): prevent duplicate iflow BXAuth tokens	2025-12-12 20:03:37 +08:00
hkfires	e7cedbee6e	fix(auth): prevent duplicate iflow BXAuth tokens	2025-12-12 19:57:19 +08:00
Luis Pater	b8194e717c	Merge pull request #500 from router-for-me/think fix(codex): raise default reasoning effort to medium	2025-12-12 18:35:26 +08:00
huynguyen03.dev	15c3cc3a50	fix(openai-compat): prevent model alias from being overwritten by ResolveOriginalModel When using OpenAI-compatible providers with model aliases (e.g., glm-4.6-zai -> glm-4.6), the alias resolution was correctly applied but then immediately overwritten by ResolveOriginalModel, causing 'Unknown Model' errors from upstream APIs. This fix skips the ResolveOriginalModel override when a model alias has already been resolved, ensuring the correct model name is sent to the upstream provider. Co-authored-by: Amp <amp@ampcode.com>	2025-12-12 17:20:24 +07:00
hkfires	d131435e25	fix(codex): raise default reasoning effort to medium	2025-12-12 18:18:48 +08:00
Luis Pater	6e43669498	Fixed: #440 feat(watcher): normalize auth file paths and implement debounce for remove events	2025-12-12 16:50:56 +08:00
teeverc	5ab3032335	Update sdk/api/handlers/claude/code_handlers.go thank you gemini Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>	2025-12-12 00:26:01 -08:00
teeverc	1215c635a0	fix: flush Claude SSE chunks immediately to match OpenAI behavior - Write each SSE chunk directly to c.Writer and flush immediately - Remove buffered writer and ticker-based flushing that caused delayed output - Add 500ms timeout case for consistency with OpenAI/Gemini handlers - Clean up unused bufio import This fixes the 'not streaming' issue where small responses were held in the buffer until timeout/threshold was reached. Amp-Thread-ID: https://ampcode.com/threads/T-019b1186-164e-740c-96ab-856f64ee6bee Co-authored-by: Amp <amp@ampcode.com>	2025-12-12 00:14:19 -08:00
Luis Pater	fc054db51a	Merge pull request #494 from ben-vargas/fix-gpt-reasoning-none fix(models): add "none" reasoning effort level to gpt-5.2	2025-12-12 08:53:19 +08:00
Luis Pater	6e2306a5f2	refactor(handlers): improve request logging and payload handling	2025-12-12 08:52:52 +08:00
Ben Vargas	b09e2115d1	fix(models): add "none" reasoning effort level to gpt-5.2 Per OpenAI API documentation, gpt-5.2 supports reasoning_effort values of "none", "low", "medium", "high", and "xhigh". The "none" level was missing from the model definition. Reference: https://platform.openai.com/docs/api-reference/chat/create#chat_create-reasoning_effort	2025-12-11 15:26:23 -07:00
sususu	07d21463ca	fix(gemini-cli): enhance 429 retry delay parsing Add fallback parsing for quota reset delay when RetryInfo is not present: - Try ErrorInfo.metadata.quotaResetDelay (e.g., "373.801628ms") - Parse from error.message "Your quota will reset after Xs." This ensures proper cooldown timing for rate-limited requests. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>	2025-12-11 09:34:39 +08:00