v6 version first commit

2026-02-03 04:50:52 +08:00 · 2025-09-22 01:40:24 +08:00
parent d42384cdb7
commit 4999fce7f4
171 changed files with 7626 additions and 7494 deletions
--- a/internal/api/handlers/claude/code_handlers.go
+++ b/internal/api/handlers/claude/code_handlers.go
@@ -7,18 +7,17 @@
 package claude

 import (
+	"bytes"
 	"context"
 	"fmt"
 	"net/http"
 	"time"

 	"github.com/gin-gonic/gin"
-	"github.com/luispater/CLIProxyAPI/v5/internal/api/handlers"
-	. "github.com/luispater/CLIProxyAPI/v5/internal/constant"
-	"github.com/luispater/CLIProxyAPI/v5/internal/interfaces"
-	"github.com/luispater/CLIProxyAPI/v5/internal/registry"
-	"github.com/luispater/CLIProxyAPI/v5/internal/util"
-	log "github.com/sirupsen/logrus"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/api/handlers"
+	. "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/tidwall/gjson"
 )

@@ -129,111 +128,47 @@ func (h *ClaudeCodeAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON [
 	// This allows proper cleanup and cancellation of ongoing requests
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())

-	var cliClient interfaces.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		// This prevents deadlocks and ensures proper resource cleanup
-		if cliClient != nil {
-			if mutex := cliClient.GetRequestMutex(); mutex != nil {
-				mutex.Unlock()
-			}
-		}
-	}()
+	dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
+	h.forwardClaudeStream(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan)
+	return
+}

-	var errorResponse *interfaces.ErrorMessage
-	retryCount := 0
-	// Main client rotation loop with quota management
-	// This loop implements a sophisticated load balancing and failover mechanism
-outLoop:
-	for retryCount <= h.Cfg.RequestRetry {
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			flusher.Flush()
-			cliCancel()
+func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
+	for {
+		select {
+		case <-c.Request.Context().Done():
+			cancel(c.Request.Context().Err())
 			return
-		}
-
-		// Initiate streaming communication with the backend client using raw JSON
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, modelName, rawJSON, "")
-
-		// Main streaming loop - handles multiple concurrent events using Go channels
-		// This select statement manages four different types of events simultaneously
-		for {
-			select {
-			// Case 1: Handle client disconnection
-			// Detects when the HTTP client has disconnected and cleans up resources
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("claude client disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request to prevent resource leaks
-					return
-				}
-
-			// Case 2: Process incoming response chunks from the backend
-			// This handles the actual streaming data from the AI model
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					flusher.Flush()
-					cliCancel()
-					return
-				}
-
-				_, _ = c.Writer.Write(chunk)
-				_, _ = c.Writer.Write([]byte("\n"))
-			// Case 3: Handle errors from the backend
-			// This manages various error conditions and implements retry logic
-			case errInfo, okError := <-errChan:
-				if okError {
-					errorResponse = errInfo
-					h.LoggingAPIResponseError(cliCtx, errInfo)
-					// Special handling for quota exceeded errors
-					// If configured, attempt to switch to a different project/client
-					switch errInfo.StatusCode {
-					case 429:
-						if h.Cfg.QuotaExceeded.SwitchProject {
-							log.Debugf("quota exceeded, switch client")
-							continue outLoop // Restart the client selection process
-						}
-					case 403, 408, 500, 502, 503, 504:
-						log.Debugf("http status code %d, switch client, %s", errInfo.StatusCode, util.HideAPIKey(cliClient.GetEmail()))
-						retryCount++
-						continue outLoop
-					case 401:
-						log.Debugf("unauthorized request, try to refresh token, %s", util.HideAPIKey(cliClient.GetEmail()))
-						err := cliClient.RefreshTokens(cliCtx)
-						if err != nil {
-							log.Debugf("refresh token failed, switch client, %s", util.HideAPIKey(cliClient.GetEmail()))
-							cliClient.SetUnavailable()
-						}
-						retryCount++
-						continue outLoop
-					case 402:
-						cliClient.SetUnavailable()
-						continue outLoop
-					default:
-						// Forward other errors directly to the client
-						c.Status(errInfo.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
-						flusher.Flush()
-						cliCancel(errInfo.Error)
-					}
-					return
-				}
-
-			// Case 4: Send periodic keep-alive signals
-			// Prevents connection timeouts during long-running requests
-			case <-time.After(500 * time.Millisecond):
+		case chunk, ok := <-data:
+			if !ok {
+				flusher.Flush()
+				cancel(nil)
+				return
 			}
-		}
-	}

-	if errorResponse != nil {
-		c.Status(errorResponse.StatusCode)
-		_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-		flusher.Flush()
-		cliCancel(errorResponse.Error)
-		return
+			if bytes.HasPrefix(chunk, []byte("event:")) {
+				_, _ = c.Writer.Write([]byte("\n"))
+			}
+
+			_, _ = c.Writer.Write(chunk)
+			_, _ = c.Writer.Write([]byte("\n"))
+
+			flusher.Flush()
+		case errMsg, ok := <-errs:
+			if !ok {
+				continue
+			}
+			if errMsg != nil {
+				h.WriteErrorResponse(c, errMsg)
+				flusher.Flush()
+			}
+			var execErr error
+			if errMsg != nil {
+				execErr = errMsg.Error
+			}
+			cancel(execErr)
+			return
+		case <-time.After(500 * time.Millisecond):
+		}
 	}
 }
--- a/internal/api/handlers/gemini/gemini-cli_handlers.go
+++ b/internal/api/handlers/gemini/gemini-cli_handlers.go
@@ -14,10 +14,10 @@ import (
 	"time"

 	"github.com/gin-gonic/gin"
-	"github.com/luispater/CLIProxyAPI/v5/internal/api/handlers"
-	. "github.com/luispater/CLIProxyAPI/v5/internal/constant"
-	"github.com/luispater/CLIProxyAPI/v5/internal/interfaces"
-	"github.com/luispater/CLIProxyAPI/v5/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/api/handlers"
+	. "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
 )
@@ -158,102 +158,9 @@ func (h *GeminiCLIAPIHandler) handleInternalStreamGenerateContent(c *gin.Context
 	modelName := modelResult.String()

 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-
-	var cliClient interfaces.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		if cliClient != nil {
-			if mutex := cliClient.GetRequestMutex(); mutex != nil {
-				mutex.Unlock()
-			}
-		}
-	}()
-
-	var errorResponse *interfaces.ErrorMessage
-	retryCount := 0
-outLoop:
-	for retryCount <= h.Cfg.RequestRetry {
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			flusher.Flush()
-			cliCancel()
-			return
-		}
-
-		// Send the message and receive response chunks and errors via channels.
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, modelName, rawJSON, "")
-
-		for {
-			select {
-			// Handle client disconnection.
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("gemini cli client disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request.
-					return
-				}
-			// Process incoming response chunks.
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					cliCancel()
-					return
-				}
-				_, _ = c.Writer.Write([]byte("data: "))
-				_, _ = c.Writer.Write(chunk)
-				_, _ = c.Writer.Write([]byte("\n\n"))
-
-				flusher.Flush()
-			// Handle errors from the backend.
-			case err, okError := <-errChan:
-				if okError {
-					errorResponse = err
-					h.LoggingAPIResponseError(cliCtx, err)
-
-					switch err.StatusCode {
-					case 429:
-						if h.Cfg.QuotaExceeded.SwitchProject {
-							log.Debugf("quota exceeded, switch client")
-							continue outLoop // Restart the client selection process
-						}
-					case 403, 408, 500, 502, 503, 504:
-						log.Debugf("http status code %d, switch client", err.StatusCode)
-						retryCount++
-						continue outLoop
-					case 401:
-						log.Debugf("unauthorized request, try to refresh token, %s", util.HideAPIKey(cliClient.GetEmail()))
-						errRefreshTokens := cliClient.RefreshTokens(cliCtx)
-						if errRefreshTokens != nil {
-							log.Debugf("refresh token failed, switch client, %s", util.HideAPIKey(cliClient.GetEmail()))
-							cliClient.SetUnavailable()
-						}
-						retryCount++
-						continue outLoop
-					case 402:
-						cliClient.SetUnavailable()
-						continue outLoop
-					default:
-						// Forward other errors directly to the client
-						c.Status(err.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
-						flusher.Flush()
-						cliCancel(err.Error)
-					}
-					return
-				}
-			// Send a keep-alive signal to the client.
-			case <-time.After(500 * time.Millisecond):
-			}
-		}
-	}
-	if errorResponse != nil {
-		c.Status(errorResponse.StatusCode)
-		_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-		flusher.Flush()
-		cliCancel(errorResponse.Error)
-		return
-	}
+	dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
+	h.forwardCLIStream(c, flusher, "", func(err error) { cliCancel(err) }, dataChan, errChan)
+	return
 }

 // handleInternalGenerateContent handles non-streaming content generation requests.
@@ -264,72 +171,50 @@ func (h *GeminiCLIAPIHandler) handleInternalGenerateContent(c *gin.Context, rawJ
 	modelName := modelResult.String()

 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-
-	var cliClient interfaces.Client
-	defer func() {
-		if cliClient != nil {
-			if mutex := cliClient.GetRequestMutex(); mutex != nil {
-				mutex.Unlock()
-			}
-		}
-	}()
-
-	var errorResponse *interfaces.ErrorMessage
-	retryCount := 0
-	for retryCount <= h.Cfg.RequestRetry {
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			cliCancel()
-			return
-		}
-
-		resp, err := cliClient.SendRawMessage(cliCtx, modelName, rawJSON, "")
-		if err != nil {
-			errorResponse = err
-			h.LoggingAPIResponseError(cliCtx, err)
-
-			switch err.StatusCode {
-			case 429:
-				if h.Cfg.QuotaExceeded.SwitchProject {
-					log.Debugf("quota exceeded, switch client")
-					continue // Restart the client selection process
-				}
-			case 403, 408, 500, 502, 503, 504:
-				log.Debugf("http status code %d, switch client", err.StatusCode)
-				retryCount++
-				continue
-			case 401:
-				log.Debugf("unauthorized request, try to refresh token, %s", util.HideAPIKey(cliClient.GetEmail()))
-				errRefreshTokens := cliClient.RefreshTokens(cliCtx)
-				if errRefreshTokens != nil {
-					log.Debugf("refresh token failed, switch client, %s", util.HideAPIKey(cliClient.GetEmail()))
-					cliClient.SetUnavailable()
-				}
-				retryCount++
-				continue
-			case 402:
-				cliClient.SetUnavailable()
-				continue
-			default:
-				// Forward other errors directly to the client
-				c.Status(err.StatusCode)
-				_, _ = c.Writer.Write([]byte(err.Error.Error()))
-				cliCancel(err.Error)
-			}
-			break
-		} else {
-			_, _ = c.Writer.Write(resp)
-			cliCancel()
-			break
-		}
-	}
-	if errorResponse != nil {
-		c.Status(errorResponse.StatusCode)
-		_, _ = c.Writer.Write([]byte(errorResponse.Error.Error()))
-		cliCancel(errorResponse.Error)
+	resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
+	if errMsg != nil {
+		h.WriteErrorResponse(c, errMsg)
+		cliCancel(errMsg.Error)
 		return
 	}
-
+	_, _ = c.Writer.Write(resp)
+	cliCancel()
+}
+
+func (h *GeminiCLIAPIHandler) forwardCLIStream(c *gin.Context, flusher http.Flusher, alt string, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
+	for {
+		select {
+		case <-c.Request.Context().Done():
+			cancel(c.Request.Context().Err())
+			return
+		case chunk, ok := <-data:
+			if !ok {
+				cancel(nil)
+				return
+			}
+			if alt == "" {
+				_, _ = c.Writer.Write([]byte("data: "))
+				_, _ = c.Writer.Write(chunk)
+				_, _ = c.Writer.Write([]byte("\n\n"))
+			} else {
+				_, _ = c.Writer.Write(chunk)
+			}
+			flusher.Flush()
+		case errMsg, ok := <-errs:
+			if !ok {
+				continue
+			}
+			if errMsg != nil {
+				h.WriteErrorResponse(c, errMsg)
+				flusher.Flush()
+			}
+			var execErr error
+			if errMsg != nil {
+				execErr = errMsg.Error
+			}
+			cancel(execErr)
+			return
+		case <-time.After(500 * time.Millisecond):
+		}
+	}
 }
--- a/internal/api/handlers/gemini/gemini_handlers.go
+++ b/internal/api/handlers/gemini/gemini_handlers.go
@@ -13,12 +13,13 @@ import (
 	"time"

 	"github.com/gin-gonic/gin"
-	"github.com/luispater/CLIProxyAPI/v5/internal/api/handlers"
-	. "github.com/luispater/CLIProxyAPI/v5/internal/constant"
-	"github.com/luispater/CLIProxyAPI/v5/internal/interfaces"
-	"github.com/luispater/CLIProxyAPI/v5/internal/registry"
-	"github.com/luispater/CLIProxyAPI/v5/internal/util"
-	log "github.com/sirupsen/logrus"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/api/handlers"
+	. "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor"
+	coreexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
+	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
 )

 // GeminiAPIHandler contains the handlers for Gemini API endpoints.
@@ -210,105 +211,9 @@ func (h *GeminiAPIHandler) handleStreamGenerateContent(c *gin.Context, modelName
 	}

 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-
-	var cliClient interfaces.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		if cliClient != nil {
-			if mutex := cliClient.GetRequestMutex(); mutex != nil {
-				mutex.Unlock()
-			}
-		}
-	}()
-
-	var errorResponse *interfaces.ErrorMessage
-	retryCount := 0
-outLoop:
-	for retryCount <= h.Cfg.RequestRetry {
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			flusher.Flush()
-			cliCancel()
-			return
-		}
-
-		// Send the message and receive response chunks and errors via channels.
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, modelName, rawJSON, alt)
-		for {
-			select {
-			// Handle client disconnection.
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("gemini client disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request.
-					return
-				}
-			// Process incoming response chunks.
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					cliCancel()
-					return
-				}
-
-				if alt == "" {
-					_, _ = c.Writer.Write([]byte("data: "))
-					_, _ = c.Writer.Write(chunk)
-					_, _ = c.Writer.Write([]byte("\n\n"))
-				} else {
-					_, _ = c.Writer.Write(chunk)
-				}
-				flusher.Flush()
-			// Handle errors from the backend.
-			case err, okError := <-errChan:
-				if okError {
-					errorResponse = err
-					h.LoggingAPIResponseError(cliCtx, err)
-
-					switch err.StatusCode {
-					case 429:
-						if h.Cfg.QuotaExceeded.SwitchProject {
-							log.Debugf("quota exceeded, switch client")
-							continue outLoop // Restart the client selection process
-						}
-					case 403, 408, 500, 502, 503, 504:
-						log.Debugf("http status code %d, switch client", err.StatusCode)
-						retryCount++
-						continue outLoop
-					case 401:
-						log.Debugf("unauthorized request, try to refresh token, %s", util.HideAPIKey(cliClient.GetEmail()))
-						errRefreshTokens := cliClient.RefreshTokens(cliCtx)
-						if errRefreshTokens != nil {
-							log.Debugf("refresh token failed, switch client, %s", util.HideAPIKey(cliClient.GetEmail()))
-							cliClient.SetUnavailable()
-						}
-						retryCount++
-						continue outLoop
-					case 402:
-						cliClient.SetUnavailable()
-						continue outLoop
-					default:
-						// Forward other errors directly to the client
-						c.Status(err.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
-						flusher.Flush()
-						cliCancel(err.Error)
-					}
-					return
-				}
-			// Send a keep-alive signal to the client.
-			case <-time.After(500 * time.Millisecond):
-			}
-		}
-	}
-	if errorResponse != nil {
-		c.Status(errorResponse.StatusCode)
-		_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-		flusher.Flush()
-		cliCancel(errorResponse.Error)
-		return
-	}
+	dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
+	h.forwardGeminiStream(c, flusher, alt, func(err error) { cliCancel(err) }, dataChan, errChan)
+	return
 }

 // handleCountTokens handles token counting requests for Gemini models.
@@ -324,42 +229,32 @@ func (h *GeminiAPIHandler) handleCountTokens(c *gin.Context, modelName string, r

 	alt := h.GetAlt(c)
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
+	defer func() { cliCancel() }()

-	var cliClient interfaces.Client
-	defer func() {
-		if cliClient != nil {
-			if mutex := cliClient.GetRequestMutex(); mutex != nil {
-				mutex.Unlock()
-			}
-		}
-	}()
-
-	for {
-		var errorResponse *interfaces.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName, false)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			cliCancel()
+	// Execute via AuthManager with action=countTokens
+	req := coreexecutor.Request{
+		Model:   modelName,
+		Payload: rawJSON,
+		Metadata: map[string]any{
+			"action": "countTokens",
+		},
+	}
+	opts := coreexecutor.Options{
+		Stream:          false,
+		Alt:             alt,
+		OriginalRequest: rawJSON,
+		SourceFormat:    sdktranslator.FromString(h.HandlerType()),
+	}
+	resp, err := h.AuthManager.Execute(cliCtx, []string{"gemini"}, req, opts)
+	if err != nil {
+		if msg, ok := executor.UnwrapError(err); ok {
+			h.WriteErrorResponse(c, msg)
 			return
 		}
-
-		resp, err := cliClient.SendRawTokenCount(cliCtx, modelName, rawJSON, alt)
-		if err != nil {
-			if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-				continue
-			} else {
-				c.Status(err.StatusCode)
-				_, _ = c.Writer.Write([]byte(err.Error.Error()))
-				cliCancel(err.Error)
-			}
-			break
-		} else {
-			_, _ = c.Writer.Write(resp)
-			cliCancel(resp)
-			break
-		}
+		h.WriteErrorResponse(c, &interfaces.ErrorMessage{StatusCode: http.StatusInternalServerError, Error: err})
+		return
 	}
+	_, _ = c.Writer.Write(resp.Payload)
 }

 // handleGenerateContent handles non-streaming content generation requests for Gemini models.
@@ -373,75 +268,52 @@ func (h *GeminiAPIHandler) handleCountTokens(c *gin.Context, modelName string, r
 //   - rawJSON: The raw JSON request body containing generation parameters and content
 func (h *GeminiAPIHandler) handleGenerateContent(c *gin.Context, modelName string, rawJSON []byte) {
 	c.Header("Content-Type", "application/json")
-
 	alt := h.GetAlt(c)
-
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-
-	var cliClient interfaces.Client
-	defer func() {
-		if cliClient != nil {
-			if mutex := cliClient.GetRequestMutex(); mutex != nil {
-				mutex.Unlock()
-			}
-		}
-	}()
-
-	var errorResponse *interfaces.ErrorMessage
-	retryCount := 0
-	for retryCount <= h.Cfg.RequestRetry {
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			cliCancel()
-			return
-		}
-
-		resp, err := cliClient.SendRawMessage(cliCtx, modelName, rawJSON, alt)
-		if err != nil {
-			errorResponse = err
-			h.LoggingAPIResponseError(cliCtx, err)
-
-			switch err.StatusCode {
-			case 429:
-				if h.Cfg.QuotaExceeded.SwitchProject {
-					log.Debugf("quota exceeded, switch client")
-					continue // Restart the client selection process
-				}
-			case 403, 408, 500, 502, 503, 504:
-				log.Debugf("http status code %d, switch client", err.StatusCode)
-				retryCount++
-				continue
-			case 401:
-				log.Debugf("unauthorized request, try to refresh token, %s", util.HideAPIKey(cliClient.GetEmail()))
-				errRefreshTokens := cliClient.RefreshTokens(cliCtx)
-				if errRefreshTokens != nil {
-					log.Debugf("refresh token failed, switch client, %s", util.HideAPIKey(cliClient.GetEmail()))
-					cliClient.SetUnavailable()
-				}
-				retryCount++
-				continue
-			case 402:
-				cliClient.SetUnavailable()
-				continue
-			default:
-				// Forward other errors directly to the client
-				c.Status(err.StatusCode)
-				_, _ = c.Writer.Write([]byte(err.Error.Error()))
-				cliCancel(err.Error)
-			}
-			break
-		} else {
-			_, _ = c.Writer.Write(resp)
-			cliCancel()
-			break
-		}
-	}
-	if errorResponse != nil {
-		c.Status(errorResponse.StatusCode)
-		_, _ = c.Writer.Write([]byte(errorResponse.Error.Error()))
-		cliCancel(errorResponse.Error)
+	resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
+	if errMsg != nil {
+		h.WriteErrorResponse(c, errMsg)
+		cliCancel(errMsg.Error)
 		return
 	}
+	_, _ = c.Writer.Write(resp)
+	cliCancel()
+}
+
+func (h *GeminiAPIHandler) forwardGeminiStream(c *gin.Context, flusher http.Flusher, alt string, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
+	for {
+		select {
+		case <-c.Request.Context().Done():
+			cancel(c.Request.Context().Err())
+			return
+		case chunk, ok := <-data:
+			if !ok {
+				cancel(nil)
+				return
+			}
+			if alt == "" {
+				_, _ = c.Writer.Write([]byte("data: "))
+				_, _ = c.Writer.Write(chunk)
+				_, _ = c.Writer.Write([]byte("\n\n"))
+			} else {
+				_, _ = c.Writer.Write(chunk)
+			}
+			flusher.Flush()
+		case errMsg, ok := <-errs:
+			if !ok {
+				continue
+			}
+			if errMsg != nil {
+				h.WriteErrorResponse(c, errMsg)
+				flusher.Flush()
+			}
+			var execErr error
+			if errMsg != nil {
+				execErr = errMsg.Error
+			}
+			cancel(execErr)
+			return
+		case <-time.After(500 * time.Millisecond):
+		}
+	}
 }
--- a/internal/api/handlers/handlers.go
+++ b/internal/api/handlers/handlers.go
@@ -5,12 +5,16 @@ package handlers

 import (
 	"fmt"
-	"sync"
+	"net/http"

 	"github.com/gin-gonic/gin"
-	"github.com/luispater/CLIProxyAPI/v5/internal/config"
-	"github.com/luispater/CLIProxyAPI/v5/internal/interfaces"
-	"github.com/luispater/CLIProxyAPI/v5/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+	coreexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
+	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
 	"golang.org/x/net/context"
 )

@@ -38,18 +42,11 @@ type ErrorDetail struct {
 // It holds a pool of clients to interact with the backend service and manages
 // load balancing, client selection, and configuration.
 type BaseAPIHandler struct {
-	// CliClients is the pool of available AI service clients.
-	CliClients []interfaces.Client
+	// AuthManager manages auth lifecycle and execution in the new architecture.
+	AuthManager *coreauth.Manager

 	// Cfg holds the current application configuration.
 	Cfg *config.Config
-
-	// Mutex ensures thread-safe access to shared resources.
-	Mutex *sync.Mutex
-
-	// LastUsedClientIndex tracks the last used client index for each provider
-	// to implement round-robin load balancing.
-	LastUsedClientIndex map[string]int
 }

 // NewBaseAPIHandlers creates a new API handlers instance.
@@ -61,12 +58,10 @@ type BaseAPIHandler struct {
 //
 // Returns:
 //   - *BaseAPIHandler: A new API handlers instance
-func NewBaseAPIHandlers(cliClients []interfaces.Client, cfg *config.Config) *BaseAPIHandler {
+func NewBaseAPIHandlers(cfg *config.Config, authManager *coreauth.Manager) *BaseAPIHandler {
 	return &BaseAPIHandler{
-		CliClients:          cliClients,
-		Cfg:                 cfg,
-		Mutex:               &sync.Mutex{},
-		LastUsedClientIndex: make(map[string]int),
+		Cfg:         cfg,
+		AuthManager: authManager,
 	}
 }

@@ -76,86 +71,7 @@ func NewBaseAPIHandlers(cliClients []interfaces.Client, cfg *config.Config) *Bas
 // Parameters:
 //   - clients: The new slice of AI service clients
 //   - cfg: The new application configuration
-func (h *BaseAPIHandler) UpdateClients(clients []interfaces.Client, cfg *config.Config) {
-	h.CliClients = clients
-	h.Cfg = cfg
-}
-
-// GetClient returns an available client from the pool using round-robin load balancing.
-// It checks for quota limits and tries to find an unlocked client for immediate use.
-// The modelName parameter is used to check quota status for specific models.
-//
-// Parameters:
-//   - modelName: The name of the model to be used
-//   - isGenerateContent: Optional parameter to indicate if this is for content generation
-//
-// Returns:
-//   - client.Client: An available client for the requested model
-//   - *client.ErrorMessage: An error message if no client is available
-func (h *BaseAPIHandler) GetClient(modelName string, isGenerateContent ...bool) (interfaces.Client, *interfaces.ErrorMessage) {
-	clients := make([]interfaces.Client, 0)
-	for i := 0; i < len(h.CliClients); i++ {
-		if h.CliClients[i].CanProvideModel(modelName) && h.CliClients[i].IsAvailable() && !h.CliClients[i].IsModelQuotaExceeded(modelName) {
-			clients = append(clients, h.CliClients[i])
-		}
-	}
-
-	// Lock the mutex to update the last used client index
-	h.Mutex.Lock()
-	if _, hasKey := h.LastUsedClientIndex[modelName]; !hasKey {
-		h.LastUsedClientIndex[modelName] = 0
-	}
-
-	if len(clients) == 0 {
-		h.Mutex.Unlock()
-		return nil, &interfaces.ErrorMessage{StatusCode: 500, Error: fmt.Errorf("no clients available")}
-	}
-
-	var cliClient interfaces.Client
-
-	startIndex := h.LastUsedClientIndex[modelName]
-	if (len(isGenerateContent) > 0 && isGenerateContent[0]) || len(isGenerateContent) == 0 {
-		currentIndex := (startIndex + 1) % len(clients)
-		h.LastUsedClientIndex[modelName] = currentIndex
-	}
-	h.Mutex.Unlock()
-
-	// Reorder the client to start from the last used index
-	reorderedClients := make([]interfaces.Client, 0)
-	for i := 0; i < len(clients); i++ {
-		cliClient = clients[(startIndex+1+i)%len(clients)]
-		reorderedClients = append(reorderedClients, cliClient)
-	}
-
-	if len(reorderedClients) == 0 {
-		if util.GetProviderName(modelName, h.Cfg) == "claude" {
-			// log.Debugf("Claude Model %s is quota exceeded for all accounts", modelName)
-			return nil, &interfaces.ErrorMessage{StatusCode: 429, Error: fmt.Errorf(`{"type":"error","error":{"type":"rate_limit_error","message":"This request would exceed your account's rate limit. Please try again later."}}`)}
-		}
-		return nil, &interfaces.ErrorMessage{StatusCode: 429, Error: fmt.Errorf(`{"error":{"code":429,"message":"All the models of '%s' are quota exceeded","status":"RESOURCE_EXHAUSTED"}}`, modelName)}
-	}
-
-	locked := false
-	for i := 0; i < len(reorderedClients); i++ {
-		cliClient = reorderedClients[i]
-		if mutex := cliClient.GetRequestMutex(); mutex != nil {
-			if mutex.TryLock() {
-				locked = true
-				break
-			}
-		} else {
-			locked = true
-		}
-	}
-	if !locked {
-		cliClient = clients[0]
-		if mutex := cliClient.GetRequestMutex(); mutex != nil {
-			mutex.Lock()
-		}
-	}
-
-	return cliClient, nil
-}
+func (h *BaseAPIHandler) UpdateClients(cfg *config.Config) { h.Cfg = cfg }

 // GetAlt extracts the 'alt' parameter from the request query string.
 // It checks both 'alt' and '$alt' parameters and returns the appropriate value.
@@ -215,6 +131,109 @@ func (h *BaseAPIHandler) GetContextWithCancel(handler interfaces.APIHandler, c *
 	}
 }

+// ExecuteWithAuthManager executes a non-streaming request via the core auth manager.
+// This path is the only supported execution route.
+func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) {
+	providers := util.GetProviderName(modelName, h.Cfg)
+	if len(providers) == 0 {
+		return nil, &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)}
+	}
+	req := coreexecutor.Request{
+		Model:   modelName,
+		Payload: cloneBytes(rawJSON),
+	}
+	opts := coreexecutor.Options{
+		Stream:          false,
+		Alt:             alt,
+		OriginalRequest: cloneBytes(rawJSON),
+		SourceFormat:    sdktranslator.FromString(handlerType),
+	}
+	resp, err := h.AuthManager.Execute(ctx, providers, req, opts)
+	if err != nil {
+		if msg, ok := executor.UnwrapError(err); ok {
+			return nil, msg
+		}
+		return nil, &interfaces.ErrorMessage{StatusCode: http.StatusInternalServerError, Error: err}
+	}
+	return cloneBytes(resp.Payload), nil
+}
+
+// ExecuteStreamWithAuthManager executes a streaming request via the core auth manager.
+// This path is the only supported execution route.
+func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) (<-chan []byte, <-chan *interfaces.ErrorMessage) {
+	providers := util.GetProviderName(modelName, h.Cfg)
+	if len(providers) == 0 {
+		errChan := make(chan *interfaces.ErrorMessage, 1)
+		errChan <- &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)}
+		close(errChan)
+		return nil, errChan
+	}
+	req := coreexecutor.Request{
+		Model:   modelName,
+		Payload: cloneBytes(rawJSON),
+	}
+	opts := coreexecutor.Options{
+		Stream:          true,
+		Alt:             alt,
+		OriginalRequest: cloneBytes(rawJSON),
+		SourceFormat:    sdktranslator.FromString(handlerType),
+	}
+	chunks, err := h.AuthManager.ExecuteStream(ctx, providers, req, opts)
+	if err != nil {
+		errChan := make(chan *interfaces.ErrorMessage, 1)
+		if msg, ok := executor.UnwrapError(err); ok {
+			errChan <- msg
+		} else {
+			errChan <- &interfaces.ErrorMessage{StatusCode: http.StatusInternalServerError, Error: err}
+		}
+		close(errChan)
+		return nil, errChan
+	}
+	dataChan := make(chan []byte)
+	errChan := make(chan *interfaces.ErrorMessage, 1)
+	go func() {
+		defer close(dataChan)
+		defer close(errChan)
+		for chunk := range chunks {
+			if chunk.Err != nil {
+				if msg, ok := executor.UnwrapError(chunk.Err); ok {
+					errChan <- msg
+				} else {
+					errChan <- &interfaces.ErrorMessage{StatusCode: http.StatusInternalServerError, Error: chunk.Err}
+				}
+				return
+			}
+			if len(chunk.Payload) > 0 {
+				dataChan <- cloneBytes(chunk.Payload)
+			}
+		}
+	}()
+	return dataChan, errChan
+}
+
+func cloneBytes(src []byte) []byte {
+	if len(src) == 0 {
+		return nil
+	}
+	dst := make([]byte, len(src))
+	copy(dst, src)
+	return dst
+}
+
+// WriteErrorResponse writes an error message to the response writer using the HTTP status embedded in the message.
+func (h *BaseAPIHandler) WriteErrorResponse(c *gin.Context, msg *interfaces.ErrorMessage) {
+	status := http.StatusInternalServerError
+	if msg != nil && msg.StatusCode > 0 {
+		status = msg.StatusCode
+	}
+	c.Status(status)
+	if msg != nil && msg.Error != nil {
+		_, _ = c.Writer.Write([]byte(msg.Error.Error()))
+	} else {
+		_, _ = c.Writer.Write([]byte(http.StatusText(status)))
+	}
+}
+
 func (h *BaseAPIHandler) LoggingAPIResponseError(ctx context.Context, err *interfaces.ErrorMessage) {
 	if h.Cfg.RequestLog {
 		if ginContext, ok := ctx.Value("gin").(*gin.Context); ok {
--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
@@ -13,13 +13,14 @@ import (
 	"time"

 	"github.com/gin-gonic/gin"
-	"github.com/luispater/CLIProxyAPI/v5/internal/auth/claude"
-	"github.com/luispater/CLIProxyAPI/v5/internal/auth/codex"
-	geminiAuth "github.com/luispater/CLIProxyAPI/v5/internal/auth/gemini"
-	"github.com/luispater/CLIProxyAPI/v5/internal/auth/qwen"
-	"github.com/luispater/CLIProxyAPI/v5/internal/client"
-	"github.com/luispater/CLIProxyAPI/v5/internal/misc"
-	"github.com/luispater/CLIProxyAPI/v5/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/codex"
+	geminiAuth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/gemini"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
+	// legacy client removed
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
 	"golang.org/x/oauth2"
@@ -89,6 +90,11 @@ func (h *Handler) DownloadAuthFile(c *gin.Context) {

 // Upload auth file: multipart or raw JSON with ?name=
 func (h *Handler) UploadAuthFile(c *gin.Context) {
+	if h.authManager == nil {
+		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "core auth manager unavailable"})
+		return
+	}
+	ctx := c.Request.Context()
 	if file, err := c.FormFile("file"); err == nil && file != nil {
 		name := filepath.Base(file.Filename)
 		if !strings.HasSuffix(strings.ToLower(name), ".json") {
@@ -96,10 +102,24 @@ func (h *Handler) UploadAuthFile(c *gin.Context) {
 			return
 		}
 		dst := filepath.Join(h.cfg.AuthDir, name)
+		if !filepath.IsAbs(dst) {
+			if abs, errAbs := filepath.Abs(dst); errAbs == nil {
+				dst = abs
+			}
+		}
 		if errSave := c.SaveUploadedFile(file, dst); errSave != nil {
 			c.JSON(500, gin.H{"error": fmt.Sprintf("failed to save file: %v", errSave)})
 			return
 		}
+		data, errRead := os.ReadFile(dst)
+		if errRead != nil {
+			c.JSON(500, gin.H{"error": fmt.Sprintf("failed to read saved file: %v", errRead)})
+			return
+		}
+		if errReg := h.registerAuthFromFile(ctx, dst, data); errReg != nil {
+			c.JSON(500, gin.H{"error": errReg.Error()})
+			return
+		}
 		c.JSON(200, gin.H{"status": "ok"})
 		return
 	}
@@ -118,15 +138,29 @@ func (h *Handler) UploadAuthFile(c *gin.Context) {
 		return
 	}
 	dst := filepath.Join(h.cfg.AuthDir, filepath.Base(name))
+	if !filepath.IsAbs(dst) {
+		if abs, errAbs := filepath.Abs(dst); errAbs == nil {
+			dst = abs
+		}
+	}
 	if errWrite := os.WriteFile(dst, data, 0o600); errWrite != nil {
 		c.JSON(500, gin.H{"error": fmt.Sprintf("failed to write file: %v", errWrite)})
 		return
 	}
+	if err := h.registerAuthFromFile(ctx, dst, data); err != nil {
+		c.JSON(500, gin.H{"error": err.Error()})
+		return
+	}
 	c.JSON(200, gin.H{"status": "ok"})
 }

 // Delete auth files: single by name or all
 func (h *Handler) DeleteAuthFile(c *gin.Context) {
+	if h.authManager == nil {
+		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "core auth manager unavailable"})
+		return
+	}
+	ctx := c.Request.Context()
 	if all := c.Query("all"); all == "true" || all == "1" || all == "*" {
 		entries, err := os.ReadDir(h.cfg.AuthDir)
 		if err != nil {
@@ -143,8 +177,14 @@ func (h *Handler) DeleteAuthFile(c *gin.Context) {
 				continue
 			}
 			full := filepath.Join(h.cfg.AuthDir, name)
+			if !filepath.IsAbs(full) {
+				if abs, errAbs := filepath.Abs(full); errAbs == nil {
+					full = abs
+				}
+			}
 			if err = os.Remove(full); err == nil {
 				deleted++
+				h.disableAuth(ctx, full)
 			}
 		}
 		c.JSON(200, gin.H{"status": "ok", "deleted": deleted})
@@ -156,6 +196,11 @@ func (h *Handler) DeleteAuthFile(c *gin.Context) {
 		return
 	}
 	full := filepath.Join(h.cfg.AuthDir, filepath.Base(name))
+	if !filepath.IsAbs(full) {
+		if abs, errAbs := filepath.Abs(full); errAbs == nil {
+			full = abs
+		}
+	}
 	if err := os.Remove(full); err != nil {
 		if os.IsNotExist(err) {
 			c.JSON(404, gin.H{"error": "file not found"})
@@ -164,9 +209,75 @@ func (h *Handler) DeleteAuthFile(c *gin.Context) {
 		}
 		return
 	}
+	h.disableAuth(ctx, full)
 	c.JSON(200, gin.H{"status": "ok"})
 }

+func (h *Handler) registerAuthFromFile(ctx context.Context, path string, data []byte) error {
+	if h.authManager == nil {
+		return nil
+	}
+	if path == "" {
+		return fmt.Errorf("auth path is empty")
+	}
+	if data == nil {
+		var err error
+		data, err = os.ReadFile(path)
+		if err != nil {
+			return fmt.Errorf("failed to read auth file: %w", err)
+		}
+	}
+	metadata := make(map[string]any)
+	if err := json.Unmarshal(data, &metadata); err != nil {
+		return fmt.Errorf("invalid auth file: %w", err)
+	}
+	provider, _ := metadata["type"].(string)
+	if provider == "" {
+		provider = "unknown"
+	}
+	label := provider
+	if email, ok := metadata["email"].(string); ok && email != "" {
+		label = email
+	}
+	attr := map[string]string{
+		"path":   path,
+		"source": path,
+	}
+	auth := &coreauth.Auth{
+		ID:         path,
+		Provider:   provider,
+		Label:      label,
+		Status:     coreauth.StatusActive,
+		Attributes: attr,
+		Metadata:   metadata,
+		CreatedAt:  time.Now(),
+		UpdatedAt:  time.Now(),
+	}
+	if existing, ok := h.authManager.GetByID(path); ok {
+		auth.CreatedAt = existing.CreatedAt
+		auth.LastRefreshedAt = existing.LastRefreshedAt
+		auth.NextRefreshAfter = existing.NextRefreshAfter
+		auth.Runtime = existing.Runtime
+		_, err := h.authManager.Update(ctx, auth)
+		return err
+	}
+	_, err := h.authManager.Register(ctx, auth)
+	return err
+}
+
+func (h *Handler) disableAuth(ctx context.Context, id string) {
+	if h.authManager == nil || id == "" {
+		return
+	}
+	if auth, ok := h.authManager.GetByID(id); ok {
+		auth.Disabled = true
+		auth.Status = coreauth.StatusDisabled
+		auth.StatusMessage = "removed via management API"
+		auth.UpdatedAt = time.Now()
+		_, _ = h.authManager.Update(ctx, auth)
+	}
+}
+
 func (h *Handler) RequestAnthropicToken(c *gin.Context) {
 	ctx := context.Background()

@@ -307,10 +418,9 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {

 		// Create token storage
 		tokenStorage := anthropicAuth.CreateTokenStorage(bundle)
-		// Initialize Claude client
-		anthropicClient := client.NewClaudeClient(h.cfg, tokenStorage)
-		// Save token storage
-		if errSave := anthropicClient.SaveTokenToFile(); errSave != nil {
+		// Persist token to file directly
+		fileName := filepath.Join(h.cfg.AuthDir, fmt.Sprintf("claude-%s.json", tokenStorage.Email))
+		if errSave := tokenStorage.SaveTokenToFile(fileName); errSave != nil {
 			log.Fatalf("Failed to save authentication tokens: %v", errSave)
 			oauthStatus[state] = "Failed to save authentication tokens"
 			return
@@ -458,7 +568,7 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {

 		// Initialize authenticated HTTP client via GeminiAuth to honor proxy settings
 		gemAuth := geminiAuth.NewGeminiAuth()
-		httpClient2, errGetClient := gemAuth.GetAuthenticatedClient(ctx, &ts, h.cfg, true)
+		_, errGetClient := gemAuth.GetAuthenticatedClient(ctx, &ts, h.cfg, true)
 		if errGetClient != nil {
 			log.Fatalf("failed to get authenticated client: %v", errGetClient)
 			oauthStatus[state] = "Failed to get authenticated client"
@@ -466,54 +576,9 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
 		}
 		log.Info("Authentication successful.")

-		// Initialize the API client
-		cliClient := client.NewGeminiCLIClient(httpClient2, &ts, h.cfg)
-
-		// Perform the user setup process (migrated from DoLogin)
-		if err = cliClient.SetupUser(ctx, ts.Email, projectID); err != nil {
-			if err.Error() == "failed to start user onboarding, need define a project id" {
-				log.Error("Failed to start user onboarding: A project ID is required.")
-				oauthStatus[state] = "Failed to start user onboarding: A project ID is required"
-				project, errGetProjectList := cliClient.GetProjectList(ctx)
-				if errGetProjectList != nil {
-					log.Fatalf("Failed to get project list: %v", err)
-					oauthStatus[state] = "Failed to get project list"
-				} else {
-					log.Infof("Your account %s needs to specify a project ID.", ts.Email)
-					log.Info("========================================================================")
-					for _, p := range project.Projects {
-						log.Infof("Project ID: %s", p.ProjectID)
-						log.Infof("Project Name: %s", p.Name)
-						log.Info("------------------------------------------------------------------------")
-					}
-					log.Infof("Please run this command to login again with a specific project:\n\n%s --login --project_id <project_id>\n", os.Args[0])
-				}
-			} else {
-				log.Fatalf("Failed to complete user setup: %v", err)
-				oauthStatus[state] = "Failed to complete user setup"
-			}
-			return
-		}
-
-		// Post-setup checks and token persistence
-		auto := projectID == ""
-		cliClient.SetIsAuto(auto)
-		if !cliClient.IsChecked() && !cliClient.IsAuto() {
-			isChecked, checkErr := cliClient.CheckCloudAPIIsEnabled()
-			if checkErr != nil {
-				log.Fatalf("Failed to check if Cloud AI API is enabled: %v", checkErr)
-				oauthStatus[state] = "Failed to check if Cloud AI API is enabled"
-				return
-			}
-			cliClient.SetIsChecked(isChecked)
-			if !isChecked {
-				log.Fatal("Failed to check if Cloud AI API is enabled. If you encounter an error message, please create an issue.")
-				oauthStatus[state] = "Failed to check if Cloud AI API is enabled"
-				return
-			}
-		}
-
-		if err = cliClient.SaveTokenToFile(); err != nil {
+		// Persist token to file directly
+		fileName := filepath.Join(h.cfg.AuthDir, fmt.Sprintf("gemini-%s.json", ts.Email))
+		if err = ts.SaveTokenToFile(fileName); err != nil {
 			log.Fatalf("Failed to save token to file: %v", err)
 			oauthStatus[state] = "Failed to save token to file"
 			return
@@ -655,13 +720,8 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {

 		// Create token storage and persist
 		tokenStorage := openaiAuth.CreateTokenStorage(bundle)
-		openaiClient, errInit := client.NewCodexClient(h.cfg, tokenStorage)
-		if errInit != nil {
-			oauthStatus[state] = "Failed to initialize Codex client"
-			log.Fatalf("Failed to initialize Codex client: %v", errInit)
-			return
-		}
-		if errSave := openaiClient.SaveTokenToFile(); errSave != nil {
+		fileName := filepath.Join(h.cfg.AuthDir, fmt.Sprintf("codex-%s.json", tokenStorage.Email))
+		if errSave := tokenStorage.SaveTokenToFile(fileName); errSave != nil {
 			oauthStatus[state] = "Failed to save authentication tokens"
 			log.Fatalf("Failed to save authentication tokens: %v", errSave)
 			return
@@ -707,13 +767,10 @@ func (h *Handler) RequestQwenToken(c *gin.Context) {
 		// Create token storage
 		tokenStorage := qwenAuth.CreateTokenStorage(tokenData)

-		// Initialize Qwen client
-		qwenClient := client.NewQwenClient(h.cfg, tokenStorage)
-
 		tokenStorage.Email = fmt.Sprintf("qwen-%d", time.Now().UnixMilli())
-
 		// Save token storage
-		if err = qwenClient.SaveTokenToFile(); err != nil {
+		fileName := filepath.Join(h.cfg.AuthDir, fmt.Sprintf("qwen-%s.json", tokenStorage.Email))
+		if err = tokenStorage.SaveTokenToFile(fileName); err != nil {
 			log.Fatalf("Failed to save authentication tokens: %v", err)
 			oauthStatus[state] = "Failed to save authentication tokens"
 			return
--- a/internal/api/handlers/management/config_lists.go
+++ b/internal/api/handlers/management/config_lists.go
@@ -5,7 +5,7 @@ import (
 	"fmt"

 	"github.com/gin-gonic/gin"
-	"github.com/luispater/CLIProxyAPI/v5/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 )

 // Generic helpers for list[string]
--- a/internal/api/handlers/management/handler.go
+++ b/internal/api/handlers/management/handler.go
@@ -10,7 +10,8 @@ import (
 	"time"

 	"github.com/gin-gonic/gin"
-	"github.com/luispater/CLIProxyAPI/v5/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	"golang.org/x/crypto/bcrypt"
 )

@@ -27,16 +28,20 @@ type Handler struct {

 	attemptsMu     sync.Mutex
 	failedAttempts map[string]*attemptInfo // keyed by client IP
+	authManager    *coreauth.Manager
 }

 // NewHandler creates a new management handler instance.
-func NewHandler(cfg *config.Config, configFilePath string) *Handler {
-	return &Handler{cfg: cfg, configFilePath: configFilePath, failedAttempts: make(map[string]*attemptInfo)}
+func NewHandler(cfg *config.Config, configFilePath string, manager *coreauth.Manager) *Handler {
+	return &Handler{cfg: cfg, configFilePath: configFilePath, failedAttempts: make(map[string]*attemptInfo), authManager: manager}
 }

 // SetConfig updates the in-memory config reference when the server hot-reloads.
 func (h *Handler) SetConfig(cfg *config.Config) { h.cfg = cfg }

+// SetAuthManager updates the auth manager reference used by management endpoints.
+func (h *Handler) SetAuthManager(manager *coreauth.Manager) { h.authManager = manager }
+
 // Middleware enforces access control for management endpoints.
 // All requests (local and remote) require a valid management key.
 // Additionally, remote access requires allow-remote-management=true.
--- a/internal/api/handlers/openai/openai_handlers.go
+++ b/internal/api/handlers/openai/openai_handlers.go
@@ -14,12 +14,10 @@ import (
 	"time"

 	"github.com/gin-gonic/gin"
-	"github.com/luispater/CLIProxyAPI/v5/internal/api/handlers"
-	. "github.com/luispater/CLIProxyAPI/v5/internal/constant"
-	"github.com/luispater/CLIProxyAPI/v5/internal/interfaces"
-	"github.com/luispater/CLIProxyAPI/v5/internal/registry"
-	"github.com/luispater/CLIProxyAPI/v5/internal/util"
-	log "github.com/sirupsen/logrus"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/api/handlers"
+	. "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -401,73 +399,14 @@ func (h *OpenAIAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSON []

 	modelName := gjson.GetBytes(rawJSON, "model").String()
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-
-	var cliClient interfaces.Client
-	defer func() {
-		if cliClient != nil {
-			if mutex := cliClient.GetRequestMutex(); mutex != nil {
-				mutex.Unlock()
-			}
-		}
-	}()
-
-	var errorResponse *interfaces.ErrorMessage
-	retryCount := 0
-	for retryCount <= h.Cfg.RequestRetry {
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			cliCancel()
-			return
-		}
-
-		resp, err := cliClient.SendRawMessage(cliCtx, modelName, rawJSON, "")
-		if err != nil {
-			errorResponse = err
-			h.LoggingAPIResponseError(cliCtx, err)
-
-			switch err.StatusCode {
-			case 429:
-				if h.Cfg.QuotaExceeded.SwitchProject {
-					log.Debugf("quota exceeded, switch client")
-					continue // Restart the client selection process
-				}
-			case 403, 408, 500, 502, 503, 504:
-				log.Debugf("http status code %d, switch client", err.StatusCode)
-				retryCount++
-				continue
-			case 401:
-				log.Debugf("unauthorized request, try to refresh token, %s", util.HideAPIKey(cliClient.GetEmail()))
-				errRefreshTokens := cliClient.RefreshTokens(cliCtx)
-				if errRefreshTokens != nil {
-					log.Debugf("refresh token failed, switch client, %s", util.HideAPIKey(cliClient.GetEmail()))
-					cliClient.SetUnavailable()
-				}
-				retryCount++
-				continue
-			case 402:
-				cliClient.SetUnavailable()
-				continue
-			default:
-				// Forward other errors directly to the client
-				c.Status(err.StatusCode)
-				_, _ = c.Writer.Write([]byte(err.Error.Error()))
-				cliCancel(err.Error)
-			}
-			break
-		} else {
-			_, _ = c.Writer.Write(resp)
-			cliCancel()
-			break
-		}
-	}
-	if errorResponse != nil {
-		c.Status(errorResponse.StatusCode)
-		_, _ = c.Writer.Write([]byte(errorResponse.Error.Error()))
-		cliCancel(errorResponse.Error)
+	resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, h.GetAlt(c))
+	if errMsg != nil {
+		h.WriteErrorResponse(c, errMsg)
+		cliCancel(errMsg.Error)
 		return
 	}
+	_, _ = c.Writer.Write(resp)
+	cliCancel()
 }

 // handleStreamingResponse handles streaming responses for Gemini models.
@@ -497,103 +436,8 @@ func (h *OpenAIAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byt

 	modelName := gjson.GetBytes(rawJSON, "model").String()
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-
-	var cliClient interfaces.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		if cliClient != nil {
-			if mutex := cliClient.GetRequestMutex(); mutex != nil {
-				mutex.Unlock()
-			}
-		}
-	}()
-
-	var errorResponse *interfaces.ErrorMessage
-	retryCount := 0
-outLoop:
-	for retryCount <= h.Cfg.RequestRetry {
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			flusher.Flush()
-			cliCancel()
-			return
-		}
-
-		// Send the message and receive response chunks and errors via channels.
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, modelName, rawJSON, "")
-
-		for {
-			select {
-			// Handle client disconnection.
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("openai client disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request.
-					return
-				}
-			// Process incoming response chunks.
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					// Stream is closed, send the final [DONE] message.
-					_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
-					flusher.Flush()
-					cliCancel()
-					return
-				}
-
-				_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", string(chunk))
-				flusher.Flush()
-			// Handle errors from the backend.
-			case err, okError := <-errChan:
-				if okError {
-					errorResponse = err
-					h.LoggingAPIResponseError(cliCtx, err)
-
-					switch err.StatusCode {
-					case 429:
-						if h.Cfg.QuotaExceeded.SwitchProject {
-							log.Debugf("quota exceeded, switch client")
-							continue outLoop // Restart the client selection process
-						}
-					case 403, 408, 500, 502, 503, 504:
-						log.Debugf("http status code %d, switch client", err.StatusCode)
-						retryCount++
-						continue outLoop
-					case 401:
-						log.Debugf("unauthorized request, try to refresh token, %s", util.HideAPIKey(cliClient.GetEmail()))
-						errRefreshTokens := cliClient.RefreshTokens(cliCtx)
-						if errRefreshTokens != nil {
-							log.Debugf("refresh token failed, switch client, %s", util.HideAPIKey(cliClient.GetEmail()))
-							cliClient.SetUnavailable()
-						}
-						retryCount++
-						continue outLoop
-					case 402:
-						cliClient.SetUnavailable()
-						continue outLoop
-					default:
-						// Forward other errors directly to the client
-						c.Status(err.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
-						flusher.Flush()
-						cliCancel(err.Error)
-					}
-					return
-				}
-			// Send a keep-alive signal to the client.
-			case <-time.After(500 * time.Millisecond):
-			}
-		}
-	}
-	if errorResponse != nil {
-		c.Status(errorResponse.StatusCode)
-		_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-		flusher.Flush()
-		cliCancel(errorResponse.Error)
-		return
-	}
+	dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, h.GetAlt(c))
+	h.handleStreamResult(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan)
 }

 // handleCompletionsNonStreamingResponse handles non-streaming completions responses.
@@ -611,77 +455,15 @@ func (h *OpenAIAPIHandler) handleCompletionsNonStreamingResponse(c *gin.Context,

 	modelName := gjson.GetBytes(chatCompletionsJSON, "model").String()
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-
-	var cliClient interfaces.Client
-	defer func() {
-		if cliClient != nil {
-			if mutex := cliClient.GetRequestMutex(); mutex != nil {
-				mutex.Unlock()
-			}
-		}
-	}()
-
-	var errorResponse *interfaces.ErrorMessage
-	retryCount := 0
-	for retryCount <= h.Cfg.RequestRetry {
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			cliCancel()
-			return
-		}
-
-		// Send the converted chat completions request
-		resp, err := cliClient.SendRawMessage(cliCtx, modelName, chatCompletionsJSON, "")
-		if err != nil {
-			errorResponse = err
-			h.LoggingAPIResponseError(cliCtx, err)
-
-			switch err.StatusCode {
-			case 429:
-				if h.Cfg.QuotaExceeded.SwitchProject {
-					log.Debugf("quota exceeded, switch client")
-					continue // Restart the client selection process
-				}
-			case 403, 408, 500, 502, 503, 504:
-				log.Debugf("http status code %d, switch client", err.StatusCode)
-				retryCount++
-				continue
-			case 401:
-				log.Debugf("unauthorized request, try to refresh token, %s", util.HideAPIKey(cliClient.GetEmail()))
-				errRefreshTokens := cliClient.RefreshTokens(cliCtx)
-				if errRefreshTokens != nil {
-					log.Debugf("refresh token failed, switch client, %s", util.HideAPIKey(cliClient.GetEmail()))
-					cliClient.SetUnavailable()
-				}
-				retryCount++
-				continue
-			case 402:
-				cliClient.SetUnavailable()
-				continue
-			default:
-				// Forward other errors directly to the client
-				c.Status(err.StatusCode)
-				_, _ = c.Writer.Write([]byte(err.Error.Error()))
-				cliCancel(err.Error)
-			}
-			break
-		} else {
-			// Convert chat completions response back to completions format
-			completionsResp := convertChatCompletionsResponseToCompletions(resp)
-			_, _ = c.Writer.Write(completionsResp)
-			cliCancel()
-			break
-		}
-	}
-	if errorResponse != nil {
-		c.Status(errorResponse.StatusCode)
-		_, _ = c.Writer.Write([]byte(errorResponse.Error.Error()))
-		cliCancel(errorResponse.Error)
+	resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, chatCompletionsJSON, "")
+	if errMsg != nil {
+		h.WriteErrorResponse(c, errMsg)
+		cliCancel(errMsg.Error)
 		return
 	}
-
+	completionsResp := convertChatCompletionsResponseToCompletions(resp)
+	_, _ = c.Writer.Write(completionsResp)
+	cliCancel()
 }

 // handleCompletionsStreamingResponse handles streaming completions responses.
@@ -714,106 +496,73 @@ func (h *OpenAIAPIHandler) handleCompletionsStreamingResponse(c *gin.Context, ra

 	modelName := gjson.GetBytes(chatCompletionsJSON, "model").String()
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
+	dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, chatCompletionsJSON, "")

-	var cliClient interfaces.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		if cliClient != nil {
-			if mutex := cliClient.GetRequestMutex(); mutex != nil {
-				mutex.Unlock()
-			}
-		}
-	}()
-
-	var errorResponse *interfaces.ErrorMessage
-	retryCount := 0
-outLoop:
-	for retryCount <= h.Cfg.RequestRetry {
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			flusher.Flush()
-			cliCancel()
+	for {
+		select {
+		case <-c.Request.Context().Done():
+			cliCancel(c.Request.Context().Err())
 			return
-		}
-
-		// Send the converted chat completions request and receive response chunks
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, modelName, chatCompletionsJSON, "")
-
-		for {
-			select {
-			// Handle client disconnection.
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("client disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request.
-					return
-				}
-			// Process incoming response chunks.
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					// Stream is closed, send the final [DONE] message.
-					_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
-					flusher.Flush()
-					cliCancel()
-					return
-				}
-
-				// Convert chat completions chunk to completions chunk format
-				completionsChunk := convertChatCompletionsStreamChunkToCompletions(chunk)
-				// Skip this chunk if it has no meaningful content (empty text)
-				if completionsChunk != nil {
-					_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", string(completionsChunk))
-					flusher.Flush()
-				}
-			// Handle errors from the backend.
-			case err, okError := <-errChan:
-				if okError {
-					errorResponse = err
-					h.LoggingAPIResponseError(cliCtx, err)
-
-					switch err.StatusCode {
-					case 429:
-						if h.Cfg.QuotaExceeded.SwitchProject {
-							log.Debugf("quota exceeded, switch client")
-							continue outLoop // Restart the client selection process
-						}
-					case 403, 408, 500, 502, 503, 504:
-						log.Debugf("http status code %d, switch client", err.StatusCode)
-						retryCount++
-						continue outLoop
-					case 401:
-						log.Debugf("unauthorized request, try to refresh token, %s", util.HideAPIKey(cliClient.GetEmail()))
-						errRefreshTokens := cliClient.RefreshTokens(cliCtx)
-						if errRefreshTokens != nil {
-							log.Debugf("refresh token failed, switch client, %s", util.HideAPIKey(cliClient.GetEmail()))
-							cliClient.SetUnavailable()
-						}
-						retryCount++
-						continue outLoop
-					case 402:
-						cliClient.SetUnavailable()
-						continue outLoop
-					default:
-						// Forward other errors directly to the client
-						c.Status(err.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
-						flusher.Flush()
-						cliCancel(err.Error)
-					}
-					return
-				}
-			// Send a keep-alive signal to the client.
-			case <-time.After(500 * time.Millisecond):
+		case chunk, isOk := <-dataChan:
+			if !isOk {
+				_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
+				flusher.Flush()
+				cliCancel()
+				return
 			}
+			converted := convertChatCompletionsStreamChunkToCompletions(chunk)
+			if converted != nil {
+				_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", string(converted))
+				flusher.Flush()
+			}
+		case errMsg, isOk := <-errChan:
+			if !isOk {
+				continue
+			}
+			if errMsg != nil {
+				h.WriteErrorResponse(c, errMsg)
+				flusher.Flush()
+			}
+			var execErr error
+			if errMsg != nil {
+				execErr = errMsg.Error
+			}
+			cliCancel(execErr)
+			return
+		case <-time.After(500 * time.Millisecond):
+		}
+	}
+}
+func (h *OpenAIAPIHandler) handleStreamResult(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
+	for {
+		select {
+		case <-c.Request.Context().Done():
+			cancel(c.Request.Context().Err())
+			return
+		case chunk, ok := <-data:
+			if !ok {
+				_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
+				flusher.Flush()
+				cancel(nil)
+				return
+			}
+			_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", string(chunk))
+			flusher.Flush()
+		case errMsg, ok := <-errs:
+			if !ok {
+				continue
+			}
+			if errMsg != nil {
+				h.WriteErrorResponse(c, errMsg)
+				flusher.Flush()
+			}
+			var execErr error
+			if errMsg != nil {
+				execErr = errMsg.Error
+			}
+			cancel(execErr)
+			return
+		case <-time.After(500 * time.Millisecond):
 		}
 	}
-	if errorResponse != nil {
-		c.Status(errorResponse.StatusCode)
-		_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-		flusher.Flush()
-		cliCancel(errorResponse.Error)
-		return
-	}
 }
--- a/internal/api/handlers/openai/openai_responses_handlers.go
+++ b/internal/api/handlers/openai/openai_responses_handlers.go
@@ -7,18 +7,17 @@
 package openai

 import (
+	"bytes"
 	"context"
 	"fmt"
 	"net/http"
 	"time"

 	"github.com/gin-gonic/gin"
-	"github.com/luispater/CLIProxyAPI/v5/internal/api/handlers"
-	. "github.com/luispater/CLIProxyAPI/v5/internal/constant"
-	"github.com/luispater/CLIProxyAPI/v5/internal/interfaces"
-	"github.com/luispater/CLIProxyAPI/v5/internal/registry"
-	"github.com/luispater/CLIProxyAPI/v5/internal/util"
-	log "github.com/sirupsen/logrus"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/api/handlers"
+	. "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/tidwall/gjson"
 )

@@ -105,73 +104,19 @@ func (h *OpenAIResponsesAPIHandler) handleNonStreamingResponse(c *gin.Context, r

 	modelName := gjson.GetBytes(rawJSON, "model").String()
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-
-	var cliClient interfaces.Client
 	defer func() {
-		if cliClient != nil {
-			if mutex := cliClient.GetRequestMutex(); mutex != nil {
-				mutex.Unlock()
-			}
-		}
+		cliCancel()
 	}()

-	var errorResponse *interfaces.ErrorMessage
-	retryCount := 0
-	for retryCount <= h.Cfg.RequestRetry {
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			cliCancel()
-			return
-		}
-
-		resp, err := cliClient.SendRawMessage(cliCtx, modelName, rawJSON, "")
-		if err != nil {
-			errorResponse = err
-			h.LoggingAPIResponseError(cliCtx, err)
-
-			switch err.StatusCode {
-			case 429:
-				if h.Cfg.QuotaExceeded.SwitchProject {
-					log.Debugf("quota exceeded, switch client")
-					continue // Restart the client selection process
-				}
-			case 403, 408, 500, 502, 503, 504:
-				log.Debugf("http status code %d, switch client", err.StatusCode)
-				retryCount++
-				continue
-			case 401:
-				log.Debugf("unauthorized request, try to refresh token, %s", util.HideAPIKey(cliClient.GetEmail()))
-				errRefreshTokens := cliClient.RefreshTokens(cliCtx)
-				if errRefreshTokens != nil {
-					log.Debugf("refresh token failed, switch client, %s", util.HideAPIKey(cliClient.GetEmail()))
-					cliClient.SetUnavailable()
-				}
-				retryCount++
-				continue
-			case 402:
-				cliClient.SetUnavailable()
-				continue
-			default:
-				// Forward other errors directly to the client
-				c.Status(err.StatusCode)
-				_, _ = c.Writer.Write([]byte(err.Error.Error()))
-				cliCancel(err.Error)
-			}
-			break
-		} else {
-			_, _ = c.Writer.Write(resp)
-			cliCancel()
-			break
-		}
-	}
-	if errorResponse != nil {
-		c.Status(errorResponse.StatusCode)
-		_, _ = c.Writer.Write([]byte(errorResponse.Error.Error()))
-		cliCancel(errorResponse.Error)
+	resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
+	if errMsg != nil {
+		h.WriteErrorResponse(c, errMsg)
 		return
 	}
+	_, _ = c.Writer.Write(resp)
+	return
+
+	// no legacy fallback

 }

@@ -200,102 +145,49 @@ func (h *OpenAIResponsesAPIHandler) handleStreamingResponse(c *gin.Context, rawJ
 		return
 	}

+	// New core execution path
 	modelName := gjson.GetBytes(rawJSON, "model").String()
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
+	dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
+	h.forwardResponsesStream(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan)
+	return
+}

-	var cliClient interfaces.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		if cliClient != nil {
-			if mutex := cliClient.GetRequestMutex(); mutex != nil {
-				mutex.Unlock()
-			}
-		}
-	}()
-
-	var errorResponse *interfaces.ErrorMessage
-	retryCount := 0
-outLoop:
-	for retryCount <= h.Cfg.RequestRetry {
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			flusher.Flush()
-			cliCancel()
+func (h *OpenAIResponsesAPIHandler) forwardResponsesStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
+	for {
+		select {
+		case <-c.Request.Context().Done():
+			cancel(c.Request.Context().Err())
 			return
-		}
-
-		// Send the message and receive response chunks and errors via channels.
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, modelName, rawJSON, "")
-
-		for {
-			select {
-			// Handle client disconnection.
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("openai client disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request.
-					return
-				}
-			// Process incoming response chunks.
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					flusher.Flush()
-					cliCancel()
-					return
-				}
-
-				_, _ = c.Writer.Write(chunk)
-				_, _ = c.Writer.Write([]byte("\n"))
+		case chunk, ok := <-data:
+			if !ok {
 				flusher.Flush()
-			// Handle errors from the backend.
-			case err, okError := <-errChan:
-				if okError {
-					errorResponse = err
-					h.LoggingAPIResponseError(cliCtx, err)
-					switch err.StatusCode {
-					case 429:
-						if h.Cfg.QuotaExceeded.SwitchProject {
-							log.Debugf("quota exceeded, switch client")
-							continue outLoop // Restart the client selection process
-						}
-					case 403, 408, 500, 502, 503, 504:
-						log.Debugf("http status code %d, switch client", err.StatusCode)
-						retryCount++
-						continue outLoop
-					case 401:
-						log.Debugf("unauthorized request, try to refresh token, %s", util.HideAPIKey(cliClient.GetEmail()))
-						errRefreshTokens := cliClient.RefreshTokens(cliCtx)
-						if errRefreshTokens != nil {
-							log.Debugf("refresh token failed, switch client, %s", util.HideAPIKey(cliClient.GetEmail()))
-							cliClient.SetUnavailable()
-						}
-						retryCount++
-						continue outLoop
-					case 402:
-						cliClient.SetUnavailable()
-						continue outLoop
-					default:
-						// Forward other errors directly to the client
-						c.Status(err.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
-						flusher.Flush()
-						cliCancel(err.Error)
-					}
-					return
-				}
-			// Send a keep-alive signal to the client.
-			case <-time.After(500 * time.Millisecond):
+				cancel(nil)
+				return
 			}
-		}
-	}

-	if errorResponse != nil {
-		c.Status(errorResponse.StatusCode)
-		_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-		flusher.Flush()
-		cliCancel(errorResponse.Error)
-		return
+			if bytes.HasPrefix(chunk, []byte("event:")) {
+				_, _ = c.Writer.Write([]byte("\n"))
+			}
+			_, _ = c.Writer.Write(chunk)
+			_, _ = c.Writer.Write([]byte("\n"))
+
+			flusher.Flush()
+		case errMsg, ok := <-errs:
+			if !ok {
+				continue
+			}
+			if errMsg != nil {
+				h.WriteErrorResponse(c, errMsg)
+				flusher.Flush()
+			}
+			var execErr error
+			if errMsg != nil {
+				execErr = errMsg.Error
+			}
+			cancel(execErr)
+			return
+		case <-time.After(500 * time.Millisecond):
+		}
 	}
 }
--- a/internal/api/middleware/request_logging.go
+++ b/internal/api/middleware/request_logging.go
@@ -8,7 +8,7 @@ import (
 	"io"

 	"github.com/gin-gonic/gin"
-	"github.com/luispater/CLIProxyAPI/v5/internal/logging"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 )

 // RequestLoggingMiddleware creates a Gin middleware that logs HTTP requests and responses.
--- a/internal/api/middleware/response_writer.go
+++ b/internal/api/middleware/response_writer.go
@@ -8,8 +8,8 @@ import (
 	"strings"

 	"github.com/gin-gonic/gin"
-	"github.com/luispater/CLIProxyAPI/v5/internal/interfaces"
-	"github.com/luispater/CLIProxyAPI/v5/internal/logging"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 )

 // RequestInfo holds essential details of an incoming HTTP request for logging purposes.
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -14,20 +14,61 @@ import (
 	"strings"

 	"github.com/gin-gonic/gin"
-	"github.com/luispater/CLIProxyAPI/v5/internal/api/handlers"
-	"github.com/luispater/CLIProxyAPI/v5/internal/api/handlers/claude"
-	"github.com/luispater/CLIProxyAPI/v5/internal/api/handlers/gemini"
-	managementHandlers "github.com/luispater/CLIProxyAPI/v5/internal/api/handlers/management"
-	"github.com/luispater/CLIProxyAPI/v5/internal/api/handlers/openai"
-	"github.com/luispater/CLIProxyAPI/v5/internal/api/middleware"
-	"github.com/luispater/CLIProxyAPI/v5/internal/client"
-	"github.com/luispater/CLIProxyAPI/v5/internal/config"
-	"github.com/luispater/CLIProxyAPI/v5/internal/interfaces"
-	"github.com/luispater/CLIProxyAPI/v5/internal/logging"
-	"github.com/luispater/CLIProxyAPI/v5/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/api/handlers"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/api/handlers/claude"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/api/handlers/gemini"
+	managementHandlers "github.com/router-for-me/CLIProxyAPI/v6/internal/api/handlers/management"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/api/handlers/openai"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/api/middleware"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	log "github.com/sirupsen/logrus"
 )

+type serverOptionConfig struct {
+	extraMiddleware      []gin.HandlerFunc
+	engineConfigurator   func(*gin.Engine)
+	routerConfigurator   func(*gin.Engine, *handlers.BaseAPIHandler, *config.Config)
+	requestLoggerFactory func(*config.Config, string) logging.RequestLogger
+}
+
+// ServerOption customises HTTP server construction.
+type ServerOption func(*serverOptionConfig)
+
+func defaultRequestLoggerFactory(cfg *config.Config, configPath string) logging.RequestLogger {
+	return logging.NewFileRequestLogger(cfg.RequestLog, "logs", filepath.Dir(configPath))
+}
+
+// WithMiddleware appends additional Gin middleware during server construction.
+func WithMiddleware(mw ...gin.HandlerFunc) ServerOption {
+	return func(cfg *serverOptionConfig) {
+		cfg.extraMiddleware = append(cfg.extraMiddleware, mw...)
+	}
+}
+
+// WithEngineConfigurator allows callers to mutate the Gin engine prior to middleware setup.
+func WithEngineConfigurator(fn func(*gin.Engine)) ServerOption {
+	return func(cfg *serverOptionConfig) {
+		cfg.engineConfigurator = fn
+	}
+}
+
+// WithRouterConfigurator appends a callback after default routes are registered.
+func WithRouterConfigurator(fn func(*gin.Engine, *handlers.BaseAPIHandler, *config.Config)) ServerOption {
+	return func(cfg *serverOptionConfig) {
+		cfg.routerConfigurator = fn
+	}
+}
+
+// WithRequestLoggerFactory customises request logger creation.
+func WithRequestLoggerFactory(factory func(*config.Config, string) logging.RequestLogger) ServerOption {
+	return func(cfg *serverOptionConfig) {
+		cfg.requestLoggerFactory = factory
+	}
+}
+
 // Server represents the main API server.
 // It encapsulates the Gin engine, HTTP server, handlers, and configuration.
 type Server struct {
@@ -44,7 +85,8 @@ type Server struct {
 	cfg *config.Config

 	// requestLogger is the request logger instance for dynamic configuration updates.
-	requestLogger *logging.FileRequestLogger
+	requestLogger logging.RequestLogger
+	loggerToggle  func(bool)

 	// configFilePath is the absolute path to the YAML config file for persistence.
 	configFilePath string
@@ -58,11 +100,16 @@ type Server struct {
 //
 // Parameters:
 //   - cfg: The server configuration
-//   - cliClients: A slice of AI service clients
 //
 // Returns:
 //   - *Server: A new server instance
-func NewServer(cfg *config.Config, cliClients []interfaces.Client, configFilePath string) *Server {
+func NewServer(cfg *config.Config, authManager *auth.Manager, configFilePath string, opts ...ServerOption) *Server {
+	optionState := &serverOptionConfig{
+		requestLoggerFactory: defaultRequestLoggerFactory,
+	}
+	for i := range opts {
+		opts[i](optionState)
+	}
 	// Set gin mode
 	if !cfg.Debug {
 		gin.SetMode(gin.ReleaseMode)
@@ -70,31 +117,50 @@ func NewServer(cfg *config.Config, cliClients []interfaces.Client, configFilePat

 	// Create gin engine
 	engine := gin.New()
+	if optionState.engineConfigurator != nil {
+		optionState.engineConfigurator(engine)
+	}

 	// Add middleware
 	engine.Use(gin.Logger())
 	engine.Use(gin.Recovery())
+	for _, mw := range optionState.extraMiddleware {
+		engine.Use(mw)
+	}

 	// Add request logging middleware (positioned after recovery, before auth)
 	// Resolve logs directory relative to the configuration file directory.
-	requestLogger := logging.NewFileRequestLogger(cfg.RequestLog, "logs", filepath.Dir(configFilePath))
-	engine.Use(middleware.RequestLoggingMiddleware(requestLogger))
+	var requestLogger logging.RequestLogger
+	var toggle func(bool)
+	if optionState.requestLoggerFactory != nil {
+		requestLogger = optionState.requestLoggerFactory(cfg, configFilePath)
+	}
+	if requestLogger != nil {
+		engine.Use(middleware.RequestLoggingMiddleware(requestLogger))
+		if setter, ok := requestLogger.(interface{ SetEnabled(bool) }); ok {
+			toggle = setter.SetEnabled
+		}
+	}

 	engine.Use(corsMiddleware())

 	// Create server instance
 	s := &Server{
 		engine:         engine,
-		handlers:       handlers.NewBaseAPIHandlers(cliClients, cfg),
+		handlers:       handlers.NewBaseAPIHandlers(cfg, authManager),
 		cfg:            cfg,
 		requestLogger:  requestLogger,
+		loggerToggle:   toggle,
 		configFilePath: configFilePath,
 	}
 	// Initialize management handler
-	s.mgmt = managementHandlers.NewHandler(cfg, configFilePath)
+	s.mgmt = managementHandlers.NewHandler(cfg, configFilePath, authManager)

 	// Setup routes
 	s.setupRoutes()
+	if optionState.routerConfigurator != nil {
+		optionState.routerConfigurator(engine, s.handlers, cfg)
+	}

 	// Create HTTP server
 	s.server = &http.Server{
@@ -349,11 +415,14 @@ func corsMiddleware() gin.HandlerFunc {
 // Parameters:
 //   - clients: The new slice of AI service clients
 //   - cfg: The new application configuration
-func (s *Server) UpdateClients(clients map[string]interfaces.Client, cfg *config.Config) {
-	clientSlice := s.clientsToSlice(clients)
+func (s *Server) UpdateClients(cfg *config.Config) {
 	// Update request logger enabled state if it has changed
 	if s.requestLogger != nil && s.cfg.RequestLog != cfg.RequestLog {
-		s.requestLogger.SetEnabled(cfg.RequestLog)
+		if s.loggerToggle != nil {
+			s.loggerToggle(cfg.RequestLog)
+		} else if toggler, ok := s.requestLogger.(interface{ SetEnabled(bool) }); ok {
+			toggler.SetEnabled(cfg.RequestLog)
+		}
 		log.Debugf("request logging updated from %t to %t", s.cfg.RequestLog, cfg.RequestLog)
 	}

@@ -364,47 +433,49 @@ func (s *Server) UpdateClients(clients map[string]interfaces.Client, cfg *config
 	}

 	s.cfg = cfg
-	s.handlers.UpdateClients(clientSlice, cfg)
+	s.handlers.UpdateClients(cfg)
 	if s.mgmt != nil {
 		s.mgmt.SetConfig(cfg)
+		s.mgmt.SetAuthManager(s.handlers.AuthManager)
 	}

-	// Count client types for detailed logging
+	// Count types from AuthManager state + config
 	authFiles := 0
 	glAPIKeyCount := 0
 	claudeAPIKeyCount := 0
 	codexAPIKeyCount := 0
 	openAICompatCount := 0

-	for _, c := range clientSlice {
-		switch cl := c.(type) {
-		case *client.GeminiCLIClient:
-			authFiles++
-		case *client.GeminiWebClient:
-			authFiles++
-		case *client.CodexClient:
-			if cl.GetAPIKey() == "" {
-				authFiles++
-			} else {
+	if s.handlers != nil && s.handlers.AuthManager != nil {
+		for _, a := range s.handlers.AuthManager.List() {
+			if a == nil {
+				continue
+			}
+			if a.Attributes != nil {
+				if p := a.Attributes["path"]; p != "" {
+					authFiles++
+					continue
+				}
+			}
+			switch strings.ToLower(a.Provider) {
+			case "gemini":
+				glAPIKeyCount++
+			case "claude":
+				claudeAPIKeyCount++
+			case "codex":
 				codexAPIKeyCount++
 			}
-		case *client.ClaudeClient:
-			if cl.GetAPIKey() == "" {
-				authFiles++
-			} else {
-				claudeAPIKeyCount++
-			}
-		case *client.QwenClient:
-			authFiles++
-		case *client.GeminiClient:
-			glAPIKeyCount++
-		case *client.OpenAICompatibilityClient:
-			openAICompatCount++
+		}
+	}
+	if cfg != nil {
+		for i := range cfg.OpenAICompatibility {
+			openAICompatCount += len(cfg.OpenAICompatibility[i].APIKeys)
 		}
 	}

+	total := authFiles + glAPIKeyCount + claudeAPIKeyCount + codexAPIKeyCount + openAICompatCount
 	log.Infof("server clients and configuration updated: %d clients (%d auth files + %d GL API keys + %d Claude API keys + %d Codex keys + %d OpenAI-compat)",
-		len(clientSlice),
+		total,
 		authFiles,
 		glAPIKeyCount,
 		claudeAPIKeyCount,
@@ -481,10 +552,4 @@ func AuthMiddleware(cfg *config.Config) gin.HandlerFunc {
 	}
 }

-func (s *Server) clientsToSlice(clientMap map[string]interfaces.Client) []interfaces.Client {
-	slice := make([]interfaces.Client, 0, len(clientMap))
-	for _, v := range clientMap {
-		slice = append(slice, v)
-	}
-	return slice
-}
+// legacy clientsToSlice removed; handlers no longer consume legacy client slices