Refactor codebase

2026-02-18 04:10:51 +08:00 · 2025-08-22 01:31:12 +08:00
parent 2b1762be16
commit 8c555c4e69
109 changed files with 7319 additions and 5735 deletions
--- a/internal/api/handlers/claude/code_handlers.go
+++ b/internal/api/handlers/claude/code_handlers.go
@@ -7,43 +7,56 @@
 package claude

 import (
-	"bytes"
 	"context"
 	"fmt"
 	"net/http"
-	"strings"
 	"time"

 	"github.com/gin-gonic/gin"
 	"github.com/luispater/CLIProxyAPI/internal/api/handlers"
-	"github.com/luispater/CLIProxyAPI/internal/client"
-	translatorClaudeCodeToCodex "github.com/luispater/CLIProxyAPI/internal/translator/codex/claude/code"
-	translatorClaudeCodeToGeminiCli "github.com/luispater/CLIProxyAPI/internal/translator/gemini-cli/claude/code"
-	translatorClaudeCodeToQwen "github.com/luispater/CLIProxyAPI/internal/translator/openai/claude"
-	"github.com/luispater/CLIProxyAPI/internal/util"
+	. "github.com/luispater/CLIProxyAPI/internal/constant"
+	"github.com/luispater/CLIProxyAPI/internal/interfaces"
 	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
-	"github.com/tidwall/sjson"
 )

-// ClaudeCodeAPIHandlers contains the handlers for Claude API endpoints.
+// ClaudeCodeAPIHandler contains the handlers for Claude API endpoints.
 // It holds a pool of clients to interact with the backend service.
-type ClaudeCodeAPIHandlers struct {
-	*handlers.APIHandlers
+type ClaudeCodeAPIHandler struct {
+	*handlers.BaseAPIHandler
 }

-// NewClaudeCodeAPIHandlers creates a new Claude API handlers instance.
-// It takes an APIHandlers instance as input and returns a ClaudeCodeAPIHandlers.
-func NewClaudeCodeAPIHandlers(apiHandlers *handlers.APIHandlers) *ClaudeCodeAPIHandlers {
-	return &ClaudeCodeAPIHandlers{
-		APIHandlers: apiHandlers,
+// NewClaudeCodeAPIHandler creates a new Claude API handler instance.
+// It takes a BaseAPIHandler instance as input and returns a ClaudeCodeAPIHandler.
+//
+// Parameters:
+//   - apiHandlers: The base API handler instance.
+//
+// Returns:
+//   - *ClaudeCodeAPIHandler: A new Claude code API handler instance.
+func NewClaudeCodeAPIHandler(apiHandlers *handlers.BaseAPIHandler) *ClaudeCodeAPIHandler {
+	return &ClaudeCodeAPIHandler{
+		BaseAPIHandler: apiHandlers,
 	}
 }

+// HandlerType returns the identifier for this handler implementation.
+func (h *ClaudeCodeAPIHandler) HandlerType() string {
+	return CLAUDE
+}
+
+// Models returns the list of models supported by this handler; it currently always returns an empty list.
+func (h *ClaudeCodeAPIHandler) Models() []map[string]any {
+	return make([]map[string]any, 0)
+}
+
 // ClaudeMessages handles Claude-compatible streaming chat completions.
 // This function implements a sophisticated client rotation and quota management system
 // to ensure high availability and optimal resource utilization across multiple backend clients.
-func (h *ClaudeCodeAPIHandlers) ClaudeMessages(c *gin.Context) {
+//
+// Parameters:
+//   - c: The Gin context for the request.
+func (h *ClaudeCodeAPIHandler) ClaudeMessages(c *gin.Context) {
 	// Extract raw JSON data from the incoming request
 	rawJSON, err := c.GetRawData()
 	// If data retrieval fails, return a 400 Bad Request error.
@@ -57,34 +70,23 @@ func (h *ClaudeCodeAPIHandlers) ClaudeMessages(c *gin.Context) {
 		return
 	}

-	// h.handleGeminiStreamingResponse(c, rawJSON)
-	// h.handleCodexStreamingResponse(c, rawJSON)
-	modelName := gjson.GetBytes(rawJSON, "model")
-	provider := util.GetProviderName(modelName.String())
-
 	// Check if the client requested a streaming response.
 	streamResult := gjson.GetBytes(rawJSON, "stream")
 	if !streamResult.Exists() || streamResult.Type == gjson.False {
 		return
 	}

-	if provider == "gemini" {
-		h.handleGeminiStreamingResponse(c, rawJSON)
-	} else if provider == "gpt" {
-		h.handleCodexStreamingResponse(c, rawJSON)
-	} else if provider == "claude" {
-		h.handleClaudeStreamingResponse(c, rawJSON)
-	} else if provider == "qwen" {
-		h.handleQwenStreamingResponse(c, rawJSON)
-	} else {
-		h.handleGeminiStreamingResponse(c, rawJSON)
-	}
+	h.handleStreamingResponse(c, rawJSON)
 }

-// handleGeminiStreamingResponse streams Claude-compatible responses backed by Gemini.
+// handleStreamingResponse streams Claude-compatible responses from the backend client selected for the requested model.
 // It sets up SSE, selects a backend client with rotation/quota logic,
 // forwards chunks, and translates them to Claude CLI format.
-func (h *ClaudeCodeAPIHandlers) handleGeminiStreamingResponse(c *gin.Context, rawJSON []byte) {
+//
+// Parameters:
+//   - c: The Gin context for the request.
+//   - rawJSON: The raw JSON request body.
+func (h *ClaudeCodeAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byte) {
 	// Set up Server-Sent Events (SSE) headers for streaming response
 	// These headers are essential for maintaining a persistent connection
 	// and enabling real-time streaming of chat completions
@@ -106,16 +108,13 @@ func (h *ClaudeCodeAPIHandlers) handleGeminiStreamingResponse(c *gin.Context, ra
 		return
 	}

-	// Parse and prepare the Claude request, extracting model name, system instructions,
-	// conversation contents, and available tools from the raw JSON
-	modelName, systemInstruction, contents, tools := translatorClaudeCodeToGeminiCli.ConvertClaudeCodeRequestToCli(rawJSON)
+	modelName := gjson.GetBytes(rawJSON, "model").String()

 	// Create a cancellable context for the backend client request
 	// This allows proper cleanup and cancellation of ongoing requests
-	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
+	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())

-	var cliClient client.Client
-	cliClient = client.NewGeminiClient(nil, nil, nil)
+	var cliClient interfaces.Client
 	defer func() {
 		// Ensure the client's mutex is unlocked on function exit.
 		// This prevents deadlocks and ensures proper resource cleanup
@@ -128,7 +127,7 @@ func (h *ClaudeCodeAPIHandlers) handleGeminiStreamingResponse(c *gin.Context, ra
 	// This loop implements a sophisticated load balancing and failover mechanism
 outLoop:
 	for {
-		var errorResponse *client.ErrorMessage
+		var errorResponse *interfaces.ErrorMessage
 		cliClient, errorResponse = h.GetClient(modelName)
 		if errorResponse != nil {
 			c.Status(errorResponse.StatusCode)
@@ -138,24 +137,8 @@ outLoop:
 			return
 		}

-		// Determine the authentication method being used by the selected client
-		// This affects how responses are formatted and logged
-		isGlAPIKey := false
-		if glAPIKey := cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey(); glAPIKey != "" {
-			log.Debugf("Request use gemini generative language API Key: %s", glAPIKey)
-			isGlAPIKey = true
-		} else {
-			log.Debugf("Request use gemini account: %s, project id: %s", cliClient.GetEmail(), cliClient.(*client.GeminiClient).GetProjectID())
-		}
-		// Initiate streaming communication with the backend client
-		// This returns two channels: one for response chunks and one for errors
-
-		respChan, errChan := cliClient.SendMessageStream(cliCtx, rawJSON, modelName, systemInstruction, contents, tools, true)
-
-		// Track response state for proper Claude format conversion
-		hasFirstResponse := false
-		responseType := 0
-		responseIndex := 0
+		// Initiate streaming communication with the backend client using raw JSON
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, modelName, rawJSON, "")

 		// Main streaming loop - handles multiple concurrent events using Go channels
 		// This select statement manages four different types of events simultaneously
@@ -174,29 +157,13 @@ outLoop:
 			// This handles the actual streaming data from the AI model
 			case chunk, okStream := <-respChan:
 				if !okStream {
-					// Stream has ended - send the final message_stop event
-					// This follows the Claude API specification for stream termination
-					_, _ = c.Writer.Write([]byte(`event: message_stop`))
-					_, _ = c.Writer.Write([]byte("\n"))
-					_, _ = c.Writer.Write([]byte(`data: {"type":"message_stop"}`))
-					_, _ = c.Writer.Write([]byte("\n\n\n"))
-
 					flusher.Flush()
 					cliCancel()
 					return
 				}

-				h.AddAPIResponseData(c, chunk)
-				h.AddAPIResponseData(c, []byte("\n\n"))
-				// Convert the backend response to Claude-compatible format
-				// This translation layer ensures API compatibility
-				claudeFormat := translatorClaudeCodeToGeminiCli.ConvertCliResponseToClaudeCode(chunk, isGlAPIKey, hasFirstResponse, &responseType, &responseIndex)
-				if claudeFormat != "" {
-					_, _ = c.Writer.Write([]byte(claudeFormat))
-					flusher.Flush() // Immediately send the chunk to the client
-				}
-				hasFirstResponse = true
-
+				_, _ = c.Writer.Write(chunk)
+				_, _ = c.Writer.Write([]byte("\n"))
 			// Case 3: Handle errors from the backend
 			// This manages various error conditions and implements retry logic
 			case errInfo, okError := <-errChan:
@@ -218,452 +185,6 @@ outLoop:
 			// Case 4: Send periodic keep-alive signals
 			// Prevents connection timeouts during long-running requests
 			case <-time.After(500 * time.Millisecond):
-				if hasFirstResponse {
-					// Send a ping event to maintain the connection
-					// This is especially important for slow AI model responses
-					// output := "event: ping\n"
-					// output = output + `data: {"type": "ping"}`
-					// output = output + "\n\n\n"
-					// _, _ = c.Writer.Write([]byte(output))
-					//
-					// flusher.Flush()
-				}
-			}
-		}
-	}
-}
-
-// handleCodexStreamingResponse streams Claude-compatible responses backed by OpenAI.
-// It converts the Claude request into Codex/OpenAI responses format, establishes SSE,
-// and translates streaming chunks back into Claude CLI events.
-func (h *ClaudeCodeAPIHandlers) handleCodexStreamingResponse(c *gin.Context, rawJSON []byte) {
-	// Set up Server-Sent Events (SSE) headers for streaming response
-	// These headers are essential for maintaining a persistent connection
-	// and enabling real-time streaming of chat completions
-	c.Header("Content-Type", "text/event-stream")
-	c.Header("Cache-Control", "no-cache")
-	c.Header("Connection", "keep-alive")
-	c.Header("Access-Control-Allow-Origin", "*")
-
-	// Get the http.Flusher interface to manually flush the response.
-	// This is crucial for streaming as it allows immediate sending of data chunks
-	flusher, ok := c.Writer.(http.Flusher)
-	if !ok {
-		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: "Streaming not supported",
-				Type:    "server_error",
-			},
-		})
-		return
-	}
-
-	// Parse and prepare the Claude request, extracting model name, system instructions,
-	// conversation contents, and available tools from the raw JSON
-	newRequestJSON := translatorClaudeCodeToCodex.ConvertClaudeCodeRequestToCodex(rawJSON)
-	modelName := gjson.GetBytes(rawJSON, "model").String()
-
-	newRequestJSON, _ = sjson.Set(newRequestJSON, "model", modelName)
-	// log.Debugf(string(rawJSON))
-	// log.Debugf(newRequestJSON)
-	// return
-	// Create a cancellable context for the backend client request
-	// This allows proper cleanup and cancellation of ongoing requests
-	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
-
-	var cliClient client.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		// This prevents deadlocks and ensures proper resource cleanup
-		if cliClient != nil {
-			cliClient.GetRequestMutex().Unlock()
-		}
-	}()
-
-	// Main client rotation loop with quota management
-	// This loop implements a sophisticated load balancing and failover mechanism
-outLoop:
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			flusher.Flush()
-			cliCancel()
-			return
-		}
-
-		log.Debugf("Request use codex account: %s", cliClient.GetEmail())
-
-		// Initiate streaming communication with the backend client
-		// This returns two channels: one for response chunks and one for errors
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
-
-		// Track response state for proper Claude format conversion
-		// hasFirstResponse := false
-		hasToolCall := false
-
-		// Main streaming loop - handles multiple concurrent events using Go channels
-		// This select statement manages four different types of events simultaneously
-		for {
-			select {
-			// Case 1: Handle client disconnection
-			// Detects when the HTTP client has disconnected and cleans up resources
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request to prevent resource leaks
-					return
-				}
-
-			// Case 2: Process incoming response chunks from the backend
-			// This handles the actual streaming data from the AI model
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					flusher.Flush()
-					cliCancel()
-					return
-				}
-
-				h.AddAPIResponseData(c, chunk)
-				h.AddAPIResponseData(c, []byte("\n\n"))
-
-				// Convert the backend response to Claude-compatible format
-				// This translation layer ensures API compatibility
-				if bytes.HasPrefix(chunk, []byte("data: ")) {
-					jsonData := chunk[6:]
-					var claudeFormat string
-					claudeFormat, hasToolCall = translatorClaudeCodeToCodex.ConvertCodexResponseToClaude(jsonData, hasToolCall)
-					// log.Debugf("claudeFormat: %s", claudeFormat)
-					if claudeFormat != "" {
-						_, _ = c.Writer.Write([]byte(claudeFormat))
-						_, _ = c.Writer.Write([]byte("\n"))
-					}
-					flusher.Flush() // Immediately send the chunk to the client
-					// hasFirstResponse = true
-				} else {
-					// log.Debugf("chunk: %s", string(chunk))
-				}
-			// Case 3: Handle errors from the backend
-			// This manages various error conditions and implements retry logic
-			case errInfo, okError := <-errChan:
-				if okError {
-					// log.Debugf("Code: %d, Error: %v", errInfo.StatusCode, errInfo.Error)
-					// Special handling for quota exceeded errors
-					// If configured, attempt to switch to a different project/client
-					if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-						log.Debugf("quota exceeded, switch client")
-						continue outLoop // Restart the client selection process
-					} else {
-						// Forward other errors directly to the client
-						c.Status(errInfo.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
-						flusher.Flush()
-						cliCancel(errInfo.Error)
-					}
-					return
-				}
-
-			// Case 4: Send periodic keep-alive signals
-			// Prevents connection timeouts during long-running requests
-			case <-time.After(3000 * time.Millisecond):
-				// if hasFirstResponse {
-				// 	// Send a ping event to maintain the connection
-				// 	// This is especially important for slow AI model responses
-				// 	output := "event: ping\n"
-				// 	output = output + `data: {"type": "ping"}`
-				// 	output = output + "\n\n"
-				// 	_, _ = c.Writer.Write([]byte(output))
-				//
-				// 	flusher.Flush()
-				// }
-			}
-		}
-	}
-}
-
-// handleClaudeStreamingResponse streams Claude-compatible responses backed by OpenAI.
-// It converts the Claude request into OpenAI responses format, establishes SSE,
-// and translates streaming chunks back into Claude Code events.
-func (h *ClaudeCodeAPIHandlers) handleClaudeStreamingResponse(c *gin.Context, rawJSON []byte) {
-
-	// Get the http.Flusher interface to manually flush the response.
-	// This is crucial for streaming as it allows immediate sending of data chunks
-	flusher, ok := c.Writer.(http.Flusher)
-	if !ok {
-		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: "Streaming not supported",
-				Type:    "server_error",
-			},
-		})
-		return
-	}
-
-	modelName := gjson.GetBytes(rawJSON, "model").String()
-
-	// Create a cancellable context for the backend client request
-	// This allows proper cleanup and cancellation of ongoing requests
-	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
-
-	var cliClient client.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		// This prevents deadlocks and ensures proper resource cleanup
-		if cliClient != nil {
-			cliClient.GetRequestMutex().Unlock()
-		}
-	}()
-
-	// Main client rotation loop with quota management
-	// This loop implements a sophisticated load balancing and failover mechanism
-outLoop:
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-
-			if errorResponse.StatusCode == 429 {
-				c.Header("Content-Type", "application/json")
-				c.Header("Content-Length", fmt.Sprintf("%d", len(errorResponse.Error.Error())))
-			}
-			c.Status(errorResponse.StatusCode)
-
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			flusher.Flush()
-			cliCancel()
-
-			return
-		}
-
-		if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
-			log.Debugf("Request claude use API Key: %s", apiKey)
-		} else {
-			log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
-		}
-
-		// Initiate streaming communication with the backend client
-		// This returns two channels: one for response chunks and one for errors
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, rawJSON, "")
-
-		hasFirstResponse := false
-		// Main streaming loop - handles multiple concurrent events using Go channels
-		// This select statement manages four different types of events simultaneously
-		for {
-			select {
-			// Case 1: Handle client disconnection
-			// Detects when the HTTP client has disconnected and cleans up resources
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("ClaudeClient disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request to prevent resource leaks
-					return
-				}
-
-			// Case 2: Process incoming response chunks from the backend
-			// This handles the actual streaming data from the AI model
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					flusher.Flush()
-					cliCancel()
-					return
-				}
-				h.AddAPIResponseData(c, chunk)
-				h.AddAPIResponseData(c, []byte("\n\n"))
-
-				if !hasFirstResponse {
-					// Set up Server-Sent Events (SSE) headers for streaming response
-					// These headers are essential for maintaining a persistent connection
-					// and enabling real-time streaming of chat completions
-					c.Header("Content-Type", "text/event-stream")
-					c.Header("Cache-Control", "no-cache")
-					c.Header("Connection", "keep-alive")
-					c.Header("Access-Control-Allow-Origin", "*")
-					hasFirstResponse = true
-				}
-
-				_, _ = c.Writer.Write(chunk)
-				_, _ = c.Writer.Write([]byte("\n"))
-				flusher.Flush()
-
-			// Case 3: Handle errors from the backend
-			// This manages various error conditions and implements retry logic
-			case errInfo, okError := <-errChan:
-				if okError {
-					// log.Debugf("Code: %d, Error: %v", errInfo.StatusCode, errInfo.Error)
-					// Special handling for quota exceeded errors
-					// If configured, attempt to switch to a different project/client
-					// if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-					if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-						log.Debugf("quota exceeded, switch client")
-						continue outLoop // Restart the client selection process
-					} else {
-						// Forward other errors directly to the client
-						if errInfo.Addon != nil {
-							for key, val := range errInfo.Addon {
-								c.Header(key, val[0])
-							}
-						}
-
-						c.Status(errInfo.StatusCode)
-
-						_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
-						flusher.Flush()
-						cliCancel(errInfo.Error)
-					}
-					return
-				}
-
-			// Case 4: Send periodic keep-alive signals
-			// Prevents connection timeouts during long-running requests
-			case <-time.After(3000 * time.Millisecond):
-			}
-		}
-	}
-}
-
-// handleQwenStreamingResponse streams Claude-compatible responses backed by OpenAI.
-// It converts the Claude request into Qwen responses format, establishes SSE,
-// and translates streaming chunks back into Claude Code events.
-func (h *ClaudeCodeAPIHandlers) handleQwenStreamingResponse(c *gin.Context, rawJSON []byte) {
-	// Set up Server-Sent Events (SSE) headers for streaming response
-	// These headers are essential for maintaining a persistent connection
-	// and enabling real-time streaming of chat completions
-	c.Header("Content-Type", "text/event-stream")
-	c.Header("Cache-Control", "no-cache")
-	c.Header("Connection", "keep-alive")
-	c.Header("Access-Control-Allow-Origin", "*")
-
-	// Get the http.Flusher interface to manually flush the response.
-	// This is crucial for streaming as it allows immediate sending of data chunks
-	flusher, ok := c.Writer.(http.Flusher)
-	if !ok {
-		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: "Streaming not supported",
-				Type:    "server_error",
-			},
-		})
-		return
-	}
-
-	// Parse and prepare the Claude request, extracting model name, system instructions,
-	// conversation contents, and available tools from the raw JSON
-	newRequestJSON := translatorClaudeCodeToQwen.ConvertAnthropicRequestToOpenAI(rawJSON)
-	modelName := gjson.GetBytes(rawJSON, "model").String()
-
-	newRequestJSON, _ = sjson.Set(newRequestJSON, "model", modelName)
-	// log.Debugf(string(rawJSON))
-	// log.Debugf(newRequestJSON)
-	// return
-	// Create a cancellable context for the backend client request
-	// This allows proper cleanup and cancellation of ongoing requests
-	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
-
-	var cliClient client.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		// This prevents deadlocks and ensures proper resource cleanup
-		if cliClient != nil {
-			cliClient.GetRequestMutex().Unlock()
-		}
-	}()
-
-	// Main client rotation loop with quota management
-	// This loop implements a sophisticated load balancing and failover mechanism
-outLoop:
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			flusher.Flush()
-			cliCancel()
-			return
-		}
-
-		log.Debugf("Request use qwen account: %s", cliClient.GetEmail())
-
-		// Initiate streaming communication with the backend client
-		// This returns two channels: one for response chunks and one for errors
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
-
-		// Track response state for proper Claude format conversion
-
-		params := &translatorClaudeCodeToQwen.ConvertOpenAIResponseToAnthropicParams{
-			MessageID:            "",
-			Model:                "",
-			CreatedAt:            0,
-			ContentAccumulator:   strings.Builder{},
-			ToolCallsAccumulator: nil,
-		}
-
-		// Main streaming loop - handles multiple concurrent events using Go channels
-		// This select statement manages four different types of events simultaneously
-		for {
-			select {
-			// Case 1: Handle client disconnection
-			// Detects when the HTTP client has disconnected and cleans up resources
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request to prevent resource leaks
-					return
-				}
-
-			// Case 2: Process incoming response chunks from the backend
-			// This handles the actual streaming data from the AI model
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					flusher.Flush()
-					cliCancel()
-					return
-				}
-
-				h.AddAPIResponseData(c, chunk)
-				h.AddAPIResponseData(c, []byte("\n"))
-
-				// Convert the backend response to Claude-compatible format
-				// This translation layer ensures API compatibility
-				if bytes.HasPrefix(chunk, []byte("data: ")) {
-					jsonData := chunk[6:]
-					outputs := translatorClaudeCodeToQwen.ConvertOpenAIResponseToAnthropic(jsonData, params)
-					if len(outputs) > 0 {
-						for i := 0; i < len(outputs); i++ {
-							_, _ = c.Writer.Write([]byte("data: "))
-							_, _ = c.Writer.Write([]byte(outputs[i]))
-						}
-					}
-					flusher.Flush() // Immediately send the chunk to the client
-					// hasFirstResponse = true
-				} else {
-					// log.Debugf("chunk: %s", string(chunk))
-				}
-			// Case 3: Handle errors from the backend
-			// This manages various error conditions and implements retry logic
-			case errInfo, okError := <-errChan:
-				if okError {
-					// log.Debugf("Code: %d, Error: %v", errInfo.StatusCode, errInfo.Error)
-					// Special handling for quota exceeded errors
-					// If configured, attempt to switch to a different project/client
-					if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-						log.Debugf("quota exceeded, switch client")
-						continue outLoop // Restart the client selection process
-					} else {
-						// Forward other errors directly to the client
-						c.Status(errInfo.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
-						flusher.Flush()
-						cliCancel(errInfo.Error)
-					}
-					return
-				}
-
-			// Case 4: Send periodic keep-alive signals
-			// Prevents connection timeouts during long-running requests
-			case <-time.After(3000 * time.Millisecond):
 			}
 		}
 	}
--- a/internal/api/handlers/gemini/cli/cli_handlers.go
+++ b/internal/api/handlers/gemini/cli/cli_handlers.go
@@ -1,917 +0,0 @@
-// Package cli provides HTTP handlers for Gemini CLI API functionality.
-// This package implements handlers that process CLI-specific requests for Gemini API operations,
-// including content generation and streaming content generation endpoints.
-// The handlers restrict access to localhost only and manage communication with the backend service.
-package cli
-
-import (
-	"bytes"
-	"context"
-	"fmt"
-	"io"
-	"net/http"
-	"strings"
-	"time"
-
-	"github.com/gin-gonic/gin"
-	"github.com/luispater/CLIProxyAPI/internal/api/handlers"
-	"github.com/luispater/CLIProxyAPI/internal/client"
-	translatorGeminiToClaude "github.com/luispater/CLIProxyAPI/internal/translator/claude/gemini"
-	translatorGeminiToCodex "github.com/luispater/CLIProxyAPI/internal/translator/codex/gemini"
-	translatorGeminiToQwen "github.com/luispater/CLIProxyAPI/internal/translator/openai/gemini"
-	"github.com/luispater/CLIProxyAPI/internal/util"
-	log "github.com/sirupsen/logrus"
-	"github.com/tidwall/gjson"
-	"github.com/tidwall/sjson"
-)
-
-// GeminiCLIAPIHandlers contains the handlers for Gemini CLI API endpoints.
-// It holds a pool of clients to interact with the backend service.
-type GeminiCLIAPIHandlers struct {
-	*handlers.APIHandlers
-}
-
-// NewGeminiCLIAPIHandlers creates a new Gemini CLI API handlers instance.
-// It takes an APIHandlers instance as input and returns a GeminiCLIAPIHandlers.
-func NewGeminiCLIAPIHandlers(apiHandlers *handlers.APIHandlers) *GeminiCLIAPIHandlers {
-	return &GeminiCLIAPIHandlers{
-		APIHandlers: apiHandlers,
-	}
-}
-
-// CLIHandler handles CLI-specific requests for Gemini API operations.
-// It restricts access to localhost only and routes requests to appropriate internal handlers.
-func (h *GeminiCLIAPIHandlers) CLIHandler(c *gin.Context) {
-	if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") {
-		c.JSON(http.StatusForbidden, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: "CLI reply only allow local access",
-				Type:    "forbidden",
-			},
-		})
-		return
-	}
-
-	rawJSON, _ := c.GetRawData()
-	requestRawURI := c.Request.URL.Path
-
-	modelName := gjson.GetBytes(rawJSON, "model")
-	provider := util.GetProviderName(modelName.String())
-
-	if requestRawURI == "/v1internal:generateContent" {
-		if provider == "gemini" || provider == "unknow" {
-			h.handleInternalGenerateContent(c, rawJSON)
-		} else if provider == "gpt" {
-			h.handleCodexInternalGenerateContent(c, rawJSON)
-		} else if provider == "claude" {
-			h.handleClaudeInternalGenerateContent(c, rawJSON)
-		} else if provider == "qwen" {
-			h.handleQwenInternalGenerateContent(c, rawJSON)
-		}
-	} else if requestRawURI == "/v1internal:streamGenerateContent" {
-		if provider == "gemini" || provider == "unknow" {
-			h.handleInternalStreamGenerateContent(c, rawJSON)
-		} else if provider == "gpt" {
-			h.handleCodexInternalStreamGenerateContent(c, rawJSON)
-		} else if provider == "claude" {
-			h.handleClaudeInternalStreamGenerateContent(c, rawJSON)
-		} else if provider == "qwen" {
-			h.handleQwenInternalStreamGenerateContent(c, rawJSON)
-		}
-	} else {
-		reqBody := bytes.NewBuffer(rawJSON)
-		req, err := http.NewRequest("POST", fmt.Sprintf("https://cloudcode-pa.googleapis.com%s", c.Request.URL.RequestURI()), reqBody)
-		if err != nil {
-			c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
-				Error: handlers.ErrorDetail{
-					Message: fmt.Sprintf("Invalid request: %v", err),
-					Type:    "invalid_request_error",
-				},
-			})
-			return
-		}
-		for key, value := range c.Request.Header {
-			req.Header[key] = value
-		}
-
-		httpClient := util.SetProxy(h.Cfg, &http.Client{})
-
-		resp, err := httpClient.Do(req)
-		if err != nil {
-			c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
-				Error: handlers.ErrorDetail{
-					Message: fmt.Sprintf("Invalid request: %v", err),
-					Type:    "invalid_request_error",
-				},
-			})
-			return
-		}
-
-		if resp.StatusCode < 200 || resp.StatusCode >= 300 {
-			defer func() {
-				if err = resp.Body.Close(); err != nil {
-					log.Printf("warn: failed to close response body: %v", err)
-				}
-			}()
-			bodyBytes, _ := io.ReadAll(resp.Body)
-
-			c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
-				Error: handlers.ErrorDetail{
-					Message: string(bodyBytes),
-					Type:    "invalid_request_error",
-				},
-			})
-			return
-		}
-
-		defer func() {
-			_ = resp.Body.Close()
-		}()
-
-		for key, value := range resp.Header {
-			c.Header(key, value[0])
-		}
-		output, err := io.ReadAll(resp.Body)
-		if err != nil {
-			log.Errorf("Failed to read response body: %v", err)
-			return
-		}
-		_, _ = c.Writer.Write(output)
-		c.Set("API_RESPONSE", output)
-	}
-}
-
-func (h *GeminiCLIAPIHandlers) handleInternalStreamGenerateContent(c *gin.Context, rawJSON []byte) {
-	alt := h.GetAlt(c)
-
-	if alt == "" {
-		c.Header("Content-Type", "text/event-stream")
-		c.Header("Cache-Control", "no-cache")
-		c.Header("Connection", "keep-alive")
-		c.Header("Access-Control-Allow-Origin", "*")
-	}
-
-	// Get the http.Flusher interface to manually flush the response.
-	flusher, ok := c.Writer.(http.Flusher)
-	if !ok {
-		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: "Streaming not supported",
-				Type:    "server_error",
-			},
-		})
-		return
-	}
-
-	modelResult := gjson.GetBytes(rawJSON, "model")
-	modelName := modelResult.String()
-
-	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
-
-	var cliClient client.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		if cliClient != nil {
-			cliClient.GetRequestMutex().Unlock()
-		}
-	}()
-
-outLoop:
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			flusher.Flush()
-			cliCancel()
-			return
-		}
-
-		if glAPIKey := cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey(); glAPIKey != "" {
-			log.Debugf("Request use generative language API Key: %s", glAPIKey)
-		} else {
-			log.Debugf("Request cli use account: %s, project id: %s", cliClient.(*client.GeminiClient).GetEmail(), cliClient.(*client.GeminiClient).GetProjectID())
-		}
-		// Send the message and receive response chunks and errors via channels.
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, rawJSON, "")
-		hasFirstResponse := false
-
-		for {
-			select {
-			// Handle client disconnection.
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("GeminiClient disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request.
-					return
-				}
-			// Process incoming response chunks.
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					cliCancel()
-					return
-				}
-
-				h.AddAPIResponseData(c, chunk)
-				h.AddAPIResponseData(c, []byte("\n\n"))
-
-				hasFirstResponse = true
-				if cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey() != "" {
-					chunk, _ = sjson.SetRawBytes(chunk, "response", chunk)
-				}
-				_, _ = c.Writer.Write([]byte("data: "))
-				_, _ = c.Writer.Write(chunk)
-				_, _ = c.Writer.Write([]byte("\n\n"))
-
-				flusher.Flush()
-			// Handle errors from the backend.
-			case err, okError := <-errChan:
-				if okError {
-					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-						continue outLoop
-					} else {
-						c.Status(err.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
-						flusher.Flush()
-						cliCancel(err.Error)
-					}
-					return
-				}
-			// Send a keep-alive signal to the client.
-			case <-time.After(500 * time.Millisecond):
-				if hasFirstResponse {
-					_, _ = c.Writer.Write([]byte("\n"))
-					flusher.Flush()
-				}
-			}
-		}
-	}
-}
-
-func (h *GeminiCLIAPIHandlers) handleInternalGenerateContent(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "application/json")
-	// log.Debugf("GenerateContent: %s", string(rawJSON))
-	modelResult := gjson.GetBytes(rawJSON, "model")
-	modelName := modelResult.String()
-
-	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
-
-	var cliClient client.Client
-	defer func() {
-		if cliClient != nil {
-			cliClient.GetRequestMutex().Unlock()
-		}
-	}()
-
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			cliCancel()
-			return
-		}
-
-		if glAPIKey := cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey(); glAPIKey != "" {
-			log.Debugf("Request use generative language API Key: %s", glAPIKey)
-		} else {
-			log.Debugf("Request cli use account: %s, project id: %s", cliClient.(*client.GeminiClient).GetEmail(), cliClient.(*client.GeminiClient).GetProjectID())
-		}
-
-		resp, err := cliClient.SendRawMessage(cliCtx, rawJSON, "")
-		if err != nil {
-			if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-				continue
-			} else {
-				c.Status(err.StatusCode)
-				_, _ = c.Writer.Write([]byte(err.Error.Error()))
-				// log.Debugf("code: %d, error: %s", err.StatusCode, err.Error.Error())
-				cliCancel(err.Error)
-			}
-			break
-		} else {
-			_, _ = c.Writer.Write(resp)
-			cliCancel(resp)
-			break
-		}
-	}
-}
-
-func (h *GeminiCLIAPIHandlers) handleCodexInternalStreamGenerateContent(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "text/event-stream")
-	c.Header("Cache-Control", "no-cache")
-	c.Header("Connection", "keep-alive")
-	c.Header("Access-Control-Allow-Origin", "*")
-
-	// Get the http.Flusher interface to manually flush the response.
-	flusher, ok := c.Writer.(http.Flusher)
-	if !ok {
-		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: "Streaming not supported",
-				Type:    "server_error",
-			},
-		})
-		return
-	}
-
-	modelResult := gjson.GetBytes(rawJSON, "model")
-	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
-	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelResult.String())
-	rawJSON, _ = sjson.SetRawBytes(rawJSON, "system_instruction", []byte(gjson.GetBytes(rawJSON, "systemInstruction").Raw))
-	rawJSON, _ = sjson.DeleteBytes(rawJSON, "systemInstruction")
-
-	// log.Debugf("Request: %s", string(rawJSON))
-	// return
-
-	// Prepare the request for the backend client.
-	newRequestJSON := translatorGeminiToCodex.ConvertGeminiRequestToCodex(rawJSON)
-	// log.Debugf("Request: %s", newRequestJSON)
-
-	modelName := gjson.GetBytes(rawJSON, "model")
-
-	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
-
-	var cliClient client.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		if cliClient != nil {
-			cliClient.GetRequestMutex().Unlock()
-		}
-	}()
-
-outLoop:
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName.String())
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			flusher.Flush()
-			cliCancel()
-			return
-		}
-
-		log.Debugf("Request codex use account: %s", cliClient.GetEmail())
-
-		// Send the message and receive response chunks and errors via channels.
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
-
-		params := &translatorGeminiToCodex.ConvertCodexResponseToGeminiParams{
-			Model:             modelName.String(),
-			CreatedAt:         0,
-			ResponseID:        "",
-			LastStorageOutput: "",
-		}
-
-		for {
-			select {
-			// Handle client disconnection.
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request.
-					return
-				}
-			// Process incoming response chunks.
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					cliCancel()
-					return
-				}
-				// _, _ = logFile.Write(chunk)
-				// _, _ = logFile.Write([]byte("\n"))
-				h.AddAPIResponseData(c, chunk)
-				h.AddAPIResponseData(c, []byte("\n\n"))
-
-				if bytes.HasPrefix(chunk, []byte("data: ")) {
-					jsonData := chunk[6:]
-					data := gjson.ParseBytes(jsonData)
-					typeResult := data.Get("type")
-					if typeResult.String() != "" {
-						outputs := translatorGeminiToCodex.ConvertCodexResponseToGemini(jsonData, params)
-						if len(outputs) > 0 {
-							for i := 0; i < len(outputs); i++ {
-								outputs[i], _ = sjson.SetRaw("{}", "response", outputs[i])
-								_, _ = c.Writer.Write([]byte("data: "))
-								_, _ = c.Writer.Write([]byte(outputs[i]))
-								_, _ = c.Writer.Write([]byte("\n\n"))
-							}
-						}
-					}
-				}
-				flusher.Flush()
-			// Handle errors from the backend.
-			case errMessage, okError := <-errChan:
-				if okError {
-					if errMessage.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-						continue outLoop
-					} else {
-						// log.Debugf("code: %d, error: %s", errMessage.StatusCode, errMessage.Error.Error())
-						c.Status(errMessage.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, errMessage.Error.Error())
-						flusher.Flush()
-						cliCancel(errMessage.Error)
-					}
-					return
-				}
-			// Send a keep-alive signal to the client.
-			case <-time.After(500 * time.Millisecond):
-			}
-		}
-	}
-}
-
-func (h *GeminiCLIAPIHandlers) handleCodexInternalGenerateContent(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "application/json")
-	// orgRawJSON := rawJSON
-	modelResult := gjson.GetBytes(rawJSON, "model")
-	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
-	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelResult.String())
-	rawJSON, _ = sjson.SetRawBytes(rawJSON, "system_instruction", []byte(gjson.GetBytes(rawJSON, "systemInstruction").Raw))
-	rawJSON, _ = sjson.DeleteBytes(rawJSON, "systemInstruction")
-
-	// Prepare the request for the backend client.
-	newRequestJSON := translatorGeminiToCodex.ConvertGeminiRequestToCodex(rawJSON)
-	// log.Debugf("Request: %s", newRequestJSON)
-
-	modelName := gjson.GetBytes(rawJSON, "model")
-
-	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
-
-	var cliClient client.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		if cliClient != nil {
-			cliClient.GetRequestMutex().Unlock()
-		}
-	}()
-
-outLoop:
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName.String())
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			cliCancel()
-			return
-		}
-
-		log.Debugf("Request codex use account: %s", cliClient.GetEmail())
-
-		// Send the message and receive response chunks and errors via channels.
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
-		for {
-			select {
-			// Handle client disconnection.
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request.
-					return
-				}
-			// Process incoming response chunks.
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					cliCancel()
-					return
-				}
-
-				h.AddAPIResponseData(c, chunk)
-				h.AddAPIResponseData(c, []byte("\n\n"))
-
-				if bytes.HasPrefix(chunk, []byte("data: ")) {
-					jsonData := chunk[6:]
-					data := gjson.ParseBytes(jsonData)
-					typeResult := data.Get("type")
-					if typeResult.String() != "" {
-						var geminiStr string
-						geminiStr = translatorGeminiToCodex.ConvertCodexResponseToGeminiNonStream(jsonData, modelName.String())
-						if geminiStr != "" {
-							_, _ = c.Writer.Write([]byte(geminiStr))
-						}
-					}
-				}
-			// Handle errors from the backend.
-			case err, okError := <-errChan:
-				if okError {
-					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-						continue outLoop
-					} else {
-						c.Status(err.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
-						// log.Debugf("org: %s", string(orgRawJSON))
-						// log.Debugf("raw: %s", string(rawJSON))
-						// log.Debugf("newRequestJSON: %s", newRequestJSON)
-						cliCancel(err.Error)
-					}
-					return
-				}
-			// Send a keep-alive signal to the client.
-			case <-time.After(500 * time.Millisecond):
-			}
-		}
-	}
-}
-
-func (h *GeminiCLIAPIHandlers) handleClaudeInternalStreamGenerateContent(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "text/event-stream")
-	c.Header("Cache-Control", "no-cache")
-	c.Header("Connection", "keep-alive")
-	c.Header("Access-Control-Allow-Origin", "*")
-
-	// Get the http.Flusher interface to manually flush the response.
-	flusher, ok := c.Writer.(http.Flusher)
-	if !ok {
-		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: "Streaming not supported",
-				Type:    "server_error",
-			},
-		})
-		return
-	}
-
-	modelResult := gjson.GetBytes(rawJSON, "model")
-	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
-	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelResult.String())
-	rawJSON, _ = sjson.SetRawBytes(rawJSON, "system_instruction", []byte(gjson.GetBytes(rawJSON, "systemInstruction").Raw))
-	rawJSON, _ = sjson.DeleteBytes(rawJSON, "systemInstruction")
-
-	// Prepare the request for the backend client.
-	newRequestJSON := translatorGeminiToClaude.ConvertGeminiRequestToAnthropic(rawJSON)
-	newRequestJSON, _ = sjson.Set(newRequestJSON, "stream", true)
-
-	modelName := gjson.GetBytes(rawJSON, "model")
-
-	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
-
-	var cliClient client.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		if cliClient != nil {
-			cliClient.GetRequestMutex().Unlock()
-		}
-	}()
-
-outLoop:
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName.String())
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			flusher.Flush()
-			cliCancel()
-			return
-		}
-
-		if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
-			log.Debugf("Request claude use API Key: %s", apiKey)
-		} else {
-			log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
-		}
-
-		// Send the message and receive response chunks and errors via channels.
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
-
-		params := &translatorGeminiToClaude.ConvertAnthropicResponseToGeminiParams{
-			Model:      modelName.String(),
-			CreatedAt:  0,
-			ResponseID: "",
-		}
-		for {
-			select {
-			// Handle client disconnection.
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request.
-					return
-				}
-			// Process incoming response chunks.
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					cliCancel()
-					return
-				}
-
-				h.AddAPIResponseData(c, chunk)
-				h.AddAPIResponseData(c, []byte("\n\n"))
-
-				if bytes.HasPrefix(chunk, []byte("data: ")) {
-					jsonData := chunk[6:]
-					data := gjson.ParseBytes(jsonData)
-					typeResult := data.Get("type")
-					if typeResult.String() != "" {
-						// log.Debugf(string(jsonData))
-						outputs := translatorGeminiToClaude.ConvertAnthropicResponseToGemini(jsonData, params)
-						if len(outputs) > 0 {
-							for i := 0; i < len(outputs); i++ {
-								outputs[i], _ = sjson.SetRaw("{}", "response", outputs[i])
-								_, _ = c.Writer.Write([]byte("data: "))
-								_, _ = c.Writer.Write([]byte(outputs[i]))
-								_, _ = c.Writer.Write([]byte("\n\n"))
-							}
-						}
-					}
-					// log.Debugf(string(jsonData))
-				}
-				flusher.Flush()
-			// Handle errors from the backend.
-			case err, okError := <-errChan:
-				if okError {
-					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-						continue outLoop
-					} else {
-						c.Status(err.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
-						flusher.Flush()
-						cliCancel(err.Error)
-					}
-					return
-				}
-			// Send a keep-alive signal to the client.
-			case <-time.After(500 * time.Millisecond):
-			}
-		}
-	}
-}
-
-func (h *GeminiCLIAPIHandlers) handleClaudeInternalGenerateContent(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "application/json")
-
-	modelResult := gjson.GetBytes(rawJSON, "model")
-	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
-	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelResult.String())
-	rawJSON, _ = sjson.SetRawBytes(rawJSON, "system_instruction", []byte(gjson.GetBytes(rawJSON, "systemInstruction").Raw))
-	rawJSON, _ = sjson.DeleteBytes(rawJSON, "systemInstruction")
-
-	// Prepare the request for the backend client.
-	newRequestJSON := translatorGeminiToClaude.ConvertGeminiRequestToAnthropic(rawJSON)
-	// log.Debugf("Request: %s", newRequestJSON)
-	newRequestJSON, _ = sjson.Set(newRequestJSON, "stream", true)
-
-	modelName := gjson.GetBytes(rawJSON, "model")
-
-	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
-
-	var cliClient client.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		if cliClient != nil {
-			cliClient.GetRequestMutex().Unlock()
-		}
-	}()
-
-outLoop:
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName.String())
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			cliCancel()
-			return
-		}
-
-		if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
-			log.Debugf("Request claude use API Key: %s", apiKey)
-		} else {
-			log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
-		}
-
-		// Send the message and receive response chunks and errors via channels.
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
-
-		var allChunks [][]byte
-		for {
-			select {
-			// Handle client disconnection.
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request.
-					return
-				}
-			// Process incoming response chunks.
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					if len(allChunks) > 0 {
-						// Use the last chunk which should contain the complete message
-						finalResponseStr := translatorGeminiToClaude.ConvertAnthropicResponseToGeminiNonStream(allChunks, modelName.String())
-						finalResponse := []byte(finalResponseStr)
-						_, _ = c.Writer.Write(finalResponse)
-					}
-
-					cliCancel()
-					return
-				}
-
-				// Store chunk for building final response
-				if bytes.HasPrefix(chunk, []byte("data: ")) {
-					jsonData := chunk[6:]
-					allChunks = append(allChunks, jsonData)
-				}
-
-				h.AddAPIResponseData(c, chunk)
-				h.AddAPIResponseData(c, []byte("\n\n"))
-
-			// Handle errors from the backend.
-			case err, okError := <-errChan:
-				if okError {
-					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-						continue outLoop
-					} else {
-						c.Status(err.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
-						cliCancel(err.Error)
-					}
-					return
-				}
-			// Send a keep-alive signal to the client.
-			case <-time.After(500 * time.Millisecond):
-			}
-		}
-	}
-}
-
-func (h *GeminiCLIAPIHandlers) handleQwenInternalStreamGenerateContent(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "text/event-stream")
-	c.Header("Cache-Control", "no-cache")
-	c.Header("Connection", "keep-alive")
-	c.Header("Access-Control-Allow-Origin", "*")
-
-	// Get the http.Flusher interface to manually flush the response.
-	flusher, ok := c.Writer.(http.Flusher)
-	if !ok {
-		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: "Streaming not supported",
-				Type:    "server_error",
-			},
-		})
-		return
-	}
-
-	modelResult := gjson.GetBytes(rawJSON, "model")
-	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
-	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelResult.String())
-	rawJSON, _ = sjson.SetRawBytes(rawJSON, "system_instruction", []byte(gjson.GetBytes(rawJSON, "systemInstruction").Raw))
-	rawJSON, _ = sjson.DeleteBytes(rawJSON, "systemInstruction")
-
-	// Prepare the request for the backend client.
-	newRequestJSON := translatorGeminiToQwen.ConvertGeminiRequestToOpenAI(rawJSON)
-	newRequestJSON, _ = sjson.Set(newRequestJSON, "stream", true)
-
-	// log.Debugf("Request: %s", string(rawJSON))
-	// return
-
-	modelName := gjson.GetBytes(rawJSON, "model")
-
-	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
-
-	var cliClient client.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		if cliClient != nil {
-			cliClient.GetRequestMutex().Unlock()
-		}
-	}()
-
-outLoop:
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName.String())
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			flusher.Flush()
-			cliCancel()
-			return
-		}
-
-		log.Debugf("Request qwen use account: %s", cliClient.(*client.QwenClient).GetEmail())
-
-		// Send the message and receive response chunks and errors via channels.
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
-
-		params := &translatorGeminiToQwen.ConvertOpenAIResponseToGeminiParams{
-			ToolCallsAccumulator: nil,
-			ContentAccumulator:   strings.Builder{},
-			IsFirstChunk:         false,
-		}
-		for {
-			select {
-			// Handle client disconnection.
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request.
-					return
-				}
-			// Process incoming response chunks.
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					cliCancel()
-					return
-				}
-
-				h.AddAPIResponseData(c, chunk)
-				h.AddAPIResponseData(c, []byte("\n\n"))
-
-				if bytes.HasPrefix(chunk, []byte("data: ")) {
-					jsonData := chunk[6:]
-					// log.Debugf(string(jsonData))
-					outputs := translatorGeminiToQwen.ConvertOpenAIResponseToGemini(jsonData, params)
-					if len(outputs) > 0 {
-						for i := 0; i < len(outputs); i++ {
-							outputs[i], _ = sjson.SetRaw("{}", "response", outputs[i])
-							_, _ = c.Writer.Write([]byte("data: "))
-							_, _ = c.Writer.Write([]byte(outputs[i]))
-							_, _ = c.Writer.Write([]byte("\n\n"))
-						}
-					}
-					// log.Debugf(string(jsonData))
-				}
-				flusher.Flush()
-			// Handle errors from the backend.
-			case err, okError := <-errChan:
-				if okError {
-					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-						continue outLoop
-					} else {
-						c.Status(err.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
-						flusher.Flush()
-						cliCancel(err.Error)
-					}
-					return
-				}
-			// Send a keep-alive signal to the client.
-			case <-time.After(500 * time.Millisecond):
-			}
-		}
-	}
-}
-
-func (h *GeminiCLIAPIHandlers) handleQwenInternalGenerateContent(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "application/json")
-
-	modelResult := gjson.GetBytes(rawJSON, "model")
-	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
-	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelResult.String())
-	rawJSON, _ = sjson.SetRawBytes(rawJSON, "system_instruction", []byte(gjson.GetBytes(rawJSON, "systemInstruction").Raw))
-	rawJSON, _ = sjson.DeleteBytes(rawJSON, "systemInstruction")
-
-	// Prepare the request for the backend client.
-	newRequestJSON := translatorGeminiToQwen.ConvertGeminiRequestToOpenAI(rawJSON)
-	// log.Debugf("Request: %s", newRequestJSON)
-
-	modelName := gjson.GetBytes(rawJSON, "model")
-
-	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
-
-	var cliClient client.Client
-	defer func() {
-		if cliClient != nil {
-			cliClient.GetRequestMutex().Unlock()
-		}
-	}()
-
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName.String())
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			cliCancel()
-			return
-		}
-
-		log.Debugf("Request use qwen account: %s", cliClient.GetEmail())
-
-		resp, err := cliClient.SendRawMessage(cliCtx, []byte(newRequestJSON), "")
-		if err != nil {
-			if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-				continue
-			} else {
-				c.Status(err.StatusCode)
-				_, _ = c.Writer.Write([]byte(err.Error.Error()))
-				cliCancel(err.Error)
-			}
-			break
-		} else {
-			h.AddAPIResponseData(c, resp)
-			h.AddAPIResponseData(c, []byte("\n"))
-
-			newResp := translatorGeminiToQwen.ConvertOpenAINonStreamResponseToGemini(resp)
-			_, _ = c.Writer.Write([]byte(newResp))
-			cliCancel(resp)
-			break
-		}
-	}
-}
--- a/internal/api/handlers/gemini/gemini-cli_handlers.go
+++ b/internal/api/handlers/gemini/gemini-cli_handlers.go
@@ -0,0 +1,268 @@
+// Package gemini provides HTTP handlers for Gemini CLI API functionality.
+// This package implements handlers that process CLI-specific requests for Gemini API operations,
+// including content generation and streaming content generation endpoints.
+// The handlers restrict access to localhost only and manage communication with the backend service.
+package gemini
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/gin-gonic/gin"
+	"github.com/luispater/CLIProxyAPI/internal/api/handlers"
+	. "github.com/luispater/CLIProxyAPI/internal/constant"
+	"github.com/luispater/CLIProxyAPI/internal/interfaces"
+	"github.com/luispater/CLIProxyAPI/internal/util"
+	log "github.com/sirupsen/logrus"
+	"github.com/tidwall/gjson"
+)
+
+// GeminiCLIAPIHandler contains the handlers for Gemini CLI API endpoints.
+// It embeds *handlers.BaseAPIHandler, so the base handler's fields and methods are promoted onto it.
+type GeminiCLIAPIHandler struct {
+	*handlers.BaseAPIHandler
+}
+
+// NewGeminiCLIAPIHandler wraps the given base handler in a new GeminiCLIAPIHandler.
+// The returned handler embeds apiHandlers itself (the pointer is stored, not copied).
+func NewGeminiCLIAPIHandler(apiHandlers *handlers.BaseAPIHandler) *GeminiCLIAPIHandler {
+	handler := new(GeminiCLIAPIHandler)
+	handler.BaseAPIHandler = apiHandlers
+	return handler
+}
+
+// HandlerType reports the type of this handler, which is always GEMINICLI.
+func (h *GeminiCLIAPIHandler) HandlerType() string {
+	return GEMINICLI
+}
+
+// Models reports the models supported by this handler; the list is always empty but non-nil.
+func (h *GeminiCLIAPIHandler) Models() []map[string]any {
+	return []map[string]any{}
+}
+
+// CLIHandler serves Gemini CLI requests; only RemoteAddr values prefixed "127.0.0.1:" are allowed.
+// The two /v1internal generate paths go to internal handlers; other paths are proxied upstream.
+func (h *GeminiCLIAPIHandler) CLIHandler(c *gin.Context) {
+	if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") {
+		c.JSON(http.StatusForbidden, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "CLI reply only allow local access",
+				Type:    "forbidden",
+			},
+		})
+		return
+	}
+
+	rawJSON, _ := c.GetRawData()
+	requestRawURI := c.Request.URL.Path
+
+	if requestRawURI == "/v1internal:generateContent" {
+		h.handleInternalGenerateContent(c, rawJSON)
+	} else if requestRawURI == "/v1internal:streamGenerateContent" {
+		h.handleInternalStreamGenerateContent(c, rawJSON)
+	} else {
+		// Tie the upstream call to the caller's request context so a disconnect aborts it.
+		target := fmt.Sprintf("https://cloudcode-pa.googleapis.com%s", c.Request.URL.RequestURI())
+		req, err := http.NewRequestWithContext(c.Request.Context(), "POST", target, bytes.NewBuffer(rawJSON))
+		if err != nil {
+			c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
+				Error: handlers.ErrorDetail{
+					Message: fmt.Sprintf("Invalid request: %v", err),
+					Type:    "invalid_request_error",
+				},
+			})
+			return
+		}
+		for key, value := range c.Request.Header {
+			req.Header[key] = value
+		}
+
+		httpClient := util.SetProxy(h.Cfg, &http.Client{})
+		resp, err := httpClient.Do(req)
+		if err != nil {
+			c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
+				Error: handlers.ErrorDetail{
+					Message: fmt.Sprintf("Invalid request: %v", err),
+					Type:    "invalid_request_error",
+				},
+			})
+			return
+		}
+
+		if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+			defer func() {
+				if err = resp.Body.Close(); err != nil {
+					log.Printf("warn: failed to close response body: %v", err)
+				}
+			}()
+			bodyBytes, _ := io.ReadAll(resp.Body)
+			// Every non-2xx upstream status is reported to the caller as 400 with the upstream body.
+			c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
+				Error: handlers.ErrorDetail{
+					Message: string(bodyBytes),
+					Type:    "invalid_request_error",
+				},
+			})
+			return
+		}
+
+		defer func() {
+			_ = resp.Body.Close()
+		}()
+		// Forward every value of each upstream header, not only the first one.
+		for key, values := range resp.Header {
+			c.Writer.Header()[key] = values
+		}
+		output, err := io.ReadAll(resp.Body)
+		if err != nil {
+			log.Errorf("Failed to read response body: %v", err)
+			return
+		}
+		_, _ = c.Writer.Write(output)
+		c.Set("API_RESPONSE", output)
+	}
+}
+
+// handleInternalStreamGenerateContent streams a generate-content reply to the caller.
+// SSE response headers are set only when GetAlt returns ""; each backend chunk is written as
+// a "data: " event and flushed. It returns when the request context, the stream or an error ends it.
+func (h *GeminiCLIAPIHandler) handleInternalStreamGenerateContent(c *gin.Context, rawJSON []byte) {
+	alt := h.GetAlt(c)
+
+	if alt == "" {
+		c.Header("Content-Type", "text/event-stream")
+		c.Header("Cache-Control", "no-cache")
+		c.Header("Connection", "keep-alive")
+		c.Header("Access-Control-Allow-Origin", "*")
+	}
+
+	// Get the http.Flusher interface to manually flush the response.
+	flusher, ok := c.Writer.(http.Flusher)
+	if !ok {
+		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Streaming not supported",
+				Type:    "server_error",
+			},
+		})
+		return
+	}
+
+	modelName := gjson.GetBytes(rawJSON, "model").String()
+
+	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
+
+	var cliClient interfaces.Client
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+outLoop:
+	for {
+		var errorResponse *interfaces.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName)
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
+			flusher.Flush()
+			cliCancel()
+			return
+		}
+
+		// Send the message and receive response chunks and errors via channels.
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, modelName, rawJSON, "")
+
+		for {
+			select {
+			// Stop on any request-context termination; a closed Done channel would otherwise spin.
+			case <-c.Request.Context().Done():
+				log.Debugf("Client disconnected: %v", c.Request.Context().Err())
+				cliCancel() // Cancel the backend request.
+				return
+			// Process incoming response chunks.
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					cliCancel()
+					return
+				}
+				_, _ = c.Writer.Write([]byte("data: "))
+				_, _ = c.Writer.Write(chunk)
+				_, _ = c.Writer.Write([]byte("\n\n"))
+
+				flusher.Flush()
+			// Handle errors from the backend.
+			case err, okError := <-errChan:
+				if !okError {
+					// A closed channel is always ready; nil it so this case stops firing.
+					errChan = nil
+					continue
+				}
+				if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+					// NOTE(review): the defer unlocks only the last cliClient; confirm the old one is released.
+					continue outLoop
+				}
+				c.Status(err.StatusCode)
+				_, _ = fmt.Fprint(c.Writer, err.Error.Error())
+				flusher.Flush()
+				cliCancel(err.Error)
+				return
+			// Periodic wake-up only; no keep-alive bytes are written to the client.
+			case <-time.After(500 * time.Millisecond):
+			}
+		}
+	}
+}
+
+// handleInternalGenerateContent serves a non-streaming generate-content request.
+// The backend reply, or the backend error text and status, is written to the caller in one piece.
+// The backend client is chosen by the "model" field of rawJSON.
+func (h *GeminiCLIAPIHandler) handleInternalGenerateContent(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "application/json")
+	modelName := gjson.GetBytes(rawJSON, "model").String()
+
+	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
+
+	var cliClient interfaces.Client
+	defer func() {
+		// Unlock the request mutex of whichever client was selected last.
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+	for {
+		var selectErr *interfaces.ErrorMessage
+		cliClient, selectErr = h.GetClient(modelName)
+		if selectErr != nil {
+			c.Status(selectErr.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, selectErr.Error.Error())
+			cliCancel()
+			return
+		}
+
+		resp, sendErr := cliClient.SendRawMessage(cliCtx, modelName, rawJSON, "")
+		if sendErr == nil {
+			// Success: relay the backend payload unchanged.
+			_, _ = c.Writer.Write(resp)
+			cliCancel(resp)
+			return
+		}
+		// A 429 with SwitchProject enabled loops back to select a client again.
+		if sendErr.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+			continue
+		}
+		c.Status(sendErr.StatusCode)
+		_, _ = c.Writer.Write([]byte(sendErr.Error.Error()))
+		cliCancel(sendErr.Error)
+		return
+	}
+}
--- a/internal/api/handlers/gemini/gemini_handlers.go
+++ b/internal/api/handlers/gemini/gemini_handlers.go
--- a/internal/api/handlers/handlers.go
+++ b/internal/api/handlers/handlers.go
@@ -10,6 +10,7 @@ import (
 	"github.com/gin-gonic/gin"
 	"github.com/luispater/CLIProxyAPI/internal/client"
 	"github.com/luispater/CLIProxyAPI/internal/config"
+	"github.com/luispater/CLIProxyAPI/internal/interfaces"
 	"github.com/luispater/CLIProxyAPI/internal/util"
 	log "github.com/sirupsen/logrus"
 	"golang.org/x/net/context"
@@ -35,12 +36,12 @@ type ErrorDetail struct {
 	Code string `json:"code,omitempty"`
 }

-// APIHandlers contains the handlers for API endpoints.
+// BaseAPIHandler contains the handlers for API endpoints.
 // It holds a pool of clients to interact with the backend service and manages
 // load balancing, client selection, and configuration.
-type APIHandlers struct {
+type BaseAPIHandler struct {
 	// CliClients is the pool of available AI service clients.
-	CliClients []client.Client
+	CliClients []interfaces.Client

 	// Cfg holds the current application configuration.
 	Cfg *config.Config
@@ -51,12 +52,9 @@ type APIHandlers struct {
 	// LastUsedClientIndex tracks the last used client index for each provider
 	// to implement round-robin load balancing.
 	LastUsedClientIndex map[string]int
-
-	// apiResponseData recording provider api response data
-	apiResponseData map[*gin.Context][]byte
 }

-// NewAPIHandlers creates a new API handlers instance.
+// NewBaseAPIHandlers creates a new API handlers instance.
 // It takes a slice of clients and configuration as input.
 //
 // Parameters:
@@ -64,14 +62,13 @@ type APIHandlers struct {
 //   - cfg: The application configuration
 //
 // Returns:
-//   - *APIHandlers: A new API handlers instance
-func NewAPIHandlers(cliClients []client.Client, cfg *config.Config) *APIHandlers {
-	return &APIHandlers{
+//   - *BaseAPIHandler: A new API handlers instance
+func NewBaseAPIHandlers(cliClients []interfaces.Client, cfg *config.Config) *BaseAPIHandler {
+	return &BaseAPIHandler{
 		CliClients:          cliClients,
 		Cfg:                 cfg,
 		Mutex:               &sync.Mutex{},
 		LastUsedClientIndex: make(map[string]int),
-		apiResponseData:     make(map[*gin.Context][]byte),
 	}
 }

@@ -81,7 +78,7 @@ func NewAPIHandlers(cliClients []client.Client, cfg *config.Config) *APIHandlers
 // Parameters:
 //   - clients: The new slice of AI service clients
 //   - cfg: The new application configuration
-func (h *APIHandlers) UpdateClients(clients []client.Client, cfg *config.Config) {
+func (h *BaseAPIHandler) UpdateClients(clients []interfaces.Client, cfg *config.Config) {
 	h.CliClients = clients
 	h.Cfg = cfg
 }
@@ -97,66 +94,47 @@ func (h *APIHandlers) UpdateClients(clients []client.Client, cfg *config.Config)
 // Returns:
 //   - client.Client: An available client for the requested model
 //   - *client.ErrorMessage: An error message if no client is available
-func (h *APIHandlers) GetClient(modelName string, isGenerateContent ...bool) (client.Client, *client.ErrorMessage) {
-	provider := util.GetProviderName(modelName)
-	clients := make([]client.Client, 0)
-	if provider == "gemini" {
-		for i := 0; i < len(h.CliClients); i++ {
-			if cli, ok := h.CliClients[i].(*client.GeminiClient); ok {
-				clients = append(clients, cli)
-			}
-		}
-	} else if provider == "gpt" {
-		for i := 0; i < len(h.CliClients); i++ {
-			if cli, ok := h.CliClients[i].(*client.CodexClient); ok {
-				clients = append(clients, cli)
-			}
-		}
-	} else if provider == "claude" {
-		for i := 0; i < len(h.CliClients); i++ {
-			if cli, ok := h.CliClients[i].(*client.ClaudeClient); ok {
-				clients = append(clients, cli)
-			}
-		}
-	} else if provider == "qwen" {
-		for i := 0; i < len(h.CliClients); i++ {
-			if cli, ok := h.CliClients[i].(*client.QwenClient); ok {
-				clients = append(clients, cli)
-			}
+func (h *BaseAPIHandler) GetClient(modelName string, isGenerateContent ...bool) (interfaces.Client, *interfaces.ErrorMessage) {
+	clients := make([]interfaces.Client, 0)
+	for i := 0; i < len(h.CliClients); i++ {
+		if h.CliClients[i].CanProvideModel(modelName) {
+			clients = append(clients, h.CliClients[i])
 		}
 	}

-	if _, hasKey := h.LastUsedClientIndex[provider]; !hasKey {
-		h.LastUsedClientIndex[provider] = 0
+	if _, hasKey := h.LastUsedClientIndex[modelName]; !hasKey {
+		h.LastUsedClientIndex[modelName] = 0
 	}

 	if len(clients) == 0 {
-		return nil, &client.ErrorMessage{StatusCode: 500, Error: fmt.Errorf("no clients available")}
+		return nil, &interfaces.ErrorMessage{StatusCode: 500, Error: fmt.Errorf("no clients available")}
 	}

-	var cliClient client.Client
+	var cliClient interfaces.Client

 	// Lock the mutex to update the last used client index
 	h.Mutex.Lock()
-	startIndex := h.LastUsedClientIndex[provider]
+	startIndex := h.LastUsedClientIndex[modelName]
 	if (len(isGenerateContent) > 0 && isGenerateContent[0]) || len(isGenerateContent) == 0 {
 		currentIndex := (startIndex + 1) % len(clients)
-		h.LastUsedClientIndex[provider] = currentIndex
+		h.LastUsedClientIndex[modelName] = currentIndex
 	}
 	h.Mutex.Unlock()

 	// Reorder the client to start from the last used index
-	reorderedClients := make([]client.Client, 0)
+	reorderedClients := make([]interfaces.Client, 0)
 	for i := 0; i < len(clients); i++ {
 		cliClient = clients[(startIndex+1+i)%len(clients)]
 		if cliClient.IsModelQuotaExceeded(modelName) {
-			if provider == "gemini" {
-				log.Debugf("Gemini Model %s is quota exceeded for account %s, project id: %s", modelName, cliClient.GetEmail(), cliClient.(*client.GeminiClient).GetProjectID())
-			} else if provider == "gpt" {
+			if cliClient.Provider() == "gemini-cli" {
+				log.Debugf("Gemini Model %s is quota exceeded for account %s, project id: %s", modelName, cliClient.GetEmail(), cliClient.(*client.GeminiCLIClient).GetProjectID())
+			} else if cliClient.Provider() == "gemini" {
+				log.Debugf("Gemini Model %s is quota exceeded for account %s", modelName, cliClient.GetEmail())
+			} else if cliClient.Provider() == "codex" {
 				log.Debugf("Codex Model %s is quota exceeded for account %s", modelName, cliClient.GetEmail())
-			} else if provider == "claude" {
+			} else if cliClient.Provider() == "claude" {
 				log.Debugf("Claude Model %s is quota exceeded for account %s", modelName, cliClient.GetEmail())
-			} else if provider == "qwen" {
+			} else if cliClient.Provider() == "qwen" {
 				log.Debugf("Qwen Model %s is quota exceeded for account %s", modelName, cliClient.GetEmail())
 			}
 			cliClient = nil
@@ -167,11 +145,11 @@ func (h *APIHandlers) GetClient(modelName string, isGenerateContent ...bool) (cl
 	}

 	if len(reorderedClients) == 0 {
-		if provider == "claude" {
+		if util.GetProviderName(modelName) == "claude" {
 			// log.Debugf("Claude Model %s is quota exceeded for all accounts", modelName)
-			return nil, &client.ErrorMessage{StatusCode: 429, Error: fmt.Errorf(`{"type":"error","error":{"type":"rate_limit_error","message":"This request would exceed your account's rate limit. Please try again later."}}`)}
+			return nil, &interfaces.ErrorMessage{StatusCode: 429, Error: fmt.Errorf(`{"type":"error","error":{"type":"rate_limit_error","message":"This request would exceed your account's rate limit. Please try again later."}}`)}
 		}
-		return nil, &client.ErrorMessage{StatusCode: 429, Error: fmt.Errorf(`{"error":{"code":429,"message":"All the models of '%s' are quota exceeded","status":"RESOURCE_EXHAUSTED"}}`, modelName)}
+		return nil, &interfaces.ErrorMessage{StatusCode: 429, Error: fmt.Errorf(`{"error":{"code":429,"message":"All the models of '%s' are quota exceeded","status":"RESOURCE_EXHAUSTED"}}`, modelName)}
 	}

 	locked := false
@@ -198,7 +176,7 @@ func (h *APIHandlers) GetClient(modelName string, isGenerateContent ...bool) (cl
 //
 // Returns:
 //   - string: The alt parameter value, or empty string if it's "sse"
-func (h *APIHandlers) GetAlt(c *gin.Context) string {
+func (h *BaseAPIHandler) GetAlt(c *gin.Context) string {
 	var alt string
 	var hasAlt bool
 	alt, hasAlt = c.GetQuery("alt")
@@ -211,9 +189,22 @@ func (h *APIHandlers) GetAlt(c *gin.Context) string {
 	return alt
 }

-func (h *APIHandlers) GetContextWithCancel(c *gin.Context, ctx context.Context) (context.Context, APIHandlerCancelFunc) {
+// GetContextWithCancel creates a new context with cancellation capabilities.
+// It embeds the Gin context and the API handler into the new context for later use.
+// The returned cancel function also handles logging the API response if request logging is enabled.
+//
+// Parameters:
+//   - handler: The API handler associated with the request.
+//   - c: The Gin context of the current request.
+//   - ctx: The parent context.
+//
+// Returns:
+//   - context.Context: The new context with cancellation and embedded values.
+//   - APIHandlerCancelFunc: A function to cancel the context and log the response.
+func (h *BaseAPIHandler) GetContextWithCancel(handler interfaces.APIHandler, c *gin.Context, ctx context.Context) (context.Context, APIHandlerCancelFunc) {
 	newCtx, cancel := context.WithCancel(ctx)
 	newCtx = context.WithValue(newCtx, "gin", c)
+	newCtx = context.WithValue(newCtx, "handler", handler)
 	return newCtx, func(params ...interface{}) {
 		if h.Cfg.RequestLog {
 			if len(params) == 1 {
@@ -228,11 +219,6 @@ func (h *APIHandlers) GetContextWithCancel(c *gin.Context, ctx context.Context)
 				case bool:
 				case nil:
 				}
-			} else {
-				if _, hasKey := h.apiResponseData[c]; hasKey {
-					c.Set("API_RESPONSE", h.apiResponseData[c])
-					delete(h.apiResponseData, c)
-				}
 			}
 		}

@@ -240,13 +226,6 @@ func (h *APIHandlers) GetContextWithCancel(c *gin.Context, ctx context.Context)
 	}
 }

-func (h *APIHandlers) AddAPIResponseData(c *gin.Context, data []byte) {
-	if h.Cfg.RequestLog {
-		if _, hasKey := h.apiResponseData[c]; !hasKey {
-			h.apiResponseData[c] = make([]byte, 0)
-		}
-		h.apiResponseData[c] = append(h.apiResponseData[c], data...)
-	}
-}
-
+// APIHandlerCancelFunc is a function type for canceling an API handler's context.
+// It can optionally accept parameters, which are used for logging the response.
 type APIHandlerCancelFunc func(params ...interface{})
--- a/internal/api/handlers/openai/openai_handlers.go
+++ b/internal/api/handlers/openai/openai_handlers.go
@@ -7,126 +7,130 @@
 package openai

 import (
-	"bytes"
 	"context"
 	"fmt"
 	"net/http"
 	"time"

+	"github.com/gin-gonic/gin"
 	"github.com/luispater/CLIProxyAPI/internal/api/handlers"
-	"github.com/luispater/CLIProxyAPI/internal/client"
-	translatorOpenAIToClaude "github.com/luispater/CLIProxyAPI/internal/translator/claude/openai"
-	translatorOpenAIToCodex "github.com/luispater/CLIProxyAPI/internal/translator/codex/openai"
-	translatorOpenAIToGeminiCli "github.com/luispater/CLIProxyAPI/internal/translator/gemini-cli/openai"
-	"github.com/luispater/CLIProxyAPI/internal/util"
+	. "github.com/luispater/CLIProxyAPI/internal/constant"
+	"github.com/luispater/CLIProxyAPI/internal/interfaces"
 	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
-	"github.com/tidwall/sjson"
-
-	"github.com/gin-gonic/gin"
 )

-// OpenAIAPIHandlers contains the handlers for OpenAI API endpoints.
+// OpenAIAPIHandler contains the handlers for OpenAI API endpoints.
 // It holds a pool of clients to interact with the backend service.
-type OpenAIAPIHandlers struct {
-	*handlers.APIHandlers
+type OpenAIAPIHandler struct {
+	*handlers.BaseAPIHandler
 }

-// NewOpenAIAPIHandlers creates a new OpenAI API handlers instance.
-// It takes an APIHandlers instance as input and returns an OpenAIAPIHandlers.
+// NewOpenAIAPIHandler creates a new OpenAI API handlers instance.
+// It takes a BaseAPIHandler instance as input and returns an OpenAIAPIHandler.
 //
 // Parameters:
 //   - apiHandlers: The base API handlers instance
 //
 // Returns:
-//   - *OpenAIAPIHandlers: A new OpenAI API handlers instance
-func NewOpenAIAPIHandlers(apiHandlers *handlers.APIHandlers) *OpenAIAPIHandlers {
-	return &OpenAIAPIHandlers{
-		APIHandlers: apiHandlers,
+//   - *OpenAIAPIHandler: A new OpenAI API handlers instance
+func NewOpenAIAPIHandler(apiHandlers *handlers.BaseAPIHandler) *OpenAIAPIHandler {
+	return &OpenAIAPIHandler{
+		BaseAPIHandler: apiHandlers,
 	}
 }

-// Models handles the /v1/models endpoint.
+// HandlerType returns the identifier for this handler implementation.
+//
+// Returns:
+//   - string: The OPENAI constant, which is in scope through the file's
+//     dot-import of the internal/constant package.
+func (h *OpenAIAPIHandler) HandlerType() string {
+	return OPENAI
+}
+
+// Models returns the OpenAI-compatible model metadata supported by this handler.
+//
+// The list is hardcoded and rebuilt on every call, so callers receive fresh
+// maps each time. Entries appear in a fixed order: Gemini 2.5 Pro,
+// Gemini 2.5 Flash, GPT 5 and Claude Opus 4.1.
+//
+// Returns:
+//   - []map[string]any: One metadata map per model
+func (h *OpenAIAPIHandler) Models() []map[string]any {
+	geminiPro := map[string]any{
+		"id":                    "gemini-2.5-pro",
+		"object":                "model",
+		"version":               "2.5",
+		"name":                  "Gemini 2.5 Pro",
+		"description":           "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+		"context_length":        1_048_576,
+		"max_completion_tokens": 65_536,
+		"supported_parameters": []string{
+			"tools",
+			"temperature",
+			"top_p",
+			"top_k",
+		},
+		"temperature":    1,
+		"topP":           0.95,
+		"topK":           64,
+		"maxTemperature": 2,
+		"thinking":       true,
+	}
+
+	geminiFlash := map[string]any{
+		"id":                    "gemini-2.5-flash",
+		"object":                "model",
+		"version":               "001",
+		"name":                  "Gemini 2.5 Flash",
+		"description":           "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+		"context_length":        1_048_576,
+		"max_completion_tokens": 65_536,
+		"supported_parameters": []string{
+			"tools",
+			"temperature",
+			"top_p",
+			"top_k",
+		},
+		"temperature":    1,
+		"topP":           0.95,
+		"topK":           64,
+		"maxTemperature": 2,
+		"thinking":       true,
+	}
+
+	gpt5 := map[string]any{
+		"id":                    "gpt-5",
+		"object":                "model",
+		"version":               "gpt-5-2025-08-07",
+		"name":                  "GPT 5",
+		"description":           "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
+		"context_length":        400_000,
+		"max_completion_tokens": 128_000,
+		"supported_parameters": []string{
+			"tools",
+		},
+		"temperature":    1,
+		"topP":           0.95,
+		"topK":           64,
+		"maxTemperature": 2,
+		"thinking":       true,
+	}
+
+	claudeOpus := map[string]any{
+		"id":                    "claude-opus-4-1-20250805",
+		"object":                "model",
+		"version":               "claude-opus-4-1-20250805",
+		"name":                  "Claude Opus 4.1",
+		"description":           "Anthropic's most capable model.",
+		"context_length":        200_000,
+		"max_completion_tokens": 32_000,
+		"supported_parameters": []string{
+			"tools",
+		},
+		"temperature":    1,
+		"topP":           0.95,
+		"topK":           64,
+		"maxTemperature": 2,
+		"thinking":       true,
+	}
+
+	return []map[string]any{geminiPro, geminiFlash, gpt5, claudeOpus}
+}
+
+// OpenAIModels handles the /v1/models endpoint.
 // It returns a hardcoded list of available AI models with their capabilities
 // and specifications in OpenAI-compatible format.
-func (h *OpenAIAPIHandlers) Models(c *gin.Context) {
+func (h *OpenAIAPIHandler) OpenAIModels(c *gin.Context) {
 	c.JSON(http.StatusOK, gin.H{
-		"data": []map[string]any{
-			{
-				"id":                    "gemini-2.5-pro",
-				"object":                "model",
-				"version":               "2.5",
-				"name":                  "Gemini 2.5 Pro",
-				"description":           "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
-				"context_length":        1_048_576,
-				"max_completion_tokens": 65_536,
-				"supported_parameters": []string{
-					"tools",
-					"temperature",
-					"top_p",
-					"top_k",
-				},
-				"temperature":    1,
-				"topP":           0.95,
-				"topK":           64,
-				"maxTemperature": 2,
-				"thinking":       true,
-			},
-			{
-				"id":                    "gemini-2.5-flash",
-				"object":                "model",
-				"version":               "001",
-				"name":                  "Gemini 2.5 Flash",
-				"description":           "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-				"context_length":        1_048_576,
-				"max_completion_tokens": 65_536,
-				"supported_parameters": []string{
-					"tools",
-					"temperature",
-					"top_p",
-					"top_k",
-				},
-				"temperature":    1,
-				"topP":           0.95,
-				"topK":           64,
-				"maxTemperature": 2,
-				"thinking":       true,
-			},
-			{
-				"id":                    "gpt-5",
-				"object":                "model",
-				"version":               "gpt-5-2025-08-07",
-				"name":                  "GPT 5",
-				"description":           "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-				"context_length":        400_000,
-				"max_completion_tokens": 128_000,
-				"supported_parameters": []string{
-					"tools",
-				},
-				"temperature":    1,
-				"topP":           0.95,
-				"topK":           64,
-				"maxTemperature": 2,
-				"thinking":       true,
-			},
-			{
-				"id":                    "claude-opus-4-1-20250805",
-				"object":                "model",
-				"version":               "claude-opus-4-1-20250805",
-				"name":                  "Claude Opus 4.1",
-				"description":           "Anthropic's most capable model.",
-				"context_length":        200_000,
-				"max_completion_tokens": 32_000,
-				"supported_parameters": []string{
-					"tools",
-				},
-				"temperature":    1,
-				"topP":           0.95,
-				"topK":           64,
-				"maxTemperature": 2,
-				"thinking":       true,
-			},
-		},
+		"data": h.Models(),
 	})
 }

@@ -136,7 +140,7 @@ func (h *OpenAIAPIHandlers) Models(c *gin.Context) {
 //
 // Parameters:
 //   - c: The Gin context containing the HTTP request and response
-func (h *OpenAIAPIHandlers) ChatCompletions(c *gin.Context) {
+func (h *OpenAIAPIHandler) ChatCompletions(c *gin.Context) {
 	rawJSON, err := c.GetRawData()
 	// If data retrieval fails, return a 400 Bad Request error.
 	if err != nil {
@@ -151,50 +155,28 @@ func (h *OpenAIAPIHandlers) ChatCompletions(c *gin.Context) {

 	// Check if the client requested a streaming response.
 	streamResult := gjson.GetBytes(rawJSON, "stream")
-	modelName := gjson.GetBytes(rawJSON, "model")
-	provider := util.GetProviderName(modelName.String())
-	if provider == "gemini" {
-		if streamResult.Type == gjson.True {
-			h.handleGeminiStreamingResponse(c, rawJSON)
-		} else {
-			h.handleGeminiNonStreamingResponse(c, rawJSON)
-		}
-	} else if provider == "gpt" {
-		if streamResult.Type == gjson.True {
-			h.handleCodexStreamingResponse(c, rawJSON)
-		} else {
-			h.handleCodexNonStreamingResponse(c, rawJSON)
-		}
-	} else if provider == "claude" {
-		if streamResult.Type == gjson.True {
-			h.handleClaudeStreamingResponse(c, rawJSON)
-		} else {
-			h.handleClaudeNonStreamingResponse(c, rawJSON)
-		}
-	} else if provider == "qwen" {
-		// qwen3-coder-plus / qwen3-coder-flash
-		if streamResult.Type == gjson.True {
-			h.handleQwenStreamingResponse(c, rawJSON)
-		} else {
-			h.handleQwenNonStreamingResponse(c, rawJSON)
-		}
+	if streamResult.Type == gjson.True {
+		h.handleStreamingResponse(c, rawJSON)
+	} else {
+		h.handleNonStreamingResponse(c, rawJSON)
 	}
+
 }

-// handleGeminiNonStreamingResponse handles non-streaming chat completion responses
+// handleNonStreamingResponse handles non-streaming chat completion responses
 // for Gemini models. It selects a client from the pool, sends the request, and
 // aggregates the response before sending it back to the client in OpenAI format.
 //
 // Parameters:
 //   - c: The Gin context containing the HTTP request and response
 //   - rawJSON: The raw JSON bytes of the OpenAI-compatible request
-func (h *OpenAIAPIHandlers) handleGeminiNonStreamingResponse(c *gin.Context, rawJSON []byte) {
+func (h *OpenAIAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSON []byte) {
 	c.Header("Content-Type", "application/json")

-	modelName, systemInstruction, contents, tools := translatorOpenAIToGeminiCli.ConvertOpenAIChatRequestToCli(rawJSON)
-	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
+	modelName := gjson.GetBytes(rawJSON, "model").String()
+	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())

-	var cliClient client.Client
+	var cliClient interfaces.Client
 	defer func() {
 		if cliClient != nil {
 			cliClient.GetRequestMutex().Unlock()
@@ -202,7 +184,7 @@ func (h *OpenAIAPIHandlers) handleGeminiNonStreamingResponse(c *gin.Context, raw
 	}()

 	for {
-		var errorResponse *client.ErrorMessage
+		var errorResponse *interfaces.ErrorMessage
 		cliClient, errorResponse = h.GetClient(modelName)
 		if errorResponse != nil {
 			c.Status(errorResponse.StatusCode)
@@ -211,598 +193,7 @@ func (h *OpenAIAPIHandlers) handleGeminiNonStreamingResponse(c *gin.Context, raw
 			return
 		}

-		isGlAPIKey := false
-		if glAPIKey := cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey(); glAPIKey != "" {
-			log.Debugf("Request use generative language API Key: %s", glAPIKey)
-			isGlAPIKey = true
-		} else {
-			log.Debugf("Request cli use account: %s, project id: %s", cliClient.(*client.GeminiClient).GetEmail(), cliClient.(*client.GeminiClient).GetProjectID())
-		}
-
-		resp, err := cliClient.SendMessage(cliCtx, rawJSON, modelName, systemInstruction, contents, tools)
-		if err != nil {
-			if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-				continue
-			} else {
-				c.Status(err.StatusCode)
-				_, _ = c.Writer.Write([]byte(err.Error.Error()))
-				cliCancel(err.Error)
-			}
-			break
-		} else {
-			openAIFormat := translatorOpenAIToGeminiCli.ConvertCliResponseToOpenAIChatNonStream(resp, time.Now().Unix(), isGlAPIKey)
-			if openAIFormat != "" {
-				_, _ = c.Writer.Write([]byte(openAIFormat))
-			}
-			cliCancel(resp)
-			break
-		}
-	}
-}
-
-// handleGeminiStreamingResponse handles streaming responses for Gemini models.
-// It establishes a streaming connection with the backend service and forwards
-// the response chunks to the client in real-time using Server-Sent Events.
-//
-// Parameters:
-//   - c: The Gin context containing the HTTP request and response
-//   - rawJSON: The raw JSON bytes of the OpenAI-compatible request
-func (h *OpenAIAPIHandlers) handleGeminiStreamingResponse(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "text/event-stream")
-	c.Header("Cache-Control", "no-cache")
-	c.Header("Connection", "keep-alive")
-	c.Header("Access-Control-Allow-Origin", "*")
-
-	// Get the http.Flusher interface to manually flush the response.
-	flusher, ok := c.Writer.(http.Flusher)
-	if !ok {
-		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: "Streaming not supported",
-				Type:    "server_error",
-			},
-		})
-		return
-	}
-
-	// Prepare the request for the backend client.
-	modelName, systemInstruction, contents, tools := translatorOpenAIToGeminiCli.ConvertOpenAIChatRequestToCli(rawJSON)
-	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
-
-	var cliClient client.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		if cliClient != nil {
-			cliClient.GetRequestMutex().Unlock()
-		}
-	}()
-
-outLoop:
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			flusher.Flush()
-			cliCancel()
-			return
-		}
-
-		isGlAPIKey := false
-		if glAPIKey := cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey(); glAPIKey != "" {
-			log.Debugf("Request use generative language API Key: %s", glAPIKey)
-			isGlAPIKey = true
-		} else {
-			log.Debugf("Request cli use account: %s, project id: %s", cliClient.GetEmail(), cliClient.(*client.GeminiClient).GetProjectID())
-		}
-		// Send the message and receive response chunks and errors via channels.
-		respChan, errChan := cliClient.SendMessageStream(cliCtx, rawJSON, modelName, systemInstruction, contents, tools)
-
-		hasFirstResponse := false
-		for {
-			select {
-			// Handle client disconnection.
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("GeminiClient disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request.
-					return
-				}
-			// Process incoming response chunks.
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					// Stream is closed, send the final [DONE] message.
-					_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
-					flusher.Flush()
-					cliCancel()
-					return
-				}
-
-				h.AddAPIResponseData(c, chunk)
-				h.AddAPIResponseData(c, []byte("\n\n"))
-
-				// Convert the chunk to OpenAI format and send it to the client.
-				hasFirstResponse = true
-				openAIFormat := translatorOpenAIToGeminiCli.ConvertCliResponseToOpenAIChat(chunk, time.Now().Unix(), isGlAPIKey)
-				if openAIFormat != "" {
-					_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", openAIFormat)
-					flusher.Flush()
-				}
-			// Handle errors from the backend.
-			case err, okError := <-errChan:
-				if okError {
-					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-						continue outLoop
-					} else {
-						c.Status(err.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
-						flusher.Flush()
-						cliCancel(err.Error)
-					}
-					return
-				}
-			// Send a keep-alive signal to the client.
-			case <-time.After(500 * time.Millisecond):
-				if hasFirstResponse {
-					_, _ = c.Writer.Write([]byte(": CLI-PROXY-API PROCESSING\n\n"))
-					flusher.Flush()
-				}
-			}
-		}
-	}
-}
-
-// handleCodexNonStreamingResponse handles non-streaming chat completion responses
-// for OpenAI models. It selects a client from the pool, sends the request, and
-// aggregates the response before sending it back to the client in OpenAI format.
-//
-// Parameters:
-//   - c: The Gin context containing the HTTP request and response
-//   - rawJSON: The raw JSON bytes of the OpenAI-compatible request
-func (h *OpenAIAPIHandlers) handleCodexNonStreamingResponse(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "application/json")
-
-	newRequestJSON := translatorOpenAIToCodex.ConvertOpenAIChatRequestToCodex(rawJSON)
-	modelName := gjson.GetBytes(rawJSON, "model")
-
-	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
-
-	var cliClient client.Client
-	defer func() {
-		if cliClient != nil {
-			cliClient.GetRequestMutex().Unlock()
-		}
-	}()
-
-outLoop:
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName.String())
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = c.Writer.Write([]byte(errorResponse.Error.Error()))
-			cliCancel()
-			return
-		}
-
-		log.Debugf("Request codex use account: %s", cliClient.GetEmail())
-
-		// Send the message and receive response chunks and errors via channels.
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
-		for {
-			select {
-			// Handle client disconnection.
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request.
-					return
-				}
-			// Process incoming response chunks.
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					cliCancel()
-					return
-				}
-
-				h.AddAPIResponseData(c, chunk)
-				h.AddAPIResponseData(c, []byte("\n\n"))
-
-				if bytes.HasPrefix(chunk, []byte("data: ")) {
-					jsonData := chunk[6:]
-					data := gjson.ParseBytes(jsonData)
-					typeResult := data.Get("type")
-					if typeResult.String() == "response.completed" {
-						responseResult := data.Get("response")
-						openaiStr := translatorOpenAIToCodex.ConvertCodexResponseToOpenAIChatNonStream(responseResult.Raw, time.Now().Unix())
-						_, _ = c.Writer.Write([]byte(openaiStr))
-					}
-				}
-			// Handle errors from the backend.
-			case err, okError := <-errChan:
-				if okError {
-					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-						continue outLoop
-					} else {
-						c.Status(err.StatusCode)
-						_, _ = c.Writer.Write([]byte(err.Error.Error()))
-						cliCancel(err.Error)
-					}
-					return
-				}
-			// Send a keep-alive signal to the client.
-			case <-time.After(500 * time.Millisecond):
-			}
-		}
-	}
-}
-
-// handleCodexStreamingResponse handles streaming responses for OpenAI models.
-// It establishes a streaming connection with the backend service and forwards
-// the response chunks to the client in real-time using Server-Sent Events.
-//
-// Parameters:
-//   - c: The Gin context containing the HTTP request and response
-//   - rawJSON: The raw JSON bytes of the OpenAI-compatible request
-func (h *OpenAIAPIHandlers) handleCodexStreamingResponse(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "text/event-stream")
-	c.Header("Cache-Control", "no-cache")
-	c.Header("Connection", "keep-alive")
-	c.Header("Access-Control-Allow-Origin", "*")
-
-	// Get the http.Flusher interface to manually flush the response.
-	flusher, ok := c.Writer.(http.Flusher)
-	if !ok {
-		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: "Streaming not supported",
-				Type:    "server_error",
-			},
-		})
-		return
-	}
-
-	// Prepare the request for the backend client.
-	newRequestJSON := translatorOpenAIToCodex.ConvertOpenAIChatRequestToCodex(rawJSON)
-	// log.Debugf("Request: %s", newRequestJSON)
-
-	modelName := gjson.GetBytes(rawJSON, "model")
-
-	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
-
-	var cliClient client.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		if cliClient != nil {
-			cliClient.GetRequestMutex().Unlock()
-		}
-	}()
-
-outLoop:
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName.String())
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			flusher.Flush()
-			cliCancel()
-			return
-		}
-
-		log.Debugf("Request codex use account: %s", cliClient.GetEmail())
-
-		// Send the message and receive response chunks and errors via channels.
-		var params *translatorOpenAIToCodex.ConvertCliToOpenAIParams
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
-		for {
-			select {
-			// Handle client disconnection.
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request.
-					return
-				}
-			// Process incoming response chunks.
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					_, _ = c.Writer.Write([]byte("[done]\n\n"))
-					flusher.Flush()
-					cliCancel()
-					return
-				}
-
-				h.AddAPIResponseData(c, chunk)
-				h.AddAPIResponseData(c, []byte("\n\n"))
-
-				// log.Debugf("Response: %s\n", string(chunk))
-				// Convert the chunk to OpenAI format and send it to the client.
-				if bytes.HasPrefix(chunk, []byte("data: ")) {
-					jsonData := chunk[6:]
-					data := gjson.ParseBytes(jsonData)
-					typeResult := data.Get("type")
-					if typeResult.String() != "" {
-						var openaiStr string
-						params, openaiStr = translatorOpenAIToCodex.ConvertCodexResponseToOpenAIChat(jsonData, params)
-						if openaiStr != "" {
-							_, _ = c.Writer.Write([]byte("data: "))
-							_, _ = c.Writer.Write([]byte(openaiStr))
-							_, _ = c.Writer.Write([]byte("\n\n"))
-						}
-					}
-					// log.Debugf(string(jsonData))
-				}
-				flusher.Flush()
-			// Handle errors from the backend.
-			case err, okError := <-errChan:
-				if okError {
-					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-						continue outLoop
-					} else {
-						c.Status(err.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
-						flusher.Flush()
-						cliCancel(err.Error)
-					}
-					return
-				}
-			// Send a keep-alive signal to the client.
-			case <-time.After(500 * time.Millisecond):
-			}
-		}
-	}
-}
-
-// handleClaudeNonStreamingResponse handles non-streaming chat completion responses
-// for anthropic models. It uses the streaming interface internally but aggregates
-// all responses before sending back a complete non-streaming response in OpenAI format.
-//
-// Parameters:
-//   - c: The Gin context containing the HTTP request and response
-//   - rawJSON: The raw JSON bytes of the OpenAI-compatible request
-func (h *OpenAIAPIHandlers) handleClaudeNonStreamingResponse(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "application/json")
-
-	// Force streaming in the request to use the streaming interface
-	newRequestJSON := translatorOpenAIToClaude.ConvertOpenAIRequestToAnthropic(rawJSON)
-	// Ensure stream is set to true for the backend request
-	newRequestJSON, _ = sjson.Set(newRequestJSON, "stream", true)
-
-	modelName := gjson.GetBytes(rawJSON, "model")
-	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
-
-	var cliClient client.Client
-	defer func() {
-		if cliClient != nil {
-			cliClient.GetRequestMutex().Unlock()
-		}
-	}()
-
-outLoop:
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName.String())
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			cliCancel()
-			return
-		}
-
-		if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
-			log.Debugf("Request claude use API Key: %s", apiKey)
-		} else {
-			log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
-		}
-
-		// Use streaming interface but collect all responses
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
-
-		// Collect all streaming chunks to build the final response
-		var allChunks [][]byte
-
-		for {
-			select {
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("Client disconnected: %v", c.Request.Context().Err())
-					cliCancel()
-					return
-				}
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					// All chunks received, now build the final non-streaming response
-					if len(allChunks) > 0 {
-						// Use the last chunk which should contain the complete message
-						finalResponseStr := translatorOpenAIToClaude.ConvertAnthropicStreamingResponseToOpenAINonStream(allChunks)
-						finalResponse := []byte(finalResponseStr)
-						_, _ = c.Writer.Write(finalResponse)
-					}
-					cliCancel()
-					return
-				}
-
-				// Store chunk for building final response
-				if bytes.HasPrefix(chunk, []byte("data: ")) {
-					jsonData := chunk[6:]
-					allChunks = append(allChunks, jsonData)
-				}
-
-				h.AddAPIResponseData(c, chunk)
-				h.AddAPIResponseData(c, []byte("\n\n"))
-
-			case err, okError := <-errChan:
-				if okError {
-					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-						continue outLoop
-					} else {
-						c.Status(err.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
-						cliCancel(err.Error)
-					}
-					return
-				}
-			case <-time.After(30 * time.Second):
-			}
-		}
-	}
-}
-
-// handleClaudeStreamingResponse handles streaming responses for anthropic models.
-// It establishes a streaming connection with the backend service and forwards
-// the response chunks to the client in real-time using Server-Sent Events.
-//
-// Parameters:
-//   - c: The Gin context containing the HTTP request and response
-//   - rawJSON: The raw JSON bytes of the OpenAI-compatible request
-func (h *OpenAIAPIHandlers) handleClaudeStreamingResponse(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "text/event-stream")
-	c.Header("Cache-Control", "no-cache")
-	c.Header("Connection", "keep-alive")
-	c.Header("Access-Control-Allow-Origin", "*")
-
-	// Get the http.Flusher interface to manually flush the response.
-	flusher, ok := c.Writer.(http.Flusher)
-	if !ok {
-		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: "Streaming not supported",
-				Type:    "server_error",
-			},
-		})
-		return
-	}
-
-	// Prepare the request for the backend client.
-	newRequestJSON := translatorOpenAIToClaude.ConvertOpenAIRequestToAnthropic(rawJSON)
-	modelName := gjson.GetBytes(rawJSON, "model")
-	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
-
-	var cliClient client.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		if cliClient != nil {
-			cliClient.GetRequestMutex().Unlock()
-		}
-	}()
-
-outLoop:
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName.String())
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			flusher.Flush()
-			cliCancel()
-			return
-		}
-
-		if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
-			log.Debugf("Request claude use API Key: %s", apiKey)
-		} else {
-			log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
-		}
-
-		// Send the message and receive response chunks and errors via channels.
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
-		params := &translatorOpenAIToClaude.ConvertAnthropicResponseToOpenAIParams{
-			CreatedAt:    0,
-			ResponseID:   "",
-			FinishReason: "",
-		}
-
-		hasFirstResponse := false
-		for {
-			select {
-			// Handle client disconnection.
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("GeminiClient disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request.
-					return
-				}
-			// Process incoming response chunks.
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					flusher.Flush()
-					cliCancel()
-					return
-				}
-
-				h.AddAPIResponseData(c, chunk)
-				h.AddAPIResponseData(c, []byte("\n\n"))
-
-				if bytes.HasPrefix(chunk, []byte("data: ")) {
-					jsonData := chunk[6:]
-					// Convert the chunk to OpenAI format and send it to the client.
-					hasFirstResponse = true
-					openAIFormats := translatorOpenAIToClaude.ConvertAnthropicResponseToOpenAI(jsonData, params)
-					for i := 0; i < len(openAIFormats); i++ {
-						_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", openAIFormats[i])
-						flusher.Flush()
-					}
-				}
-			// Handle errors from the backend.
-			case err, okError := <-errChan:
-				if okError {
-					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-						continue outLoop
-					} else {
-						c.Status(err.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
-						flusher.Flush()
-						cliCancel(err.Error)
-					}
-					return
-				}
-			// Send a keep-alive signal to the client.
-			case <-time.After(500 * time.Millisecond):
-				if hasFirstResponse {
-					_, _ = c.Writer.Write([]byte(": CLI-PROXY-API PROCESSING\n\n"))
-					flusher.Flush()
-				}
-			}
-		}
-	}
-}
-
-// handleQwenNonStreamingResponse handles non-streaming chat completion responses
-// for Qwen models. It selects a client from the pool, sends the request, and
-// aggregates the response before sending it back to the client in OpenAI format.
-//
-// Parameters:
-//   - c: The Gin context containing the HTTP request and response
-//   - rawJSON: The raw JSON bytes of the OpenAI-compatible request
-func (h *OpenAIAPIHandlers) handleQwenNonStreamingResponse(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "application/json")
-
-	modelResult := gjson.GetBytes(rawJSON, "model")
-	modelName := modelResult.String()
-	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
-
-	var cliClient client.Client
-	defer func() {
-		if cliClient != nil {
-			cliClient.GetRequestMutex().Unlock()
-		}
-	}()
-
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
-			cliCancel()
-			return
-		}
-
-		log.Debugf("Request qwen use account: %s", cliClient.(*client.QwenClient).GetEmail())
-
-		resp, err := cliClient.SendRawMessage(cliCtx, rawJSON, modelName)
+		resp, err := cliClient.SendRawMessage(cliCtx, modelName, rawJSON, "")
 		if err != nil {
 			if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
 				continue
@@ -820,14 +211,14 @@ func (h *OpenAIAPIHandlers) handleQwenNonStreamingResponse(c *gin.Context, rawJS
 	}
 }

-// handleQwenStreamingResponse handles streaming responses for Qwen models.
+// handleStreamingResponse handles streaming responses for the model named in the request.
 // It establishes a streaming connection with the backend service and forwards
 // the response chunks to the client in real-time using Server-Sent Events.
 //
 // Parameters:
 //   - c: The Gin context containing the HTTP request and response
 //   - rawJSON: The raw JSON bytes of the OpenAI-compatible request
-func (h *OpenAIAPIHandlers) handleQwenStreamingResponse(c *gin.Context, rawJSON []byte) {
+func (h *OpenAIAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byte) {
 	c.Header("Content-Type", "text/event-stream")
 	c.Header("Cache-Control", "no-cache")
 	c.Header("Connection", "keep-alive")
@@ -845,13 +236,10 @@ func (h *OpenAIAPIHandlers) handleQwenStreamingResponse(c *gin.Context, rawJSON
 		return
 	}

-	// Prepare the request for the backend client.
-	modelResult := gjson.GetBytes(rawJSON, "model")
-	modelName := modelResult.String()
+	modelName := gjson.GetBytes(rawJSON, "model").String()
+	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())

-	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
-
-	var cliClient client.Client
+	var cliClient interfaces.Client
 	defer func() {
 		// Ensure the client's mutex is unlocked on function exit.
 		if cliClient != nil {
@@ -861,7 +249,7 @@ func (h *OpenAIAPIHandlers) handleQwenStreamingResponse(c *gin.Context, rawJSON

 outLoop:
 	for {
-		var errorResponse *client.ErrorMessage
+		var errorResponse *interfaces.ErrorMessage
 		cliClient, errorResponse = h.GetClient(modelName)
 		if errorResponse != nil {
 			c.Status(errorResponse.StatusCode)
@@ -871,35 +259,29 @@ outLoop:
 			return
 		}

-		log.Debugf("Request qwen use account: %s", cliClient.(*client.QwenClient).GetEmail())
-
 		// Send the message and receive response chunks and errors via channels.
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, rawJSON, modelName)
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, modelName, rawJSON, "")

 		for {
 			select {
 			// Handle client disconnection.
 			case <-c.Request.Context().Done():
 				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("GeminiClient disconnected: %v", c.Request.Context().Err())
+					log.Debugf("Client disconnected: %v", c.Request.Context().Err())
 					cliCancel() // Cancel the backend request.
 					return
 				}
 			// Process incoming response chunks.
 			case chunk, okStream := <-respChan:
 				if !okStream {
+					// Stream is closed, send the final [DONE] message.
+					_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
 					flusher.Flush()
 					cliCancel()
 					return
 				}

-				h.AddAPIResponseData(c, chunk)
-				h.AddAPIResponseData(c, []byte("\n"))
-
-				// Convert the chunk to OpenAI format and send it to the client.
-				_, _ = c.Writer.Write(chunk)
-				_, _ = c.Writer.Write([]byte("\n"))
-
+				_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", string(chunk))
 				flusher.Flush()
 			// Handle errors from the backend.
 			case err, okError := <-errChan: