Add openai codex support

2026-02-03 04:50:52 +08:00 · 2025-08-08 15:44:00 +08:00
parent d2394b0be9
commit de7b8501cc
57 changed files with 7488 additions and 2478 deletions
--- a/internal/api/handlers/claude/code-handlers.go
+++ b/internal/api/handlers/claude/code-handlers.go
@@ -1,208 +0,0 @@
-// Package claude provides HTTP handlers for Claude API code-related functionality.
-// This package implements Claude-compatible streaming chat completions with sophisticated
-// client rotation and quota management systems to ensure high availability and optimal
-// resource utilization across multiple backend clients. It handles request translation
-// between Claude API format and the underlying Gemini backend, providing seamless
-// API compatibility while maintaining robust error handling and connection management.
-package claude
-
-import (
-	"context"
-	"fmt"
-	"github.com/gin-gonic/gin"
-	"github.com/luispater/CLIProxyAPI/internal/api/handlers"
-	"github.com/luispater/CLIProxyAPI/internal/api/translator/claude/code"
-	"github.com/luispater/CLIProxyAPI/internal/client"
-	log "github.com/sirupsen/logrus"
-	"net/http"
-	"strings"
-	"time"
-)
-
-// ClaudeCodeAPIHandlers contains the handlers for Claude API endpoints.
-// It holds a pool of clients to interact with the backend service.
-type ClaudeCodeAPIHandlers struct {
-	*handlers.APIHandlers
-}
-
-// NewClaudeCodeAPIHandlers creates a new Claude API handlers instance.
-// It takes an APIHandlers instance as input and returns a ClaudeCodeAPIHandlers.
-func NewClaudeCodeAPIHandlers(apiHandlers *handlers.APIHandlers) *ClaudeCodeAPIHandlers {
-	return &ClaudeCodeAPIHandlers{
-		APIHandlers: apiHandlers,
-	}
-}
-
-// ClaudeMessages handles Claude-compatible streaming chat completions.
-// This function implements a sophisticated client rotation and quota management system
-// to ensure high availability and optimal resource utilization across multiple backend clients.
-func (h *ClaudeCodeAPIHandlers) ClaudeMessages(c *gin.Context) {
-	// Extract raw JSON data from the incoming request
-	rawJSON, err := c.GetRawData()
-	// If data retrieval fails, return a 400 Bad Request error.
-	if err != nil {
-		c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: fmt.Sprintf("Invalid request: %v", err),
-				Type:    "invalid_request_error",
-			},
-		})
-		return
-	}
-
-	// Set up Server-Sent Events (SSE) headers for streaming response
-	// These headers are essential for maintaining a persistent connection
-	// and enabling real-time streaming of chat completions
-	c.Header("Content-Type", "text/event-stream")
-	c.Header("Cache-Control", "no-cache")
-	c.Header("Connection", "keep-alive")
-	c.Header("Access-Control-Allow-Origin", "*")
-
-	// Get the http.Flusher interface to manually flush the response.
-	// This is crucial for streaming as it allows immediate sending of data chunks
-	flusher, ok := c.Writer.(http.Flusher)
-	if !ok {
-		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: "Streaming not supported",
-				Type:    "server_error",
-			},
-		})
-		return
-	}
-
-	// Parse and prepare the Claude request, extracting model name, system instructions,
-	// conversation contents, and available tools from the raw JSON
-	modelName, systemInstruction, contents, tools := code.PrepareClaudeRequest(rawJSON)
-
-	// Map Claude model names to corresponding Gemini models
-	// This allows the proxy to handle Claude API calls using Gemini backend
-	if modelName == "claude-sonnet-4-20250514" {
-		modelName = "gemini-2.5-pro"
-	} else if modelName == "claude-3-5-haiku-20241022" {
-		modelName = "gemini-2.5-flash"
-	}
-
-	// Create a cancellable context for the backend client request
-	// This allows proper cleanup and cancellation of ongoing requests
-	cliCtx, cliCancel := context.WithCancel(context.Background())
-	var cliClient *client.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		// This prevents deadlocks and ensures proper resource cleanup
-		if cliClient != nil {
-			cliClient.RequestMutex.Unlock()
-		}
-	}()
-
-	// Main client rotation loop with quota management
-	// This loop implements a sophisticated load balancing and failover mechanism
-outLoop:
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
-			flusher.Flush()
-			cliCancel()
-			return
-		}
-
-		// Determine the authentication method being used by the selected client
-		// This affects how responses are formatted and logged
-		isGlAPIKey := false
-		if glAPIKey := cliClient.GetGenerativeLanguageAPIKey(); glAPIKey != "" {
-			log.Debugf("Request use generative language API Key: %s", glAPIKey)
-			isGlAPIKey = true
-		} else {
-			log.Debugf("Request use account: %s, project id: %s", cliClient.GetEmail(), cliClient.GetProjectID())
-		}
-		// Initiate streaming communication with the backend client
-		// This returns two channels: one for response chunks and one for errors
-
-		includeThoughts := false
-		if userAgent, hasKey := c.Request.Header["User-Agent"]; hasKey {
-			includeThoughts = !strings.Contains(userAgent[0], "claude-cli")
-		}
-
-		respChan, errChan := cliClient.SendMessageStream(cliCtx, rawJSON, modelName, systemInstruction, contents, tools, includeThoughts)
-
-		// Track response state for proper Claude format conversion
-		hasFirstResponse := false
-		responseType := 0
-		responseIndex := 0
-
-		// Main streaming loop - handles multiple concurrent events using Go channels
-		// This select statement manages four different types of events simultaneously
-		for {
-			select {
-			// Case 1: Handle client disconnection
-			// Detects when the HTTP client has disconnected and cleans up resources
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("Client disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request to prevent resource leaks
-					return
-				}
-
-			// Case 2: Process incoming response chunks from the backend
-			// This handles the actual streaming data from the AI model
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					// Stream has ended - send the final message_stop event
-					// This follows the Claude API specification for stream termination
-					_, _ = c.Writer.Write([]byte(`event: message_stop`))
-					_, _ = c.Writer.Write([]byte("\n"))
-					_, _ = c.Writer.Write([]byte(`data: {"type":"message_stop"}`))
-					_, _ = c.Writer.Write([]byte("\n\n\n"))
-
-					flusher.Flush()
-					cliCancel()
-					return
-				}
-				// Convert the backend response to Claude-compatible format
-				// This translation layer ensures API compatibility
-				claudeFormat := code.ConvertCliToClaude(chunk, isGlAPIKey, hasFirstResponse, &responseType, &responseIndex)
-				if claudeFormat != "" {
-					_, _ = c.Writer.Write([]byte(claudeFormat))
-					flusher.Flush() // Immediately send the chunk to the client
-				}
-				hasFirstResponse = true
-
-			// Case 3: Handle errors from the backend
-			// This manages various error conditions and implements retry logic
-			case errInfo, okError := <-errChan:
-				if okError {
-					// Special handling for quota exceeded errors
-					// If configured, attempt to switch to a different project/client
-					if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-						continue outLoop // Restart the client selection process
-					} else {
-						// Forward other errors directly to the client
-						c.Status(errInfo.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
-						flusher.Flush()
-						cliCancel()
-					}
-					return
-				}
-
-			// Case 4: Send periodic keep-alive signals
-			// Prevents connection timeouts during long-running requests
-			case <-time.After(500 * time.Millisecond):
-				if hasFirstResponse {
-					// Send a ping event to maintain the connection
-					// This is especially important for slow AI model responses
-					output := "event: ping\n"
-					output = output + `data: {"type": "ping"}`
-					output = output + "\n\n\n"
-					_, _ = c.Writer.Write([]byte(output))
-
-					flusher.Flush()
-				}
-			}
-		}
-	}
-
-}
--- a/internal/api/handlers/claude/code_handlers.go
+++ b/internal/api/handlers/claude/code_handlers.go
@@ -0,0 +1,382 @@
+// Package claude provides HTTP handlers for Claude API code-related functionality.
+// This package implements Claude-compatible streaming chat completions with sophisticated
+// client rotation and quota management systems to ensure high availability and optimal
+// resource utilization across multiple backend clients. It handles request translation
+// between Claude API format and the underlying Gemini backend, providing seamless
+// API compatibility while maintaining robust error handling and connection management.
+package claude
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/gin-gonic/gin"
+	"github.com/luispater/CLIProxyAPI/internal/api/handlers"
+	"github.com/luispater/CLIProxyAPI/internal/client"
+	translatorClaudeCodeToCodex "github.com/luispater/CLIProxyAPI/internal/translator/codex/claude/code"
+	translatorClaudeCodeToGeminiCli "github.com/luispater/CLIProxyAPI/internal/translator/gemini-cli/claude/code"
+	"github.com/luispater/CLIProxyAPI/internal/util"
+	log "github.com/sirupsen/logrus"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// ClaudeCodeAPIHandlers contains the handlers for Claude API endpoints.
+// It holds a pool of clients to interact with the backend service.
+type ClaudeCodeAPIHandlers struct {
+	*handlers.APIHandlers
+}
+
+// NewClaudeCodeAPIHandlers creates a new Claude API handlers instance.
+// It takes an APIHandlers instance as input and returns a ClaudeCodeAPIHandlers.
+func NewClaudeCodeAPIHandlers(apiHandlers *handlers.APIHandlers) *ClaudeCodeAPIHandlers {
+	return &ClaudeCodeAPIHandlers{
+		APIHandlers: apiHandlers,
+	}
+}
+
+// ClaudeMessages handles Claude-compatible streaming chat completions.
+// This function implements a sophisticated client rotation and quota management system
+// to ensure high availability and optimal resource utilization across multiple backend clients.
+func (h *ClaudeCodeAPIHandlers) ClaudeMessages(c *gin.Context) {
+	// Extract raw JSON data from the incoming request
+	rawJSON, err := c.GetRawData()
+	// If data retrieval fails, return a 400 Bad Request error.
+	if err != nil {
+		c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: fmt.Sprintf("Invalid request: %v", err),
+				Type:    "invalid_request_error",
+			},
+		})
+		return
+	}
+
+	// h.handleGeminiStreamingResponse(c, rawJSON)
+	// h.handleCodexStreamingResponse(c, rawJSON)
+	modelName := gjson.GetBytes(rawJSON, "model")
+	provider := util.GetProviderName(modelName.String())
+	if provider == "gemini" {
+		h.handleGeminiStreamingResponse(c, rawJSON)
+	} else if provider == "gpt" {
+		h.handleCodexStreamingResponse(c, rawJSON)
+	} else {
+		h.handleGeminiStreamingResponse(c, rawJSON)
+	}
+}
+
+// handleGeminiStreamingResponse streams Claude-compatible responses backed by Gemini.
+// It sets up SSE, selects a backend client with rotation/quota logic,
+// forwards chunks, and translates them to Claude CLI format.
+func (h *ClaudeCodeAPIHandlers) handleGeminiStreamingResponse(c *gin.Context, rawJSON []byte) {
+	// Set up Server-Sent Events (SSE) headers for streaming response
+	// These headers are essential for maintaining a persistent connection
+	// and enabling real-time streaming of chat completions
+	c.Header("Content-Type", "text/event-stream")
+	c.Header("Cache-Control", "no-cache")
+	c.Header("Connection", "keep-alive")
+	c.Header("Access-Control-Allow-Origin", "*")
+
+	// Get the http.Flusher interface to manually flush the response.
+	// This is crucial for streaming as it allows immediate sending of data chunks
+	flusher, ok := c.Writer.(http.Flusher)
+	if !ok {
+		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Streaming not supported",
+				Type:    "server_error",
+			},
+		})
+		return
+	}
+
+	// Parse and prepare the Claude request, extracting model name, system instructions,
+	// conversation contents, and available tools from the raw JSON
+	modelName, systemInstruction, contents, tools := translatorClaudeCodeToGeminiCli.ConvertClaudeCodeRequestToCli(rawJSON)
+
+	// Map Claude model names to corresponding Gemini models
+	// This allows the proxy to handle Claude API calls using Gemini backend
+	if modelName == "claude-sonnet-4-20250514" {
+		modelName = "gemini-2.5-pro"
+	} else if modelName == "claude-3-5-haiku-20241022" {
+		modelName = "gemini-2.5-flash"
+	}
+
+	// Create a cancellable context for the backend client request
+	// This allows proper cleanup and cancellation of ongoing requests
+	cliCtx, cliCancel := context.WithCancel(context.Background())
+	var cliClient client.Client
+	cliClient = client.NewGeminiClient(nil, nil, nil)
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		// This prevents deadlocks and ensures proper resource cleanup
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+	// Main client rotation loop with quota management
+	// This loop implements a sophisticated load balancing and failover mechanism
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName)
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			flusher.Flush()
+			cliCancel()
+			return
+		}
+
+		// Determine the authentication method being used by the selected client
+		// This affects how responses are formatted and logged
+		isGlAPIKey := false
+		if glAPIKey := cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey(); glAPIKey != "" {
+			log.Debugf("Request use gemini generative language API Key: %s", glAPIKey)
+			isGlAPIKey = true
+		} else {
+			log.Debugf("Request use gemini account: %s, project id: %s", cliClient.GetEmail(), cliClient.(*client.GeminiClient).GetProjectID())
+		}
+		// Initiate streaming communication with the backend client
+		// This returns two channels: one for response chunks and one for errors
+
+		includeThoughts := false
+		if userAgent, hasKey := c.Request.Header["User-Agent"]; hasKey {
+			includeThoughts = !strings.Contains(userAgent[0], "claude-cli")
+		}
+
+		respChan, errChan := cliClient.SendMessageStream(cliCtx, rawJSON, modelName, systemInstruction, contents, tools, includeThoughts)
+
+		// Track response state for proper Claude format conversion
+		hasFirstResponse := false
+		responseType := 0
+		responseIndex := 0
+
+		// Main streaming loop - handles multiple concurrent events using Go channels
+		// This select statement manages four different types of events simultaneously
+		for {
+			select {
+			// Case 1: Handle client disconnection
+			// Detects when the HTTP client has disconnected and cleans up resources
+			case <-c.Request.Context().Done():
+				if c.Request.Context().Err().Error() == "context canceled" {
+					log.Debugf("GeminiClient disconnected: %v", c.Request.Context().Err())
+					cliCancel() // Cancel the backend request to prevent resource leaks
+					return
+				}
+
+			// Case 2: Process incoming response chunks from the backend
+			// This handles the actual streaming data from the AI model
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					// Stream has ended - send the final message_stop event
+					// This follows the Claude API specification for stream termination
+					_, _ = c.Writer.Write([]byte(`event: message_stop`))
+					_, _ = c.Writer.Write([]byte("\n"))
+					_, _ = c.Writer.Write([]byte(`data: {"type":"message_stop"}`))
+					_, _ = c.Writer.Write([]byte("\n\n\n"))
+
+					flusher.Flush()
+					cliCancel()
+					return
+				}
+				// Convert the backend response to Claude-compatible format
+				// This translation layer ensures API compatibility
+				claudeFormat := translatorClaudeCodeToGeminiCli.ConvertCliResponseToClaudeCode(chunk, isGlAPIKey, hasFirstResponse, &responseType, &responseIndex)
+				if claudeFormat != "" {
+					_, _ = c.Writer.Write([]byte(claudeFormat))
+					flusher.Flush() // Immediately send the chunk to the client
+				}
+				hasFirstResponse = true
+
+			// Case 3: Handle errors from the backend
+			// This manages various error conditions and implements retry logic
+			case errInfo, okError := <-errChan:
+				if okError {
+					// Special handling for quota exceeded errors
+					// If configured, attempt to switch to a different project/client
+					if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+						continue outLoop // Restart the client selection process
+					} else {
+						// Forward other errors directly to the client
+						c.Status(errInfo.StatusCode)
+						_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
+						flusher.Flush()
+						cliCancel()
+					}
+					return
+				}
+
+			// Case 4: Send periodic keep-alive signals
+			// Prevents connection timeouts during long-running requests
+			case <-time.After(500 * time.Millisecond):
+				if hasFirstResponse {
+					// Send a ping event to maintain the connection
+					// This is especially important for slow AI model responses
+					output := "event: ping\n"
+					output = output + `data: {"type": "ping"}`
+					output = output + "\n\n\n"
+					_, _ = c.Writer.Write([]byte(output))
+
+					flusher.Flush()
+				}
+			}
+		}
+	}
+}
+
+// handleCodexStreamingResponse streams Claude-compatible responses backed by OpenAI.
+// It converts the Claude request into Codex/OpenAI responses format, establishes SSE,
+// and translates streaming chunks back into Claude CLI events.
+func (h *ClaudeCodeAPIHandlers) handleCodexStreamingResponse(c *gin.Context, rawJSON []byte) {
+	// Set up Server-Sent Events (SSE) headers for streaming response
+	// These headers are essential for maintaining a persistent connection
+	// and enabling real-time streaming of chat completions
+	c.Header("Content-Type", "text/event-stream")
+	c.Header("Cache-Control", "no-cache")
+	c.Header("Connection", "keep-alive")
+	c.Header("Access-Control-Allow-Origin", "*")
+
+	// Get the http.Flusher interface to manually flush the response.
+	// This is crucial for streaming as it allows immediate sending of data chunks
+	flusher, ok := c.Writer.(http.Flusher)
+	if !ok {
+		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Streaming not supported",
+				Type:    "server_error",
+			},
+		})
+		return
+	}
+
+	// Parse and prepare the Claude request, extracting model name, system instructions,
+	// conversation contents, and available tools from the raw JSON
+	newRequestJSON := translatorClaudeCodeToCodex.ConvertClaudeCodeRequestToCodex(rawJSON)
+	modelName := gjson.GetBytes(rawJSON, "model").String()
+	// Map Claude model names to corresponding Gemini models
+	// This allows the proxy to handle Claude API calls using Gemini backend
+	if modelName == "claude-sonnet-4-20250514" {
+		modelName = "gpt-5"
+	} else if modelName == "claude-3-5-haiku-20241022" {
+		modelName = "gpt-5"
+	}
+	newRequestJSON, _ = sjson.Set(newRequestJSON, "model", modelName)
+	// log.Debugf(string(rawJSON))
+	// log.Debugf(newRequestJSON)
+	// return
+	// Create a cancellable context for the backend client request
+	// This allows proper cleanup and cancellation of ongoing requests
+	cliCtx, cliCancel := context.WithCancel(context.Background())
+	var cliClient client.Client
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		// This prevents deadlocks and ensures proper resource cleanup
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+	// Main client rotation loop with quota management
+	// This loop implements a sophisticated load balancing and failover mechanism
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName)
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			flusher.Flush()
+			cliCancel()
+			return
+		}
+
+		log.Debugf("Request use codex account: %s", cliClient.GetEmail())
+
+		// Initiate streaming communication with the backend client
+		// This returns two channels: one for response chunks and one for errors
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+
+		// Track response state for proper Claude format conversion
+		hasFirstResponse := false
+		hasToolCall := false
+
+		// Main streaming loop - handles multiple concurrent events using Go channels
+		// This select statement manages four different types of events simultaneously
+		for {
+			select {
+			// Case 1: Handle client disconnection
+			// Detects when the HTTP client has disconnected and cleans up resources
+			case <-c.Request.Context().Done():
+				if c.Request.Context().Err().Error() == "context canceled" {
+					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
+					cliCancel() // Cancel the backend request to prevent resource leaks
+					return
+				}
+
+			// Case 2: Process incoming response chunks from the backend
+			// This handles the actual streaming data from the AI model
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					flusher.Flush()
+					cliCancel()
+					return
+				}
+				// Convert the backend response to Claude-compatible format
+				// This translation layer ensures API compatibility
+				if bytes.HasPrefix(chunk, []byte("data: ")) {
+					jsonData := chunk[6:]
+					var claudeFormat string
+					claudeFormat, hasToolCall = translatorClaudeCodeToCodex.ConvertCodexResponseToClaude(jsonData, hasToolCall)
+					// log.Debugf("claudeFormat: %s", claudeFormat)
+					if claudeFormat != "" {
+						_, _ = c.Writer.Write([]byte(claudeFormat))
+						_, _ = c.Writer.Write([]byte("\n"))
+					}
+					flusher.Flush() // Immediately send the chunk to the client
+					hasFirstResponse = true
+				} else {
+					// log.Debugf("chunk: %s", string(chunk))
+				}
+			// Case 3: Handle errors from the backend
+			// This manages various error conditions and implements retry logic
+			case errInfo, okError := <-errChan:
+				if okError {
+					// log.Debugf("Code: %d, Error: %v", errInfo.StatusCode, errInfo.Error)
+					// Special handling for quota exceeded errors
+					// If configured, attempt to switch to a different project/client
+					if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+						log.Debugf("quota exceeded, switch client")
+						continue outLoop // Restart the client selection process
+					} else {
+						// Forward other errors directly to the client
+						c.Status(errInfo.StatusCode)
+						_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
+						flusher.Flush()
+						cliCancel()
+					}
+					return
+				}
+
+			// Case 4: Send periodic keep-alive signals
+			// Prevents connection timeouts during long-running requests
+			case <-time.After(3000 * time.Millisecond):
+				if hasFirstResponse {
+					// Send a ping event to maintain the connection
+					// This is especially important for slow AI model responses
+					output := "event: ping\n"
+					output = output + `data: {"type": "ping"}`
+					output = output + "\n\n"
+					_, _ = c.Writer.Write([]byte(output))
+
+					flusher.Flush()
+				}
+			}
+		}
+	}
+}
--- a/internal/api/handlers/gemini/cli/cli-handlers.go
+++ b/internal/api/handlers/gemini/cli/cli-handlers.go
@@ -1,268 +0,0 @@
-// Package cli provides HTTP handlers for Gemini CLI API functionality.
-// This package implements handlers that process CLI-specific requests for Gemini API operations,
-// including content generation and streaming content generation endpoints.
-// The handlers restrict access to localhost only and manage communication with the backend service.
-package cli
-
-import (
-	"bytes"
-	"context"
-	"fmt"
-	"github.com/gin-gonic/gin"
-	"github.com/luispater/CLIProxyAPI/internal/api/handlers"
-	"github.com/luispater/CLIProxyAPI/internal/client"
-	"github.com/luispater/CLIProxyAPI/internal/util"
-	log "github.com/sirupsen/logrus"
-	"github.com/tidwall/gjson"
-	"github.com/tidwall/sjson"
-	"io"
-	"net/http"
-	"strings"
-	"time"
-)
-
-// GeminiCLIAPIHandlers contains the handlers for Gemini CLI API endpoints.
-// It holds a pool of clients to interact with the backend service.
-type GeminiCLIAPIHandlers struct {
-	*handlers.APIHandlers
-}
-
-// NewGeminiCLIAPIHandlers creates a new Gemini CLI API handlers instance.
-// It takes an APIHandlers instance as input and returns a GeminiCLIAPIHandlers.
-func NewGeminiCLIAPIHandlers(apiHandlers *handlers.APIHandlers) *GeminiCLIAPIHandlers {
-	return &GeminiCLIAPIHandlers{
-		APIHandlers: apiHandlers,
-	}
-}
-
-// CLIHandler handles CLI-specific requests for Gemini API operations.
-// It restricts access to localhost only and routes requests to appropriate internal handlers.
-func (h *GeminiCLIAPIHandlers) CLIHandler(c *gin.Context) {
-	if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") {
-		c.JSON(http.StatusForbidden, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: "CLI reply only allow local access",
-				Type:    "forbidden",
-			},
-		})
-		return
-	}
-
-	rawJSON, _ := c.GetRawData()
-	requestRawURI := c.Request.URL.Path
-	if requestRawURI == "/v1internal:generateContent" {
-		h.internalGenerateContent(c, rawJSON)
-	} else if requestRawURI == "/v1internal:streamGenerateContent" {
-		h.internalStreamGenerateContent(c, rawJSON)
-	} else {
-		reqBody := bytes.NewBuffer(rawJSON)
-		req, err := http.NewRequest("POST", fmt.Sprintf("https://cloudcode-pa.googleapis.com%s", c.Request.URL.RequestURI()), reqBody)
-		if err != nil {
-			c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
-				Error: handlers.ErrorDetail{
-					Message: fmt.Sprintf("Invalid request: %v", err),
-					Type:    "invalid_request_error",
-				},
-			})
-			return
-		}
-		for key, value := range c.Request.Header {
-			req.Header[key] = value
-		}
-
-		httpClient, err := util.SetProxy(h.Cfg, &http.Client{})
-		if err != nil {
-			log.Fatalf("set proxy failed: %v", err)
-		}
-
-		resp, err := httpClient.Do(req)
-		if err != nil {
-			c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
-				Error: handlers.ErrorDetail{
-					Message: fmt.Sprintf("Invalid request: %v", err),
-					Type:    "invalid_request_error",
-				},
-			})
-			return
-		}
-
-		if resp.StatusCode < 200 || resp.StatusCode >= 300 {
-			defer func() {
-				if err = resp.Body.Close(); err != nil {
-					log.Printf("warn: failed to close response body: %v", err)
-				}
-			}()
-			bodyBytes, _ := io.ReadAll(resp.Body)
-
-			c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
-				Error: handlers.ErrorDetail{
-					Message: string(bodyBytes),
-					Type:    "invalid_request_error",
-				},
-			})
-			return
-		}
-
-		defer func() {
-			_ = resp.Body.Close()
-		}()
-
-		for key, value := range resp.Header {
-			c.Header(key, value[0])
-		}
-		output, err := io.ReadAll(resp.Body)
-		if err != nil {
-			log.Errorf("Failed to read response body: %v", err)
-			return
-		}
-		_, _ = c.Writer.Write(output)
-	}
-}
-
-func (h *GeminiCLIAPIHandlers) internalStreamGenerateContent(c *gin.Context, rawJSON []byte) {
-	alt := h.GetAlt(c)
-
-	if alt == "" {
-		c.Header("Content-Type", "text/event-stream")
-		c.Header("Cache-Control", "no-cache")
-		c.Header("Connection", "keep-alive")
-		c.Header("Access-Control-Allow-Origin", "*")
-	}
-
-	// Get the http.Flusher interface to manually flush the response.
-	flusher, ok := c.Writer.(http.Flusher)
-	if !ok {
-		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: "Streaming not supported",
-				Type:    "server_error",
-			},
-		})
-		return
-	}
-
-	modelResult := gjson.GetBytes(rawJSON, "model")
-	modelName := modelResult.String()
-
-	cliCtx, cliCancel := context.WithCancel(context.Background())
-	var cliClient *client.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		if cliClient != nil {
-			cliClient.RequestMutex.Unlock()
-		}
-	}()
-
-outLoop:
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
-			flusher.Flush()
-			cliCancel()
-			return
-		}
-
-		if glAPIKey := cliClient.GetGenerativeLanguageAPIKey(); glAPIKey != "" {
-			log.Debugf("Request use generative language API Key: %s", glAPIKey)
-		} else {
-			log.Debugf("Request use account: %s, project id: %s", cliClient.GetEmail(), cliClient.GetProjectID())
-		}
-		// Send the message and receive response chunks and errors via channels.
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, rawJSON, "")
-		hasFirstResponse := false
-		for {
-			select {
-			// Handle client disconnection.
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("Client disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request.
-					return
-				}
-			// Process incoming response chunks.
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					cliCancel()
-					return
-				}
-				hasFirstResponse = true
-				if cliClient.GetGenerativeLanguageAPIKey() != "" {
-					chunk, _ = sjson.SetRawBytes(chunk, "response", chunk)
-				}
-				_, _ = c.Writer.Write([]byte("data: "))
-				_, _ = c.Writer.Write(chunk)
-				_, _ = c.Writer.Write([]byte("\n\n"))
-				flusher.Flush()
-			// Handle errors from the backend.
-			case err, okError := <-errChan:
-				if okError {
-					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-						continue outLoop
-					} else {
-						c.Status(err.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
-						flusher.Flush()
-						cliCancel()
-					}
-					return
-				}
-			// Send a keep-alive signal to the client.
-			case <-time.After(500 * time.Millisecond):
-				if hasFirstResponse {
-					_, _ = c.Writer.Write([]byte("\n"))
-					flusher.Flush()
-				}
-			}
-		}
-	}
-}
-
-func (h *GeminiCLIAPIHandlers) internalGenerateContent(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "application/json")
-
-	modelResult := gjson.GetBytes(rawJSON, "model")
-	modelName := modelResult.String()
-	cliCtx, cliCancel := context.WithCancel(context.Background())
-	var cliClient *client.Client
-	defer func() {
-		if cliClient != nil {
-			cliClient.RequestMutex.Unlock()
-		}
-	}()
-
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
-			cliCancel()
-			return
-		}
-
-		if glAPIKey := cliClient.GetGenerativeLanguageAPIKey(); glAPIKey != "" {
-			log.Debugf("Request use generative language API Key: %s", glAPIKey)
-		} else {
-			log.Debugf("Request use account: %s, project id: %s", cliClient.GetEmail(), cliClient.GetProjectID())
-		}
-
-		resp, err := cliClient.SendRawMessage(cliCtx, rawJSON, "")
-		if err != nil {
-			if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-				continue
-			} else {
-				c.Status(err.StatusCode)
-				_, _ = c.Writer.Write([]byte(err.Error.Error()))
-				cliCancel()
-			}
-			break
-		} else {
-			_, _ = c.Writer.Write(resp)
-			cliCancel()
-			break
-		}
-	}
-}
--- a/internal/api/handlers/gemini/cli/cli_handlers.go
+++ b/internal/api/handlers/gemini/cli/cli_handlers.go
@@ -0,0 +1,491 @@
+// Package cli provides HTTP handlers for Gemini CLI API functionality.
+// This package implements handlers that process CLI-specific requests for Gemini API operations,
+// including content generation and streaming content generation endpoints.
+// The handlers restrict access to localhost only and manage communication with the backend service.
+package cli
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/gin-gonic/gin"
+	"github.com/luispater/CLIProxyAPI/internal/api/handlers"
+	"github.com/luispater/CLIProxyAPI/internal/client"
+	translatorGeminiToCodex "github.com/luispater/CLIProxyAPI/internal/translator/codex/gemini"
+	"github.com/luispater/CLIProxyAPI/internal/util"
+	log "github.com/sirupsen/logrus"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// GeminiCLIAPIHandlers contains the handlers for Gemini CLI API endpoints.
+// It holds a pool of clients to interact with the backend service.
+type GeminiCLIAPIHandlers struct {
+	*handlers.APIHandlers
+}
+
+// NewGeminiCLIAPIHandlers creates a new Gemini CLI API handlers instance.
+// It takes an APIHandlers instance as input and returns a GeminiCLIAPIHandlers.
+func NewGeminiCLIAPIHandlers(apiHandlers *handlers.APIHandlers) *GeminiCLIAPIHandlers {
+	return &GeminiCLIAPIHandlers{
+		APIHandlers: apiHandlers,
+	}
+}
+
+// CLIHandler handles CLI-specific requests for Gemini API operations.
+// It restricts access to localhost only and routes requests to appropriate internal handlers.
+func (h *GeminiCLIAPIHandlers) CLIHandler(c *gin.Context) {
+	if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") {
+		c.JSON(http.StatusForbidden, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "CLI reply only allow local access",
+				Type:    "forbidden",
+			},
+		})
+		return
+	}
+
+	rawJSON, _ := c.GetRawData()
+	requestRawURI := c.Request.URL.Path
+
+	modelName := gjson.GetBytes(rawJSON, "model")
+	provider := util.GetProviderName(modelName.String())
+
+	if requestRawURI == "/v1internal:generateContent" {
+		if provider == "gemini" || provider == "unknow" {
+			h.handleInternalGenerateContent(c, rawJSON)
+		} else if provider == "gpt" {
+			h.handleCodexInternalGenerateContent(c, rawJSON)
+		}
+	} else if requestRawURI == "/v1internal:streamGenerateContent" {
+		if provider == "gemini" || provider == "unknow" {
+			h.handleInternalStreamGenerateContent(c, rawJSON)
+		} else if provider == "gpt" {
+			h.handleCodexInternalStreamGenerateContent(c, rawJSON)
+		}
+	} else {
+		reqBody := bytes.NewBuffer(rawJSON)
+		req, err := http.NewRequest("POST", fmt.Sprintf("https://cloudcode-pa.googleapis.com%s", c.Request.URL.RequestURI()), reqBody)
+		if err != nil {
+			c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
+				Error: handlers.ErrorDetail{
+					Message: fmt.Sprintf("Invalid request: %v", err),
+					Type:    "invalid_request_error",
+				},
+			})
+			return
+		}
+		for key, value := range c.Request.Header {
+			req.Header[key] = value
+		}
+
+		httpClient := util.SetProxy(h.Cfg, &http.Client{})
+
+		resp, err := httpClient.Do(req)
+		if err != nil {
+			c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
+				Error: handlers.ErrorDetail{
+					Message: fmt.Sprintf("Invalid request: %v", err),
+					Type:    "invalid_request_error",
+				},
+			})
+			return
+		}
+
+		if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+			defer func() {
+				if err = resp.Body.Close(); err != nil {
+					log.Printf("warn: failed to close response body: %v", err)
+				}
+			}()
+			bodyBytes, _ := io.ReadAll(resp.Body)
+
+			c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
+				Error: handlers.ErrorDetail{
+					Message: string(bodyBytes),
+					Type:    "invalid_request_error",
+				},
+			})
+			return
+		}
+
+		defer func() {
+			_ = resp.Body.Close()
+		}()
+
+		for key, value := range resp.Header {
+			c.Header(key, value[0])
+		}
+		output, err := io.ReadAll(resp.Body)
+		if err != nil {
+			log.Errorf("Failed to read response body: %v", err)
+			return
+		}
+		_, _ = c.Writer.Write(output)
+	}
+}
+
+func (h *GeminiCLIAPIHandlers) handleInternalStreamGenerateContent(c *gin.Context, rawJSON []byte) {
+	alt := h.GetAlt(c)
+
+	if alt == "" {
+		c.Header("Content-Type", "text/event-stream")
+		c.Header("Cache-Control", "no-cache")
+		c.Header("Connection", "keep-alive")
+		c.Header("Access-Control-Allow-Origin", "*")
+	}
+
+	// Get the http.Flusher interface to manually flush the response.
+	flusher, ok := c.Writer.(http.Flusher)
+	if !ok {
+		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Streaming not supported",
+				Type:    "server_error",
+			},
+		})
+		return
+	}
+
+	modelResult := gjson.GetBytes(rawJSON, "model")
+	modelName := modelResult.String()
+
+	cliCtx, cliCancel := context.WithCancel(context.Background())
+	var cliClient client.Client
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName)
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			flusher.Flush()
+			cliCancel()
+			return
+		}
+
+		if glAPIKey := cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey(); glAPIKey != "" {
+			log.Debugf("Request use generative language API Key: %s", glAPIKey)
+		} else {
+			log.Debugf("Request cli use account: %s, project id: %s", cliClient.(*client.GeminiClient).GetEmail(), cliClient.(*client.GeminiClient).GetProjectID())
+		}
+		// Send the message and receive response chunks and errors via channels.
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, rawJSON, "")
+		hasFirstResponse := false
+		for {
+			select {
+			// Handle client disconnection.
+			case <-c.Request.Context().Done():
+				if c.Request.Context().Err().Error() == "context canceled" {
+					log.Debugf("GeminiClient disconnected: %v", c.Request.Context().Err())
+					cliCancel() // Cancel the backend request.
+					return
+				}
+			// Process incoming response chunks.
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					cliCancel()
+					return
+				}
+				hasFirstResponse = true
+				if cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey() != "" {
+					chunk, _ = sjson.SetRawBytes(chunk, "response", chunk)
+				}
+				_, _ = c.Writer.Write([]byte("data: "))
+				_, _ = c.Writer.Write(chunk)
+				_, _ = c.Writer.Write([]byte("\n\n"))
+
+				flusher.Flush()
+			// Handle errors from the backend.
+			case err, okError := <-errChan:
+				if okError {
+					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+						continue outLoop
+					} else {
+						c.Status(err.StatusCode)
+						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
+						flusher.Flush()
+						cliCancel()
+					}
+					return
+				}
+			// Send a keep-alive signal to the client.
+			case <-time.After(500 * time.Millisecond):
+				if hasFirstResponse {
+					_, _ = c.Writer.Write([]byte("\n"))
+					flusher.Flush()
+				}
+			}
+		}
+	}
+}
+
+func (h *GeminiCLIAPIHandlers) handleInternalGenerateContent(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "application/json")
+	// log.Debugf("GenerateContent: %s", string(rawJSON))
+	modelResult := gjson.GetBytes(rawJSON, "model")
+	modelName := modelResult.String()
+	cliCtx, cliCancel := context.WithCancel(context.Background())
+	var cliClient client.Client
+	defer func() {
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName)
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			cliCancel()
+			return
+		}
+
+		if glAPIKey := cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey(); glAPIKey != "" {
+			log.Debugf("Request use generative language API Key: %s", glAPIKey)
+		} else {
+			log.Debugf("Request cli use account: %s, project id: %s", cliClient.(*client.GeminiClient).GetEmail(), cliClient.(*client.GeminiClient).GetProjectID())
+		}
+
+		resp, err := cliClient.SendRawMessage(cliCtx, rawJSON, "")
+		if err != nil {
+			if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+				continue
+			} else {
+				c.Status(err.StatusCode)
+				_, _ = c.Writer.Write([]byte(err.Error.Error()))
+				log.Debugf("code: %d, error: %s", err.StatusCode, err.Error.Error())
+				cliCancel()
+			}
+			break
+		} else {
+			_, _ = c.Writer.Write(resp)
+			cliCancel()
+			break
+		}
+	}
+}
+
+func (h *GeminiCLIAPIHandlers) handleCodexInternalStreamGenerateContent(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "text/event-stream")
+	c.Header("Cache-Control", "no-cache")
+	c.Header("Connection", "keep-alive")
+	c.Header("Access-Control-Allow-Origin", "*")
+
+	// Get the http.Flusher interface to manually flush the response.
+	flusher, ok := c.Writer.(http.Flusher)
+	if !ok {
+		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Streaming not supported",
+				Type:    "server_error",
+			},
+		})
+		return
+	}
+
+	modelResult := gjson.GetBytes(rawJSON, "model")
+	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
+	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelResult.String())
+	rawJSON, _ = sjson.SetRawBytes(rawJSON, "system_instruction", []byte(gjson.GetBytes(rawJSON, "systemInstruction").Raw))
+	rawJSON, _ = sjson.DeleteBytes(rawJSON, "systemInstruction")
+
+	// log.Debugf("Request: %s", string(rawJSON))
+	// return
+
+	// Prepare the request for the backend client.
+	newRequestJSON := translatorGeminiToCodex.ConvertGeminiRequestToCodex(rawJSON)
+	// log.Debugf("Request: %s", newRequestJSON)
+
+	modelName := gjson.GetBytes(rawJSON, "model")
+
+	cliCtx, cliCancel := context.WithCancel(context.Background())
+	var cliClient client.Client
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName.String())
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			flusher.Flush()
+			cliCancel()
+			return
+		}
+
+		log.Debugf("Request codex use account: %s", cliClient.GetEmail())
+
+		// Send the message and receive response chunks and errors via channels.
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+
+		params := &translatorGeminiToCodex.ConvertCodexResponseToGeminiParams{
+			Model:             modelName.String(),
+			CreatedAt:         0,
+			ResponseID:        "",
+			LastStorageOutput: "",
+		}
+		for {
+			select {
+			// Handle client disconnection.
+			case <-c.Request.Context().Done():
+				if c.Request.Context().Err().Error() == "context canceled" {
+					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
+					cliCancel() // Cancel the backend request.
+					return
+				}
+			// Process incoming response chunks.
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					cliCancel()
+					return
+				}
+				// _, _ = logFile.Write(chunk)
+				// _, _ = logFile.Write([]byte("\n"))
+
+				if bytes.HasPrefix(chunk, []byte("data: ")) {
+					jsonData := chunk[6:]
+					data := gjson.ParseBytes(jsonData)
+					typeResult := data.Get("type")
+					if typeResult.String() != "" {
+						outputs := translatorGeminiToCodex.ConvertCodexResponseToGemini(jsonData, params)
+						if len(outputs) > 0 {
+							for i := 0; i < len(outputs); i++ {
+								outputs[i], _ = sjson.SetRaw("{}", "response", outputs[i])
+								_, _ = c.Writer.Write([]byte("data: "))
+								_, _ = c.Writer.Write([]byte(outputs[i]))
+								_, _ = c.Writer.Write([]byte("\n\n"))
+							}
+						}
+					}
+				}
+				flusher.Flush()
+			// Handle errors from the backend.
+			case errMessage, okError := <-errChan:
+				if okError {
+					if errMessage.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+						continue outLoop
+					} else {
+						log.Debugf("code: %d, error: %s", errMessage.StatusCode, errMessage.Error.Error())
+						c.Status(errMessage.StatusCode)
+						_, _ = fmt.Fprint(c.Writer, errMessage.Error.Error())
+						flusher.Flush()
+						cliCancel()
+					}
+					return
+				}
+			// Send a keep-alive signal to the client.
+			case <-time.After(500 * time.Millisecond):
+			}
+		}
+	}
+}
+
+func (h *GeminiCLIAPIHandlers) handleCodexInternalGenerateContent(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "application/json")
+	orgRawJSON := rawJSON
+	modelResult := gjson.GetBytes(rawJSON, "model")
+	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
+	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelResult.String())
+	rawJSON, _ = sjson.SetRawBytes(rawJSON, "system_instruction", []byte(gjson.GetBytes(rawJSON, "systemInstruction").Raw))
+	rawJSON, _ = sjson.DeleteBytes(rawJSON, "systemInstruction")
+
+	// Prepare the request for the backend client.
+	newRequestJSON := translatorGeminiToCodex.ConvertGeminiRequestToCodex(rawJSON)
+	// log.Debugf("Request: %s", newRequestJSON)
+
+	modelName := gjson.GetBytes(rawJSON, "model")
+
+	cliCtx, cliCancel := context.WithCancel(context.Background())
+	var cliClient client.Client
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName.String())
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			cliCancel()
+			return
+		}
+
+		log.Debugf("Request codex use account: %s", cliClient.GetEmail())
+
+		// Send the message and receive response chunks and errors via channels.
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+		for {
+			select {
+			// Handle client disconnection.
+			case <-c.Request.Context().Done():
+				if c.Request.Context().Err().Error() == "context canceled" {
+					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
+					cliCancel() // Cancel the backend request.
+					return
+				}
+			// Process incoming response chunks.
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					cliCancel()
+					return
+				}
+
+				if bytes.HasPrefix(chunk, []byte("data: ")) {
+					jsonData := chunk[6:]
+					data := gjson.ParseBytes(jsonData)
+					typeResult := data.Get("type")
+					if typeResult.String() != "" {
+						var geminiStr string
+						geminiStr = translatorGeminiToCodex.ConvertCodexResponseToGeminiNonStream(jsonData, modelName.String())
+						if geminiStr != "" {
+							_, _ = c.Writer.Write([]byte(geminiStr))
+						}
+					}
+				}
+			// Handle errors from the backend.
+			case err, okError := <-errChan:
+				if okError {
+					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+						continue outLoop
+					} else {
+						c.Status(err.StatusCode)
+						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
+						log.Debugf("org: %s", string(orgRawJSON))
+						log.Debugf("raw: %s", string(rawJSON))
+						log.Debugf("newRequestJSON: %s", newRequestJSON)
+						cliCancel()
+					}
+					return
+				}
+			// Send a keep-alive signal to the client.
+			case <-time.After(500 * time.Millisecond):
+			}
+		}
+	}
+}
--- a/internal/api/handlers/gemini/gemini-handlers.go
+++ b/internal/api/handlers/gemini/gemini-handlers.go
@@ -1,437 +0,0 @@
-// Package gemini provides HTTP handlers for Gemini API endpoints.
-// This package implements handlers for managing Gemini model operations including
-// model listing, content generation, streaming content generation, and token counting.
-// It serves as a proxy layer between clients and the Gemini backend service,
-// handling request translation, client management, and response processing.
-package gemini
-
-import (
-	"context"
-	"fmt"
-	"github.com/gin-gonic/gin"
-	"github.com/luispater/CLIProxyAPI/internal/api/handlers"
-	"github.com/luispater/CLIProxyAPI/internal/api/translator/gemini/cli"
-	"github.com/luispater/CLIProxyAPI/internal/client"
-	log "github.com/sirupsen/logrus"
-	"github.com/tidwall/gjson"
-	"github.com/tidwall/sjson"
-	"net/http"
-	"strings"
-	"time"
-)
-
-// GeminiAPIHandlers contains the handlers for Gemini API endpoints.
-// It holds a pool of clients to interact with the backend service.
-type GeminiAPIHandlers struct {
-	*handlers.APIHandlers
-}
-
-// NewGeminiAPIHandlers creates a new Gemini API handlers instance.
-// It takes an APIHandlers instance as input and returns a GeminiAPIHandlers.
-func NewGeminiAPIHandlers(apiHandlers *handlers.APIHandlers) *GeminiAPIHandlers {
-	return &GeminiAPIHandlers{
-		APIHandlers: apiHandlers,
-	}
-}
-
-// GeminiModels handles the Gemini models listing endpoint.
-// It returns a JSON response containing available Gemini models and their specifications.
-func (h *GeminiAPIHandlers) GeminiModels(c *gin.Context) {
-	c.Status(http.StatusOK)
-	c.Header("Content-Type", "application/json; charset=UTF-8")
-	_, _ = c.Writer.Write([]byte(`{"models":[{"name":"models/gemini-2.5-flash","version":"001","displayName":"Gemini `))
-	_, _ = c.Writer.Write([]byte(`2.5 Flash","description":"Stable version of Gemini 2.5 Flash, our mid-size multimod`))
-	_, _ = c.Writer.Write([]byte(`al model that supports up to 1 million tokens, released in June of 2025.","inputTok`))
-	_, _ = c.Writer.Write([]byte(`enLimit":1048576,"outputTokenLimit":65536,"supportedGenerationMethods":["generateCo`))
-	_, _ = c.Writer.Write([]byte(`ntent","countTokens","createCachedContent","batchGenerateContent"],"temperature":1,`))
-	_, _ = c.Writer.Write([]byte(`"topP":0.95,"topK":64,"maxTemperature":2,"thinking":true},{"name":"models/gemini-2.`))
-	_, _ = c.Writer.Write([]byte(`5-pro","version":"2.5","displayName":"Gemini 2.5 Pro","description":"Stable release`))
-	_, _ = c.Writer.Write([]byte(` (June 17th, 2025) of Gemini 2.5 Pro","inputTokenLimit":1048576,"outputTokenLimit":`))
-	_, _ = c.Writer.Write([]byte(`65536,"supportedGenerationMethods":["generateContent","countTokens","createCachedCo`))
-	_, _ = c.Writer.Write([]byte(`ntent","batchGenerateContent"],"temperature":1,"topP":0.95,"topK":64,"maxTemperatur`))
-	_, _ = c.Writer.Write([]byte(`e":2,"thinking":true}],"nextPageToken":""}`))
-}
-
-// GeminiGetHandler handles GET requests for specific Gemini model information.
-// It returns detailed information about a specific Gemini model based on the action parameter.
-func (h *GeminiAPIHandlers) GeminiGetHandler(c *gin.Context) {
-	var request struct {
-		Action string `uri:"action" binding:"required"`
-	}
-	if err := c.ShouldBindUri(&request); err != nil {
-		c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: fmt.Sprintf("Invalid request: %v", err),
-				Type:    "invalid_request_error",
-			},
-		})
-		return
-	}
-	if request.Action == "gemini-2.5-pro" {
-		c.Status(http.StatusOK)
-		c.Header("Content-Type", "application/json; charset=UTF-8")
-		_, _ = c.Writer.Write([]byte(`{"name":"models/gemini-2.5-pro","version":"2.5","displayName":"Gemini 2.5 Pro",`))
-		_, _ = c.Writer.Write([]byte(`"description":"Stable release (June 17th, 2025) of Gemini 2.5 Pro","inputTokenL`))
-		_, _ = c.Writer.Write([]byte(`imit":1048576,"outputTokenLimit":65536,"supportedGenerationMethods":["generateC`))
-		_, _ = c.Writer.Write([]byte(`ontent","countTokens","createCachedContent","batchGenerateContent"],"temperatur`))
-		_, _ = c.Writer.Write([]byte(`e":1,"topP":0.95,"topK":64,"maxTemperature":2,"thinking":true}`))
-	} else if request.Action == "gemini-2.5-flash" {
-		c.Status(http.StatusOK)
-		c.Header("Content-Type", "application/json; charset=UTF-8")
-		_, _ = c.Writer.Write([]byte(`{"name":"models/gemini-2.5-flash","version":"001","displayName":"Gemini 2.5 Fla`))
-		_, _ = c.Writer.Write([]byte(`sh","description":"Stable version of Gemini 2.5 Flash, our mid-size multimodal `))
-		_, _ = c.Writer.Write([]byte(`model that supports up to 1 million tokens, released in June of 2025.","inputTo`))
-		_, _ = c.Writer.Write([]byte(`kenLimit":1048576,"outputTokenLimit":65536,"supportedGenerationMethods":["gener`))
-		_, _ = c.Writer.Write([]byte(`ateContent","countTokens","createCachedContent","batchGenerateContent"],"temper`))
-		_, _ = c.Writer.Write([]byte(`ature":1,"topP":0.95,"topK":64,"maxTemperature":2,"thinking":true}`))
-	} else {
-		c.Status(http.StatusNotFound)
-		_, _ = c.Writer.Write([]byte(
-			`{"error":{"message":"Not Found","code":404,"status":"NOT_FOUND"}}`,
-		))
-	}
-}
-
-// GeminiHandler handles POST requests for Gemini API operations.
-// It routes requests to appropriate handlers based on the action parameter (model:method format).
-func (h *GeminiAPIHandlers) GeminiHandler(c *gin.Context) {
-	var request struct {
-		Action string `uri:"action" binding:"required"`
-	}
-	if err := c.ShouldBindUri(&request); err != nil {
-		c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: fmt.Sprintf("Invalid request: %v", err),
-				Type:    "invalid_request_error",
-			},
-		})
-		return
-	}
-	action := strings.Split(request.Action, ":")
-	if len(action) != 2 {
-		c.JSON(http.StatusNotFound, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: fmt.Sprintf("%s not found.", c.Request.URL.Path),
-				Type:    "invalid_request_error",
-			},
-		})
-		return
-	}
-
-	modelName := action[0]
-	method := action[1]
-	rawJSON, _ := c.GetRawData()
-	rawJSON, _ = sjson.SetBytes(rawJSON, "model", []byte(modelName))
-
-	if method == "generateContent" {
-		h.geminiGenerateContent(c, rawJSON)
-	} else if method == "streamGenerateContent" {
-		h.geminiStreamGenerateContent(c, rawJSON)
-	} else if method == "countTokens" {
-		h.geminiCountTokens(c, rawJSON)
-	}
-}
-
-func (h *GeminiAPIHandlers) geminiStreamGenerateContent(c *gin.Context, rawJSON []byte) {
-	alt := h.GetAlt(c)
-
-	if alt == "" {
-		c.Header("Content-Type", "text/event-stream")
-		c.Header("Cache-Control", "no-cache")
-		c.Header("Connection", "keep-alive")
-		c.Header("Access-Control-Allow-Origin", "*")
-	}
-
-	// Get the http.Flusher interface to manually flush the response.
-	flusher, ok := c.Writer.(http.Flusher)
-	if !ok {
-		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: "Streaming not supported",
-				Type:    "server_error",
-			},
-		})
-		return
-	}
-
-	modelResult := gjson.GetBytes(rawJSON, "model")
-	modelName := modelResult.String()
-
-	cliCtx, cliCancel := context.WithCancel(context.Background())
-	var cliClient *client.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		if cliClient != nil {
-			cliClient.RequestMutex.Unlock()
-		}
-	}()
-
-outLoop:
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
-			flusher.Flush()
-			cliCancel()
-			return
-		}
-
-		template := ""
-		parsed := gjson.Parse(string(rawJSON))
-		contents := parsed.Get("request.contents")
-		if contents.Exists() {
-			template = string(rawJSON)
-		} else {
-			template = `{"project":"","request":{},"model":""}`
-			template, _ = sjson.SetRaw(template, "request", string(rawJSON))
-			template, _ = sjson.Set(template, "model", gjson.Get(template, "request.model").String())
-			template, _ = sjson.Delete(template, "request.model")
-		}
-
-		template, errFixCLIToolResponse := cli.FixCLIToolResponse(template)
-		if errFixCLIToolResponse != nil {
-			c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
-				Error: handlers.ErrorDetail{
-					Message: errFixCLIToolResponse.Error(),
-					Type:    "server_error",
-				},
-			})
-			cliCancel()
-			return
-		}
-
-		systemInstructionResult := gjson.Get(template, "request.system_instruction")
-		if systemInstructionResult.Exists() {
-			template, _ = sjson.SetRaw(template, "request.systemInstruction", systemInstructionResult.Raw)
-			template, _ = sjson.Delete(template, "request.system_instruction")
-		}
-		rawJSON = []byte(template)
-
-		if glAPIKey := cliClient.GetGenerativeLanguageAPIKey(); glAPIKey != "" {
-			log.Debugf("Request use generative language API Key: %s", glAPIKey)
-		} else {
-			log.Debugf("Request use account: %s, project id: %s", cliClient.GetEmail(), cliClient.GetProjectID())
-		}
-
-		// Send the message and receive response chunks and errors via channels.
-		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, rawJSON, alt)
-		for {
-			select {
-			// Handle client disconnection.
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("Client disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request.
-					return
-				}
-			// Process incoming response chunks.
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					cliCancel()
-					return
-				}
-				if cliClient.GetGenerativeLanguageAPIKey() == "" {
-					if alt == "" {
-						responseResult := gjson.GetBytes(chunk, "response")
-						if responseResult.Exists() {
-							chunk = []byte(responseResult.Raw)
-						}
-					} else {
-						chunkTemplate := "[]"
-						responseResult := gjson.ParseBytes(chunk)
-						if responseResult.IsArray() {
-							responseResultItems := responseResult.Array()
-							for i := 0; i < len(responseResultItems); i++ {
-								responseResultItem := responseResultItems[i]
-								if responseResultItem.Get("response").Exists() {
-									chunkTemplate, _ = sjson.SetRaw(chunkTemplate, "-1", responseResultItem.Get("response").Raw)
-								}
-							}
-						}
-						chunk = []byte(chunkTemplate)
-					}
-				}
-				if alt == "" {
-					_, _ = c.Writer.Write([]byte("data: "))
-					_, _ = c.Writer.Write(chunk)
-					_, _ = c.Writer.Write([]byte("\n\n"))
-				} else {
-					_, _ = c.Writer.Write(chunk)
-				}
-				flusher.Flush()
-			// Handle errors from the backend.
-			case err, okError := <-errChan:
-				if okError {
-					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-						log.Debugf("quota exceeded, switch client")
-						continue outLoop
-					} else {
-						log.Debugf("error code :%d, error: %v", err.StatusCode, err.Error.Error())
-						c.Status(err.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
-						flusher.Flush()
-						cliCancel()
-					}
-					return
-				}
-			// Send a keep-alive signal to the client.
-			case <-time.After(500 * time.Millisecond):
-			}
-		}
-	}
-}
-
-func (h *GeminiAPIHandlers) geminiCountTokens(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "application/json")
-
-	alt := h.GetAlt(c)
-	// orgrawJSON := rawJSON
-	modelResult := gjson.GetBytes(rawJSON, "model")
-	modelName := modelResult.String()
-	cliCtx, cliCancel := context.WithCancel(context.Background())
-	var cliClient *client.Client
-	defer func() {
-		if cliClient != nil {
-			cliClient.RequestMutex.Unlock()
-		}
-	}()
-
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName, false)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
-			cliCancel()
-			return
-		}
-
-		if glAPIKey := cliClient.GetGenerativeLanguageAPIKey(); glAPIKey != "" {
-			log.Debugf("Request use generative language API Key: %s", glAPIKey)
-		} else {
-			log.Debugf("Request use account: %s, project id: %s", cliClient.GetEmail(), cliClient.GetProjectID())
-
-			template := `{"request":{}}`
-			if gjson.GetBytes(rawJSON, "generateContentRequest").Exists() {
-				template, _ = sjson.SetRaw(template, "request", gjson.GetBytes(rawJSON, "generateContentRequest").Raw)
-				template, _ = sjson.Delete(template, "generateContentRequest")
-			} else if gjson.GetBytes(rawJSON, "contents").Exists() {
-				template, _ = sjson.SetRaw(template, "request.contents", gjson.GetBytes(rawJSON, "contents").Raw)
-				template, _ = sjson.Delete(template, "contents")
-			}
-			rawJSON = []byte(template)
-		}
-
-		resp, err := cliClient.SendRawTokenCount(cliCtx, rawJSON, alt)
-		if err != nil {
-			if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-				continue
-			} else {
-				c.Status(err.StatusCode)
-				_, _ = c.Writer.Write([]byte(err.Error.Error()))
-				cliCancel()
-				// log.Debugf(err.Error.Error())
-				// log.Debugf(string(rawJSON))
-				// log.Debugf(string(orgrawJSON))
-			}
-			break
-		} else {
-			if cliClient.GetGenerativeLanguageAPIKey() == "" {
-				responseResult := gjson.GetBytes(resp, "response")
-				if responseResult.Exists() {
-					resp = []byte(responseResult.Raw)
-				}
-			}
-			_, _ = c.Writer.Write(resp)
-			cliCancel()
-			break
-		}
-	}
-}
-
-func (h *GeminiAPIHandlers) geminiGenerateContent(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "application/json")
-
-	alt := h.GetAlt(c)
-
-	modelResult := gjson.GetBytes(rawJSON, "model")
-	modelName := modelResult.String()
-	cliCtx, cliCancel := context.WithCancel(context.Background())
-	var cliClient *client.Client
-	defer func() {
-		if cliClient != nil {
-			cliClient.RequestMutex.Unlock()
-		}
-	}()
-
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
-			cliCancel()
-			return
-		}
-
-		template := ""
-		parsed := gjson.Parse(string(rawJSON))
-		contents := parsed.Get("request.contents")
-		if contents.Exists() {
-			template = string(rawJSON)
-		} else {
-			template = `{"project":"","request":{},"model":""}`
-			template, _ = sjson.SetRaw(template, "request", string(rawJSON))
-			template, _ = sjson.Set(template, "model", gjson.Get(template, "request.model").String())
-			template, _ = sjson.Delete(template, "request.model")
-		}
-
-		template, errFixCLIToolResponse := cli.FixCLIToolResponse(template)
-		if errFixCLIToolResponse != nil {
-			c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
-				Error: handlers.ErrorDetail{
-					Message: errFixCLIToolResponse.Error(),
-					Type:    "server_error",
-				},
-			})
-			cliCancel()
-			return
-		}
-
-		systemInstructionResult := gjson.Get(template, "request.system_instruction")
-		if systemInstructionResult.Exists() {
-			template, _ = sjson.SetRaw(template, "request.systemInstruction", systemInstructionResult.Raw)
-			template, _ = sjson.Delete(template, "request.system_instruction")
-		}
-		rawJSON = []byte(template)
-
-		if glAPIKey := cliClient.GetGenerativeLanguageAPIKey(); glAPIKey != "" {
-			log.Debugf("Request use generative language API Key: %s", glAPIKey)
-		} else {
-			log.Debugf("Request use account: %s, project id: %s", cliClient.GetEmail(), cliClient.GetProjectID())
-		}
-		resp, err := cliClient.SendRawMessage(cliCtx, rawJSON, alt)
-		if err != nil {
-			if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-				continue
-			} else {
-				c.Status(err.StatusCode)
-				_, _ = c.Writer.Write([]byte(err.Error.Error()))
-				cliCancel()
-			}
-			break
-		} else {
-			if cliClient.GetGenerativeLanguageAPIKey() == "" {
-				responseResult := gjson.GetBytes(resp, "response")
-				if responseResult.Exists() {
-					resp = []byte(responseResult.Raw)
-				}
-			}
-			_, _ = c.Writer.Write(resp)
-			cliCancel()
-			break
-		}
-	}
-}
--- a/internal/api/handlers/gemini/gemini_handlers.go
+++ b/internal/api/handlers/gemini/gemini_handlers.go
@@ -0,0 +1,735 @@
+// Package gemini provides HTTP handlers for Gemini API endpoints.
+// This package implements handlers for managing Gemini model operations including
+// model listing, content generation, streaming content generation, and token counting.
+// It serves as a proxy layer between clients and the Gemini backend service,
+// handling request translation, client management, and response processing.
+package gemini
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/gin-gonic/gin"
+	"github.com/luispater/CLIProxyAPI/internal/api/handlers"
+	"github.com/luispater/CLIProxyAPI/internal/client"
+	translatorGeminiToCodex "github.com/luispater/CLIProxyAPI/internal/translator/codex/gemini"
+	translatorGeminiToGeminiCli "github.com/luispater/CLIProxyAPI/internal/translator/gemini-cli/gemini/cli"
+	"github.com/luispater/CLIProxyAPI/internal/util"
+	log "github.com/sirupsen/logrus"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// GeminiAPIHandlers contains the handlers for Gemini API endpoints.
+// It holds a pool of clients to interact with the backend service.
+type GeminiAPIHandlers struct {
+	*handlers.APIHandlers
+}
+
+// NewGeminiAPIHandlers creates a new Gemini API handlers instance.
+// It takes an APIHandlers instance as input and returns a GeminiAPIHandlers.
+func NewGeminiAPIHandlers(apiHandlers *handlers.APIHandlers) *GeminiAPIHandlers {
+	return &GeminiAPIHandlers{
+		APIHandlers: apiHandlers,
+	}
+}
+
+// GeminiModels handles the Gemini models listing endpoint.
+// It returns a JSON response containing available Gemini models and their specifications.
+func (h *GeminiAPIHandlers) GeminiModels(c *gin.Context) {
+	c.JSON(http.StatusOK, gin.H{
+		"data": []map[string]any{
+			{
+				"id":                    "gemini-2.5-flash",
+				"object":                "model",
+				"version":               "001",
+				"name":                  "Gemini 2.5 Flash",
+				"description":           "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+				"context_length":        1_048_576,
+				"max_completion_tokens": 65_536,
+				"supported_parameters": []string{
+					"tools",
+					"temperature",
+					"top_p",
+					"top_k",
+				},
+				"temperature":    1,
+				"topP":           0.95,
+				"topK":           64,
+				"maxTemperature": 2,
+				"thinking":       true,
+			},
+			{
+				"id":                    "gemini-2.5-pro",
+				"object":                "model",
+				"version":               "2.5",
+				"name":                  "Gemini 2.5 Pro",
+				"description":           "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+				"context_length":        1_048_576,
+				"max_completion_tokens": 65_536,
+				"supported_parameters": []string{
+					"tools",
+					"temperature",
+					"top_p",
+					"top_k",
+				},
+				"temperature":    1,
+				"topP":           0.95,
+				"topK":           64,
+				"maxTemperature": 2,
+				"thinking":       true,
+			},
+			{
+				"id":                    "gpt-5",
+				"object":                "model",
+				"version":               "gpt-5-2025-08-07",
+				"name":                  "GPT 5",
+				"description":           "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
+				"context_length":        400_000,
+				"max_completion_tokens": 128_000,
+				"supported_parameters": []string{
+					"tools",
+				},
+				"temperature":    1,
+				"topP":           0.95,
+				"topK":           64,
+				"maxTemperature": 2,
+				"thinking":       true,
+			},
+		},
+	})
+}
+
+// GeminiGetHandler handles GET requests for specific Gemini model information.
+// It returns detailed information about a specific Gemini model based on the action parameter.
+func (h *GeminiAPIHandlers) GeminiGetHandler(c *gin.Context) {
+	var request struct {
+		Action string `uri:"action" binding:"required"`
+	}
+	if err := c.ShouldBindUri(&request); err != nil {
+		c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: fmt.Sprintf("Invalid request: %v", err),
+				Type:    "invalid_request_error",
+			},
+		})
+		return
+	}
+	switch request.Action {
+	case "gemini-2.5-pro":
+		c.JSON(http.StatusOK, gin.H{
+			"id":                    "gemini-2.5-pro",
+			"object":                "model",
+			"version":               "2.5",
+			"name":                  "Gemini 2.5 Pro",
+			"description":           "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+			"context_length":        1_048_576,
+			"max_completion_tokens": 65_536,
+			"supported_parameters": []string{
+				"tools",
+				"temperature",
+				"top_p",
+				"top_k",
+			},
+			"temperature":    1,
+			"topP":           0.95,
+			"topK":           64,
+			"maxTemperature": 2,
+			"thinking":       true,
+		})
+	case "gemini-2.5-flash":
+		c.JSON(http.StatusOK, gin.H{
+			"id":                    "gemini-2.5-flash",
+			"object":                "model",
+			"version":               "001",
+			"name":                  "Gemini 2.5 Flash",
+			"description":           "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			"context_length":        1_048_576,
+			"max_completion_tokens": 65_536,
+			"supported_parameters": []string{
+				"tools",
+				"temperature",
+				"top_p",
+				"top_k",
+			},
+			"temperature":    1,
+			"topP":           0.95,
+			"topK":           64,
+			"maxTemperature": 2,
+			"thinking":       true,
+		})
+	case "gpt-5":
+		c.JSON(http.StatusOK, gin.H{
+			"id":                    "gpt-5",
+			"object":                "model",
+			"version":               "gpt-5-2025-08-07",
+			"name":                  "GPT 5",
+			"description":           "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
+			"context_length":        400_000,
+			"max_completion_tokens": 128_000,
+			"supported_parameters": []string{
+				"tools",
+			},
+			"temperature":    1,
+			"topP":           0.95,
+			"topK":           64,
+			"maxTemperature": 2,
+			"thinking":       true,
+		})
+	default:
+		c.JSON(http.StatusNotFound, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Not Found",
+				Type:    "not_found",
+			},
+		})
+	}
+}
+
+// GeminiHandler handles POST requests for Gemini API operations.
+// It routes requests to appropriate handlers based on the action parameter (model:method format).
+func (h *GeminiAPIHandlers) GeminiHandler(c *gin.Context) {
+	var request struct {
+		Action string `uri:"action" binding:"required"`
+	}
+	if err := c.ShouldBindUri(&request); err != nil {
+		c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: fmt.Sprintf("Invalid request: %v", err),
+				Type:    "invalid_request_error",
+			},
+		})
+		return
+	}
+	action := strings.Split(request.Action, ":")
+	if len(action) != 2 {
+		c.JSON(http.StatusNotFound, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: fmt.Sprintf("%s not found.", c.Request.URL.Path),
+				Type:    "invalid_request_error",
+			},
+		})
+		return
+	}
+
+	modelName := action[0]
+	method := action[1]
+	rawJSON, _ := c.GetRawData()
+	rawJSON, _ = sjson.SetBytes(rawJSON, "model", []byte(modelName))
+
+	provider := util.GetProviderName(modelName)
+	if provider == "gemini" || provider == "unknow" {
+		switch method {
+		case "generateContent":
+			h.handleGeminiGenerateContent(c, rawJSON)
+		case "streamGenerateContent":
+			h.handleGeminiStreamGenerateContent(c, rawJSON)
+		case "countTokens":
+			h.handleGeminiCountTokens(c, rawJSON)
+		}
+	} else if provider == "gpt" {
+		switch method {
+		case "generateContent":
+			h.handleCodexGenerateContent(c, rawJSON)
+		case "streamGenerateContent":
+			h.handleCodexStreamGenerateContent(c, rawJSON)
+		}
+
+	}
+}
+
+func (h *GeminiAPIHandlers) handleGeminiStreamGenerateContent(c *gin.Context, rawJSON []byte) {
+	alt := h.GetAlt(c)
+
+	if alt == "" {
+		c.Header("Content-Type", "text/event-stream")
+		c.Header("Cache-Control", "no-cache")
+		c.Header("Connection", "keep-alive")
+		c.Header("Access-Control-Allow-Origin", "*")
+	}
+
+	// Get the http.Flusher interface to manually flush the response.
+	flusher, ok := c.Writer.(http.Flusher)
+	if !ok {
+		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Streaming not supported",
+				Type:    "server_error",
+			},
+		})
+		return
+	}
+
+	modelResult := gjson.GetBytes(rawJSON, "model")
+	modelName := modelResult.String()
+
+	cliCtx, cliCancel := context.WithCancel(context.Background())
+	var cliClient client.Client
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName)
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			flusher.Flush()
+			cliCancel()
+			return
+		}
+
+		template := ""
+		parsed := gjson.Parse(string(rawJSON))
+		contents := parsed.Get("request.contents")
+		if contents.Exists() {
+			template = string(rawJSON)
+		} else {
+			template = `{"project":"","request":{},"model":""}`
+			template, _ = sjson.SetRaw(template, "request", string(rawJSON))
+			template, _ = sjson.Set(template, "model", gjson.Get(template, "request.model").String())
+			template, _ = sjson.Delete(template, "request.model")
+		}
+
+		template, errFixCLIToolResponse := translatorGeminiToGeminiCli.FixCLIToolResponse(template)
+		if errFixCLIToolResponse != nil {
+			c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+				Error: handlers.ErrorDetail{
+					Message: errFixCLIToolResponse.Error(),
+					Type:    "server_error",
+				},
+			})
+			cliCancel()
+			return
+		}
+
+		systemInstructionResult := gjson.Get(template, "request.system_instruction")
+		if systemInstructionResult.Exists() {
+			template, _ = sjson.SetRaw(template, "request.systemInstruction", systemInstructionResult.Raw)
+			template, _ = sjson.Delete(template, "request.system_instruction")
+		}
+		rawJSON = []byte(template)
+
+		if glAPIKey := cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey(); glAPIKey != "" {
+			log.Debugf("Request use generative language API Key: %s", glAPIKey)
+		} else {
+			log.Debugf("Request cli use account: %s, project id: %s", cliClient.(*client.GeminiClient).GetEmail(), cliClient.(*client.GeminiClient).GetProjectID())
+		}
+
+		// Send the message and receive response chunks and errors via channels.
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, rawJSON, alt)
+		for {
+			select {
+			// Handle client disconnection.
+			case <-c.Request.Context().Done():
+				if c.Request.Context().Err().Error() == "context canceled" {
+					log.Debugf("GeminiClient disconnected: %v", c.Request.Context().Err())
+					cliCancel() // Cancel the backend request.
+					return
+				}
+			// Process incoming response chunks.
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					cliCancel()
+					return
+				}
+				if cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey() == "" {
+					if alt == "" {
+						responseResult := gjson.GetBytes(chunk, "response")
+						if responseResult.Exists() {
+							chunk = []byte(responseResult.Raw)
+						}
+					} else {
+						chunkTemplate := "[]"
+						responseResult := gjson.ParseBytes(chunk)
+						if responseResult.IsArray() {
+							responseResultItems := responseResult.Array()
+							for i := 0; i < len(responseResultItems); i++ {
+								responseResultItem := responseResultItems[i]
+								if responseResultItem.Get("response").Exists() {
+									chunkTemplate, _ = sjson.SetRaw(chunkTemplate, "-1", responseResultItem.Get("response").Raw)
+								}
+							}
+						}
+						chunk = []byte(chunkTemplate)
+					}
+				}
+				if alt == "" {
+					_, _ = c.Writer.Write([]byte("data: "))
+					_, _ = c.Writer.Write(chunk)
+					_, _ = c.Writer.Write([]byte("\n\n"))
+				} else {
+					_, _ = c.Writer.Write(chunk)
+				}
+				flusher.Flush()
+			// Handle errors from the backend.
+			case err, okError := <-errChan:
+				if okError {
+					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+						log.Debugf("quota exceeded, switch client")
+						continue outLoop
+					} else {
+						log.Debugf("error code :%d, error: %v", err.StatusCode, err.Error.Error())
+						c.Status(err.StatusCode)
+						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
+						flusher.Flush()
+						cliCancel()
+					}
+					return
+				}
+			// Send a keep-alive signal to the client.
+			case <-time.After(500 * time.Millisecond):
+			}
+		}
+	}
+}
+
+func (h *GeminiAPIHandlers) handleGeminiCountTokens(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "application/json")
+
+	alt := h.GetAlt(c)
+	// orgrawJSON := rawJSON
+	modelResult := gjson.GetBytes(rawJSON, "model")
+	modelName := modelResult.String()
+	cliCtx, cliCancel := context.WithCancel(context.Background())
+	var cliClient client.Client
+	defer func() {
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName, false)
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			cliCancel()
+			return
+		}
+
+		if glAPIKey := cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey(); glAPIKey != "" {
+			log.Debugf("Request use generative language API Key: %s", glAPIKey)
+		} else {
+			log.Debugf("Request cli use account: %s, project id: %s", cliClient.(*client.GeminiClient).GetEmail(), cliClient.(*client.GeminiClient).GetProjectID())
+
+			template := `{"request":{}}`
+			if gjson.GetBytes(rawJSON, "generateContentRequest").Exists() {
+				template, _ = sjson.SetRaw(template, "request", gjson.GetBytes(rawJSON, "generateContentRequest").Raw)
+				template, _ = sjson.Delete(template, "generateContentRequest")
+			} else if gjson.GetBytes(rawJSON, "contents").Exists() {
+				template, _ = sjson.SetRaw(template, "request.contents", gjson.GetBytes(rawJSON, "contents").Raw)
+				template, _ = sjson.Delete(template, "contents")
+			}
+			rawJSON = []byte(template)
+		}
+
+		resp, err := cliClient.SendRawTokenCount(cliCtx, rawJSON, alt)
+		if err != nil {
+			if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+				continue
+			} else {
+				c.Status(err.StatusCode)
+				_, _ = c.Writer.Write([]byte(err.Error.Error()))
+				cliCancel()
+				// log.Debugf(err.Error.Error())
+				// log.Debugf(string(rawJSON))
+				// log.Debugf(string(orgrawJSON))
+			}
+			break
+		} else {
+			if cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey() == "" {
+				responseResult := gjson.GetBytes(resp, "response")
+				if responseResult.Exists() {
+					resp = []byte(responseResult.Raw)
+				}
+			}
+			_, _ = c.Writer.Write(resp)
+			cliCancel()
+			break
+		}
+	}
+}
+
+func (h *GeminiAPIHandlers) handleGeminiGenerateContent(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "application/json")
+
+	alt := h.GetAlt(c)
+
+	modelResult := gjson.GetBytes(rawJSON, "model")
+	modelName := modelResult.String()
+	cliCtx, cliCancel := context.WithCancel(context.Background())
+	var cliClient client.Client
+	defer func() {
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName)
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			cliCancel()
+			return
+		}
+
+		template := ""
+		parsed := gjson.Parse(string(rawJSON))
+		contents := parsed.Get("request.contents")
+		if contents.Exists() {
+			template = string(rawJSON)
+		} else {
+			template = `{"project":"","request":{},"model":""}`
+			template, _ = sjson.SetRaw(template, "request", string(rawJSON))
+			template, _ = sjson.Set(template, "model", gjson.Get(template, "request.model").String())
+			template, _ = sjson.Delete(template, "request.model")
+		}
+
+		template, errFixCLIToolResponse := translatorGeminiToGeminiCli.FixCLIToolResponse(template)
+		if errFixCLIToolResponse != nil {
+			c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+				Error: handlers.ErrorDetail{
+					Message: errFixCLIToolResponse.Error(),
+					Type:    "server_error",
+				},
+			})
+			cliCancel()
+			return
+		}
+
+		systemInstructionResult := gjson.Get(template, "request.system_instruction")
+		if systemInstructionResult.Exists() {
+			template, _ = sjson.SetRaw(template, "request.systemInstruction", systemInstructionResult.Raw)
+			template, _ = sjson.Delete(template, "request.system_instruction")
+		}
+		rawJSON = []byte(template)
+
+		if glAPIKey := cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey(); glAPIKey != "" {
+			log.Debugf("Request use generative language API Key: %s", glAPIKey)
+		} else {
+			log.Debugf("Request cli use account: %s, project id: %s", cliClient.(*client.GeminiClient).GetEmail(), cliClient.(*client.GeminiClient).GetProjectID())
+		}
+		resp, err := cliClient.SendRawMessage(cliCtx, rawJSON, alt)
+		if err != nil {
+			if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+				continue
+			} else {
+				c.Status(err.StatusCode)
+				_, _ = c.Writer.Write([]byte(err.Error.Error()))
+				cliCancel()
+			}
+			break
+		} else {
+			if cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey() == "" {
+				responseResult := gjson.GetBytes(resp, "response")
+				if responseResult.Exists() {
+					resp = []byte(responseResult.Raw)
+				}
+			}
+			_, _ = c.Writer.Write(resp)
+			cliCancel()
+			break
+		}
+	}
+}
+
+func (h *GeminiAPIHandlers) handleCodexStreamGenerateContent(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "text/event-stream")
+	c.Header("Cache-Control", "no-cache")
+	c.Header("Connection", "keep-alive")
+	c.Header("Access-Control-Allow-Origin", "*")
+
+	// Get the http.Flusher interface to manually flush the response.
+	flusher, ok := c.Writer.(http.Flusher)
+	if !ok {
+		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Streaming not supported",
+				Type:    "server_error",
+			},
+		})
+		return
+	}
+
+	// Prepare the request for the backend client.
+	newRequestJSON := translatorGeminiToCodex.ConvertGeminiRequestToCodex(rawJSON)
+	// log.Debugf("Request: %s", newRequestJSON)
+
+	modelName := gjson.GetBytes(rawJSON, "model")
+
+	cliCtx, cliCancel := context.WithCancel(context.Background())
+	var cliClient client.Client
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName.String())
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			flusher.Flush()
+			cliCancel()
+			return
+		}
+
+		log.Debugf("Request codex use account: %s", cliClient.GetEmail())
+
+		// Send the message and receive response chunks and errors via channels.
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+		params := &translatorGeminiToCodex.ConvertCodexResponseToGeminiParams{
+			Model:             modelName.String(),
+			CreatedAt:         0,
+			ResponseID:        "",
+			LastStorageOutput: "",
+		}
+		for {
+			select {
+			// Handle client disconnection.
+			case <-c.Request.Context().Done():
+				if c.Request.Context().Err().Error() == "context canceled" {
+					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
+					cliCancel() // Cancel the backend request.
+					return
+				}
+			// Process incoming response chunks.
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					cliCancel()
+					return
+				}
+
+				if bytes.HasPrefix(chunk, []byte("data: ")) {
+					jsonData := chunk[6:]
+					data := gjson.ParseBytes(jsonData)
+					typeResult := data.Get("type")
+					if typeResult.String() != "" {
+						outputs := translatorGeminiToCodex.ConvertCodexResponseToGemini(jsonData, params)
+						if len(outputs) > 0 {
+							for i := 0; i < len(outputs); i++ {
+								_, _ = c.Writer.Write([]byte("data: "))
+								_, _ = c.Writer.Write([]byte(outputs[i]))
+								_, _ = c.Writer.Write([]byte("\n\n"))
+							}
+						}
+					}
+					// log.Debugf(string(jsonData))
+				}
+				flusher.Flush()
+			// Handle errors from the backend.
+			case err, okError := <-errChan:
+				if okError {
+					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+						continue outLoop
+					} else {
+						c.Status(err.StatusCode)
+						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
+						flusher.Flush()
+						cliCancel()
+					}
+					return
+				}
+			// Send a keep-alive signal to the client.
+			case <-time.After(500 * time.Millisecond):
+			}
+		}
+	}
+}
+
+func (h *GeminiAPIHandlers) handleCodexGenerateContent(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "application/json")
+
+	// Prepare the request for the backend client.
+	newRequestJSON := translatorGeminiToCodex.ConvertGeminiRequestToCodex(rawJSON)
+	// log.Debugf("Request: %s", newRequestJSON)
+
+	modelName := gjson.GetBytes(rawJSON, "model")
+
+	cliCtx, cliCancel := context.WithCancel(context.Background())
+	var cliClient client.Client
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName.String())
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			cliCancel()
+			return
+		}
+
+		log.Debugf("Request codex use account: %s", cliClient.GetEmail())
+
+		// Send the message and receive response chunks and errors via channels.
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+		for {
+			select {
+			// Handle client disconnection.
+			case <-c.Request.Context().Done():
+				if c.Request.Context().Err().Error() == "context canceled" {
+					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
+					cliCancel() // Cancel the backend request.
+					return
+				}
+			// Process incoming response chunks.
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					cliCancel()
+					return
+				}
+
+				if bytes.HasPrefix(chunk, []byte("data: ")) {
+					jsonData := chunk[6:]
+					data := gjson.ParseBytes(jsonData)
+					typeResult := data.Get("type")
+					if typeResult.String() != "" {
+						var geminiStr string
+						geminiStr = translatorGeminiToCodex.ConvertCodexResponseToGeminiNonStream(jsonData, modelName.String())
+						if geminiStr != "" {
+							_, _ = c.Writer.Write([]byte(geminiStr))
+						}
+					}
+				}
+			// Handle errors from the backend.
+			case err, okError := <-errChan:
+				if okError {
+					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+						continue outLoop
+					} else {
+						c.Status(err.StatusCode)
+						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
+						cliCancel()
+					}
+					return
+				}
+			// Send a keep-alive signal to the client.
+			case <-time.After(500 * time.Millisecond):
+			}
+		}
+	}
+}
--- a/internal/api/handlers/handlers.go
+++ b/internal/api/handlers/handlers.go
@@ -5,52 +5,78 @@ package handlers

 import (
 	"fmt"
+	"sync"
+
 	"github.com/gin-gonic/gin"
 	"github.com/luispater/CLIProxyAPI/internal/client"
 	"github.com/luispater/CLIProxyAPI/internal/config"
+	"github.com/luispater/CLIProxyAPI/internal/util"
 	log "github.com/sirupsen/logrus"
-	"sync"
 )

 // ErrorResponse represents a standard error response format for the API.
 // It contains a single ErrorDetail field.
 type ErrorResponse struct {
+	// Error contains detailed information about the error that occurred.
 	Error ErrorDetail `json:"error"`
 }

 // ErrorDetail provides specific information about an error that occurred.
 // It includes a human-readable message, an error type, and an optional error code.
 type ErrorDetail struct {
-	// A human-readable message providing more details about the error.
+	// Message is a human-readable message providing more details about the error.
 	Message string `json:"message"`
-	// The type of error that occurred (e.g., "invalid_request_error").
+
+	// Type is the category of error that occurred (e.g., "invalid_request_error").
 	Type string `json:"type"`
-	// A short code identifying the error, if applicable.
+
+	// Code is a short code identifying the error, if applicable.
 	Code string `json:"code,omitempty"`
 }

 // APIHandlers contains the handlers for API endpoints.
-// It holds a pool of clients to interact with the backend service.
+// It holds a pool of clients to interact with the backend service and manages
+// load balancing, client selection, and configuration.
 type APIHandlers struct {
-	CliClients          []*client.Client
-	Cfg                 *config.Config
-	Mutex               *sync.Mutex
-	LastUsedClientIndex int
+	// CliClients is the pool of available AI service clients.
+	CliClients []client.Client
+
+	// Cfg holds the current application configuration.
+	Cfg *config.Config
+
+	// Mutex ensures thread-safe access to shared resources.
+	Mutex *sync.Mutex
+
+	// LastUsedClientIndex tracks the last used client index for each provider
+	// to implement round-robin load balancing.
+	LastUsedClientIndex map[string]int
 }

 // NewAPIHandlers creates a new API handlers instance.
-// It takes a slice of clients and a debug flag as input.
-func NewAPIHandlers(cliClients []*client.Client, cfg *config.Config) *APIHandlers {
+// It takes a slice of clients and configuration as input.
+//
+// Parameters:
+//   - cliClients: A slice of AI service clients
+//   - cfg: The application configuration
+//
+// Returns:
+//   - *APIHandlers: A new API handlers instance
+func NewAPIHandlers(cliClients []client.Client, cfg *config.Config) *APIHandlers {
 	return &APIHandlers{
 		CliClients:          cliClients,
 		Cfg:                 cfg,
 		Mutex:               &sync.Mutex{},
-		LastUsedClientIndex: 0,
+		LastUsedClientIndex: make(map[string]int),
 	}
 }

-// UpdateClients updates the handlers' client list and configuration
-func (h *APIHandlers) UpdateClients(clients []*client.Client, cfg *config.Config) {
+// UpdateClients updates the handlers' client list and configuration.
+// This method is called when the configuration or authentication tokens change.
+//
+// Parameters:
+//   - clients: The new slice of AI service clients
+//   - cfg: The new application configuration
+func (h *APIHandlers) UpdateClients(clients []client.Client, cfg *config.Config) {
 	h.CliClients = clients
 	h.Cfg = cfg
 }
@@ -58,30 +84,63 @@ func (h *APIHandlers) UpdateClients(clients []*client.Client, cfg *config.Config
 // GetClient returns an available client from the pool using round-robin load balancing.
 // It checks for quota limits and tries to find an unlocked client for immediate use.
 // The modelName parameter is used to check quota status for specific models.
-func (h *APIHandlers) GetClient(modelName string, isGenerateContent ...bool) (*client.Client, *client.ErrorMessage) {
-	if len(h.CliClients) == 0 {
+//
+// Parameters:
+//   - modelName: The name of the model to be used
+//   - isGenerateContent: Optional parameter to indicate if this is for content generation
+//
+// Returns:
+//   - client.Client: An available client for the requested model
+//   - *client.ErrorMessage: An error message if no client is available
+func (h *APIHandlers) GetClient(modelName string, isGenerateContent ...bool) (client.Client, *client.ErrorMessage) {
+	provider := util.GetProviderName(modelName)
+	clients := make([]client.Client, 0)
+	if provider == "gemini" {
+		for i := 0; i < len(h.CliClients); i++ {
+			if cli, ok := h.CliClients[i].(*client.GeminiClient); ok {
+				clients = append(clients, cli)
+			}
+		}
+	} else if provider == "gpt" {
+		for i := 0; i < len(h.CliClients); i++ {
+			if cli, ok := h.CliClients[i].(*client.CodexClient); ok {
+				clients = append(clients, cli)
+			}
+		}
+	}
+
+	if _, hasKey := h.LastUsedClientIndex[provider]; !hasKey {
+		h.LastUsedClientIndex[provider] = 0
+	}
+
+	if len(clients) == 0 {
 		return nil, &client.ErrorMessage{StatusCode: 500, Error: fmt.Errorf("no clients available")}
 	}

-	var cliClient *client.Client
+	var cliClient client.Client

 	// Lock the mutex to update the last used client index
 	h.Mutex.Lock()
-	startIndex := h.LastUsedClientIndex
+	startIndex := h.LastUsedClientIndex[provider]
 	if (len(isGenerateContent) > 0 && isGenerateContent[0]) || len(isGenerateContent) == 0 {
-		currentIndex := (startIndex + 1) % len(h.CliClients)
-		h.LastUsedClientIndex = currentIndex
+		currentIndex := (startIndex + 1) % len(clients)
+		h.LastUsedClientIndex[provider] = currentIndex
 	}
 	h.Mutex.Unlock()

 	// Reorder the client to start from the last used index
-	reorderedClients := make([]*client.Client, 0)
-	for i := 0; i < len(h.CliClients); i++ {
-		cliClient = h.CliClients[(startIndex+1+i)%len(h.CliClients)]
+	reorderedClients := make([]client.Client, 0)
+	for i := 0; i < len(clients); i++ {
+		cliClient = clients[(startIndex+1+i)%len(clients)]
 		if cliClient.IsModelQuotaExceeded(modelName) {
-			log.Debugf("Model %s is quota exceeded for account %s, project id: %s", modelName, cliClient.GetEmail(), cliClient.GetProjectID())
+			if provider == "gemini" {
+				log.Debugf("Gemini Model %s is quota exceeded for account %s, project id: %s", modelName, cliClient.GetEmail(), cliClient.(*client.GeminiClient).GetProjectID())
+			} else if provider == "gpt" {
+				log.Debugf("Codex Model %s is quota exceeded for account %s", modelName, cliClient.GetEmail())
+			}
 			cliClient = nil
 			continue
+
 		}
 		reorderedClients = append(reorderedClients, cliClient)
 	}
@@ -93,14 +152,14 @@ func (h *APIHandlers) GetClient(modelName string, isGenerateContent ...bool) (*c
 	locked := false
 	for i := 0; i < len(reorderedClients); i++ {
 		cliClient = reorderedClients[i]
-		if cliClient.RequestMutex.TryLock() {
+		if cliClient.GetRequestMutex().TryLock() {
 			locked = true
 			break
 		}
 	}
 	if !locked {
-		cliClient = h.CliClients[0]
-		cliClient.RequestMutex.Lock()
+		cliClient = clients[0]
+		cliClient.GetRequestMutex().Lock()
 	}

 	return cliClient, nil
@@ -108,6 +167,12 @@ func (h *APIHandlers) GetClient(modelName string, isGenerateContent ...bool) (*c

 // GetAlt extracts the 'alt' parameter from the request query string.
 // It checks both 'alt' and '$alt' parameters and returns the appropriate value.
+//
+// Parameters:
+//   - c: The Gin context containing the HTTP request
+//
+// Returns:
+//   - string: The alt parameter value, or empty string if it's "sse"
 func (h *APIHandlers) GetAlt(c *gin.Context) string {
 	var alt string
 	var hasAlt bool
--- a/internal/api/handlers/openai/openai-handlers.go
+++ b/internal/api/handlers/openai/openai-handlers.go
@@ -1,264 +0,0 @@
-// Package openai provides HTTP handlers for OpenAI API endpoints.
-// This package implements the OpenAI-compatible API interface, including model listing
-// and chat completion functionality. It supports both streaming and non-streaming responses,
-// and manages a pool of clients to interact with backend services.
-// The handlers translate OpenAI API requests to the appropriate backend format and
-// convert responses back to OpenAI-compatible format.
-package openai
-
-import (
-	"context"
-	"fmt"
-	"github.com/luispater/CLIProxyAPI/internal/api/handlers"
-	"github.com/luispater/CLIProxyAPI/internal/api/translator/openai"
-	"github.com/luispater/CLIProxyAPI/internal/client"
-	log "github.com/sirupsen/logrus"
-	"github.com/tidwall/gjson"
-	"net/http"
-	"time"
-
-	"github.com/gin-gonic/gin"
-)
-
-// OpenAIAPIHandlers contains the handlers for OpenAI API endpoints.
-// It holds a pool of clients to interact with the backend service.
-type OpenAIAPIHandlers struct {
-	*handlers.APIHandlers
-}
-
-// NewOpenAIAPIHandlers creates a new OpenAI API handlers instance.
-// It takes an APIHandlers instance as input and returns an OpenAIAPIHandlers.
-func NewOpenAIAPIHandlers(apiHandlers *handlers.APIHandlers) *OpenAIAPIHandlers {
-	return &OpenAIAPIHandlers{
-		APIHandlers: apiHandlers,
-	}
-}
-
-// Models handles the /v1/models endpoint.
-// It returns a hardcoded list of available AI models.
-func (h *OpenAIAPIHandlers) Models(c *gin.Context) {
-	c.JSON(http.StatusOK, gin.H{
-		"data": []map[string]any{
-			{
-				"id":                    "gemini-2.5-pro",
-				"object":                "model",
-				"version":               "2.5",
-				"name":                  "Gemini 2.5 Pro",
-				"description":           "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
-				"context_length":        1048576,
-				"max_completion_tokens": 65536,
-				"supported_parameters": []string{
-					"tools",
-					"temperature",
-					"top_p",
-					"top_k",
-				},
-				"temperature":    1,
-				"topP":           0.95,
-				"topK":           64,
-				"maxTemperature": 2,
-				"thinking":       true,
-			},
-			{
-				"id":                    "gemini-2.5-flash",
-				"object":                "model",
-				"version":               "001",
-				"name":                  "Gemini 2.5 Flash",
-				"description":           "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-				"context_length":        1048576,
-				"max_completion_tokens": 65536,
-				"supported_parameters": []string{
-					"tools",
-					"temperature",
-					"top_p",
-					"top_k",
-				},
-				"temperature":    1,
-				"topP":           0.95,
-				"topK":           64,
-				"maxTemperature": 2,
-				"thinking":       true,
-			},
-		},
-	})
-}
-
-// ChatCompletions handles the /v1/chat/completions endpoint.
-// It determines whether the request is for a streaming or non-streaming response
-// and calls the appropriate handler.
-func (h *OpenAIAPIHandlers) ChatCompletions(c *gin.Context) {
-	rawJSON, err := c.GetRawData()
-	// If data retrieval fails, return a 400 Bad Request error.
-	if err != nil {
-		c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: fmt.Sprintf("Invalid request: %v", err),
-				Type:    "invalid_request_error",
-			},
-		})
-		return
-	}
-
-	// Check if the client requested a streaming response.
-	streamResult := gjson.GetBytes(rawJSON, "stream")
-	if streamResult.Type == gjson.True {
-		h.handleStreamingResponse(c, rawJSON)
-	} else {
-		h.handleNonStreamingResponse(c, rawJSON)
-	}
-}
-
-// handleNonStreamingResponse handles non-streaming chat completion responses.
-// It selects a client from the pool, sends the request, and aggregates the response
-// before sending it back to the client.
-func (h *OpenAIAPIHandlers) handleNonStreamingResponse(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "application/json")
-
-	modelName, systemInstruction, contents, tools := openai.PrepareRequest(rawJSON)
-	cliCtx, cliCancel := context.WithCancel(context.Background())
-	var cliClient *client.Client
-	defer func() {
-		if cliClient != nil {
-			cliClient.RequestMutex.Unlock()
-		}
-	}()
-
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
-			cliCancel()
-			return
-		}
-
-		isGlAPIKey := false
-		if glAPIKey := cliClient.GetGenerativeLanguageAPIKey(); glAPIKey != "" {
-			log.Debugf("Request use generative language API Key: %s", glAPIKey)
-			isGlAPIKey = true
-		} else {
-			log.Debugf("Request use account: %s, project id: %s", cliClient.GetEmail(), cliClient.GetProjectID())
-		}
-
-		resp, err := cliClient.SendMessage(cliCtx, rawJSON, modelName, systemInstruction, contents, tools)
-		if err != nil {
-			if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-				continue
-			} else {
-				c.Status(err.StatusCode)
-				_, _ = c.Writer.Write([]byte(err.Error.Error()))
-				cliCancel()
-			}
-			break
-		} else {
-			openAIFormat := openai.ConvertCliToOpenAINonStream(resp, time.Now().Unix(), isGlAPIKey)
-			if openAIFormat != "" {
-				_, _ = c.Writer.Write([]byte(openAIFormat))
-			}
-			cliCancel()
-			break
-		}
-	}
-}
-
-// handleStreamingResponse handles streaming responses
-func (h *OpenAIAPIHandlers) handleStreamingResponse(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "text/event-stream")
-	c.Header("Cache-Control", "no-cache")
-	c.Header("Connection", "keep-alive")
-	c.Header("Access-Control-Allow-Origin", "*")
-
-	// Get the http.Flusher interface to manually flush the response.
-	flusher, ok := c.Writer.(http.Flusher)
-	if !ok {
-		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: "Streaming not supported",
-				Type:    "server_error",
-			},
-		})
-		return
-	}
-
-	// Prepare the request for the backend client.
-	modelName, systemInstruction, contents, tools := openai.PrepareRequest(rawJSON)
-	cliCtx, cliCancel := context.WithCancel(context.Background())
-	var cliClient *client.Client
-	defer func() {
-		// Ensure the client's mutex is unlocked on function exit.
-		if cliClient != nil {
-			cliClient.RequestMutex.Unlock()
-		}
-	}()
-
-outLoop:
-	for {
-		var errorResponse *client.ErrorMessage
-		cliClient, errorResponse = h.GetClient(modelName)
-		if errorResponse != nil {
-			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
-			flusher.Flush()
-			cliCancel()
-			return
-		}
-
-		isGlAPIKey := false
-		if glAPIKey := cliClient.GetGenerativeLanguageAPIKey(); glAPIKey != "" {
-			log.Debugf("Request use generative language API Key: %s", glAPIKey)
-			isGlAPIKey = true
-		} else {
-			log.Debugf("Request use account: %s, project id: %s", cliClient.GetEmail(), cliClient.GetProjectID())
-		}
-		// Send the message and receive response chunks and errors via channels.
-		respChan, errChan := cliClient.SendMessageStream(cliCtx, rawJSON, modelName, systemInstruction, contents, tools)
-		hasFirstResponse := false
-		for {
-			select {
-			// Handle client disconnection.
-			case <-c.Request.Context().Done():
-				if c.Request.Context().Err().Error() == "context canceled" {
-					log.Debugf("Client disconnected: %v", c.Request.Context().Err())
-					cliCancel() // Cancel the backend request.
-					return
-				}
-			// Process incoming response chunks.
-			case chunk, okStream := <-respChan:
-				if !okStream {
-					// Stream is closed, send the final [DONE] message.
-					_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
-					flusher.Flush()
-					cliCancel()
-					return
-				}
-				// Convert the chunk to OpenAI format and send it to the client.
-				hasFirstResponse = true
-				openAIFormat := openai.ConvertCliToOpenAI(chunk, time.Now().Unix(), isGlAPIKey)
-				if openAIFormat != "" {
-					_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", openAIFormat)
-					flusher.Flush()
-				}
-			// Handle errors from the backend.
-			case err, okError := <-errChan:
-				if okError {
-					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-						continue outLoop
-					} else {
-						c.Status(err.StatusCode)
-						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
-						flusher.Flush()
-						cliCancel()
-					}
-					return
-				}
-			// Send a keep-alive signal to the client.
-			case <-time.After(500 * time.Millisecond):
-				if hasFirstResponse {
-					_, _ = c.Writer.Write([]byte(": CLI-PROXY-API PROCESSING\n\n"))
-					flusher.Flush()
-				}
-			}
-		}
-	}
-}
--- a/internal/api/handlers/openai/openai_handlers.go
+++ b/internal/api/handlers/openai/openai_handlers.go
@@ -0,0 +1,506 @@
+// Package openai provides HTTP handlers for OpenAI API endpoints.
+// This package implements the OpenAI-compatible API interface, including model listing
+// and chat completion functionality. It supports both streaming and non-streaming responses,
+// and manages a pool of clients to interact with backend services.
+// The handlers translate OpenAI API requests to the appropriate backend format and
+// convert responses back to OpenAI-compatible format.
+package openai
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"net/http"
+	"time"
+
+	"github.com/luispater/CLIProxyAPI/internal/api/handlers"
+	"github.com/luispater/CLIProxyAPI/internal/client"
+	translatorOpenAIToCodex "github.com/luispater/CLIProxyAPI/internal/translator/codex/openai"
+	translatorOpenAIToGeminiCli "github.com/luispater/CLIProxyAPI/internal/translator/gemini-cli/openai"
+	"github.com/luispater/CLIProxyAPI/internal/util"
+	log "github.com/sirupsen/logrus"
+	"github.com/tidwall/gjson"
+
+	"github.com/gin-gonic/gin"
+)
+
+// OpenAIAPIHandlers contains the handlers for OpenAI API endpoints.
+// It holds a pool of clients to interact with the backend service.
+type OpenAIAPIHandlers struct {
+	*handlers.APIHandlers
+}
+
+// NewOpenAIAPIHandlers creates a new OpenAI API handlers instance.
+// It takes an APIHandlers instance as input and returns an OpenAIAPIHandlers.
+//
+// Parameters:
+//   - apiHandlers: The base API handlers instance
+//
+// Returns:
+//   - *OpenAIAPIHandlers: A new OpenAI API handlers instance
+func NewOpenAIAPIHandlers(apiHandlers *handlers.APIHandlers) *OpenAIAPIHandlers {
+	return &OpenAIAPIHandlers{
+		APIHandlers: apiHandlers,
+	}
+}
+
+// Models handles the /v1/models endpoint.
+// It returns a hardcoded list of available AI models with their capabilities
+// and specifications in OpenAI-compatible format.
+func (h *OpenAIAPIHandlers) Models(c *gin.Context) {
+	c.JSON(http.StatusOK, gin.H{
+		"data": []map[string]any{
+			{
+				"id":                    "gemini-2.5-pro",
+				"object":                "model",
+				"version":               "2.5",
+				"name":                  "Gemini 2.5 Pro",
+				"description":           "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+				"context_length":        1_048_576,
+				"max_completion_tokens": 65_536,
+				"supported_parameters": []string{
+					"tools",
+					"temperature",
+					"top_p",
+					"top_k",
+				},
+				"temperature":    1,
+				"topP":           0.95,
+				"topK":           64,
+				"maxTemperature": 2,
+				"thinking":       true,
+			},
+			{
+				"id":                    "gemini-2.5-flash",
+				"object":                "model",
+				"version":               "001",
+				"name":                  "Gemini 2.5 Flash",
+				"description":           "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+				"context_length":        1_048_576,
+				"max_completion_tokens": 65_536,
+				"supported_parameters": []string{
+					"tools",
+					"temperature",
+					"top_p",
+					"top_k",
+				},
+				"temperature":    1,
+				"topP":           0.95,
+				"topK":           64,
+				"maxTemperature": 2,
+				"thinking":       true,
+			},
+			{
+				"id":                    "gpt-5",
+				"object":                "model",
+				"version":               "gpt-5-2025-08-07",
+				"name":                  "GPT 5",
+				"description":           "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
+				"context_length":        400_000,
+				"max_completion_tokens": 128_000,
+				"supported_parameters": []string{
+					"tools",
+				},
+				"temperature":    1,
+				"topP":           0.95,
+				"topK":           64,
+				"maxTemperature": 2,
+				"thinking":       true,
+			},
+		},
+	})
+}
+
+// ChatCompletions handles the /v1/chat/completions endpoint.
+// It determines whether the request is for a streaming or non-streaming response
+// and calls the appropriate handler based on the model provider.
+//
+// Parameters:
+//   - c: The Gin context containing the HTTP request and response
+func (h *OpenAIAPIHandlers) ChatCompletions(c *gin.Context) {
+	rawJSON, err := c.GetRawData()
+	// If data retrieval fails, return a 400 Bad Request error.
+	if err != nil {
+		c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: fmt.Sprintf("Invalid request: %v", err),
+				Type:    "invalid_request_error",
+			},
+		})
+		return
+	}
+
+	// Check if the client requested a streaming response.
+	streamResult := gjson.GetBytes(rawJSON, "stream")
+	modelName := gjson.GetBytes(rawJSON, "model")
+	provider := util.GetProviderName(modelName.String())
+	if provider == "gemini" {
+		if streamResult.Type == gjson.True {
+			h.handleGeminiStreamingResponse(c, rawJSON)
+		} else {
+			h.handleGeminiNonStreamingResponse(c, rawJSON)
+		}
+	} else if provider == "gpt" {
+		if streamResult.Type == gjson.True {
+			h.handleCodexStreamingResponse(c, rawJSON)
+		} else {
+			h.handleCodexNonStreamingResponse(c, rawJSON)
+		}
+	}
+}
+
+// handleGeminiNonStreamingResponse handles non-streaming chat completion responses
+// for Gemini models. It selects a client from the pool, sends the request, and
+// aggregates the response before sending it back to the client in OpenAI format.
+//
+// Parameters:
+//   - c: The Gin context containing the HTTP request and response
+//   - rawJSON: The raw JSON bytes of the OpenAI-compatible request
+func (h *OpenAIAPIHandlers) handleGeminiNonStreamingResponse(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "application/json")
+
+	modelName, systemInstruction, contents, tools := translatorOpenAIToGeminiCli.ConvertOpenAIChatRequestToCli(rawJSON)
+	cliCtx, cliCancel := context.WithCancel(context.Background())
+	var cliClient client.Client
+	defer func() {
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName)
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			cliCancel()
+			return
+		}
+
+		isGlAPIKey := false
+		if glAPIKey := cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey(); glAPIKey != "" {
+			log.Debugf("Request use generative language API Key: %s", glAPIKey)
+			isGlAPIKey = true
+		} else {
+			log.Debugf("Request cli use account: %s, project id: %s", cliClient.(*client.GeminiClient).GetEmail(), cliClient.(*client.GeminiClient).GetProjectID())
+		}
+
+		resp, err := cliClient.SendMessage(cliCtx, rawJSON, modelName, systemInstruction, contents, tools)
+		if err != nil {
+			if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+				continue
+			} else {
+				c.Status(err.StatusCode)
+				_, _ = c.Writer.Write([]byte(err.Error.Error()))
+				cliCancel()
+			}
+			break
+		} else {
+			openAIFormat := translatorOpenAIToGeminiCli.ConvertCliResponseToOpenAIChatNonStream(resp, time.Now().Unix(), isGlAPIKey)
+			if openAIFormat != "" {
+				_, _ = c.Writer.Write([]byte(openAIFormat))
+			}
+			cliCancel()
+			break
+		}
+	}
+}
+
+// handleGeminiStreamingResponse handles streaming responses for Gemini models.
+// It establishes a streaming connection with the backend service and forwards
+// the response chunks to the client in real-time using Server-Sent Events.
+//
+// Parameters:
+//   - c: The Gin context containing the HTTP request and response
+//   - rawJSON: The raw JSON bytes of the OpenAI-compatible request
+func (h *OpenAIAPIHandlers) handleGeminiStreamingResponse(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "text/event-stream")
+	c.Header("Cache-Control", "no-cache")
+	c.Header("Connection", "keep-alive")
+	c.Header("Access-Control-Allow-Origin", "*")
+
+	// Get the http.Flusher interface to manually flush the response.
+	flusher, ok := c.Writer.(http.Flusher)
+	if !ok {
+		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Streaming not supported",
+				Type:    "server_error",
+			},
+		})
+		return
+	}
+
+	// Prepare the request for the backend client.
+	modelName, systemInstruction, contents, tools := translatorOpenAIToGeminiCli.ConvertOpenAIChatRequestToCli(rawJSON)
+	cliCtx, cliCancel := context.WithCancel(context.Background())
+	var cliClient client.Client
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName)
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			flusher.Flush()
+			cliCancel()
+			return
+		}
+
+		isGlAPIKey := false
+		if glAPIKey := cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey(); glAPIKey != "" {
+			log.Debugf("Request use generative language API Key: %s", glAPIKey)
+			isGlAPIKey = true
+		} else {
+			log.Debugf("Request cli use account: %s, project id: %s", cliClient.GetEmail(), cliClient.(*client.GeminiClient).GetProjectID())
+		}
+		// Send the message and receive response chunks and errors via channels.
+		respChan, errChan := cliClient.SendMessageStream(cliCtx, rawJSON, modelName, systemInstruction, contents, tools)
+		hasFirstResponse := false
+		for {
+			select {
+			// Handle client disconnection.
+			case <-c.Request.Context().Done():
+				if c.Request.Context().Err().Error() == "context canceled" {
+					log.Debugf("GeminiClient disconnected: %v", c.Request.Context().Err())
+					cliCancel() // Cancel the backend request.
+					return
+				}
+			// Process incoming response chunks.
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					// Stream is closed, send the final [DONE] message.
+					_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
+					flusher.Flush()
+					cliCancel()
+					return
+				}
+				// Convert the chunk to OpenAI format and send it to the client.
+				hasFirstResponse = true
+				openAIFormat := translatorOpenAIToGeminiCli.ConvertCliResponseToOpenAIChat(chunk, time.Now().Unix(), isGlAPIKey)
+				if openAIFormat != "" {
+					_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", openAIFormat)
+					flusher.Flush()
+				}
+			// Handle errors from the backend.
+			case err, okError := <-errChan:
+				if okError {
+					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+						continue outLoop
+					} else {
+						c.Status(err.StatusCode)
+						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
+						flusher.Flush()
+						cliCancel()
+					}
+					return
+				}
+			// Send a keep-alive signal to the client.
+			case <-time.After(500 * time.Millisecond):
+				if hasFirstResponse {
+					_, _ = c.Writer.Write([]byte(": CLI-PROXY-API PROCESSING\n\n"))
+					flusher.Flush()
+				}
+			}
+		}
+	}
+}
+
+// handleCodexNonStreamingResponse handles non-streaming chat completion responses
+// for OpenAI models. It selects a client from the pool, sends the request, and
+// aggregates the response before sending it back to the client in OpenAI format.
+//
+// Parameters:
+//   - c: The Gin context containing the HTTP request and response
+//   - rawJSON: The raw JSON bytes of the OpenAI-compatible request
+func (h *OpenAIAPIHandlers) handleCodexNonStreamingResponse(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "application/json")
+
+	newRequestJSON := translatorOpenAIToCodex.ConvertOpenAIChatRequestToCodex(rawJSON)
+	modelName := gjson.GetBytes(rawJSON, "model")
+	cliCtx, cliCancel := context.WithCancel(context.Background())
+	var cliClient client.Client
+	defer func() {
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName.String())
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = c.Writer.Write([]byte(errorResponse.Error.Error()))
+			cliCancel()
+			return
+		}
+
+		log.Debugf("Request codex use account: %s", cliClient.GetEmail())
+
+		// Send the message and receive response chunks and errors via channels.
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+		for {
+			select {
+			// Handle client disconnection.
+			case <-c.Request.Context().Done():
+				if c.Request.Context().Err().Error() == "context canceled" {
+					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
+					cliCancel() // Cancel the backend request.
+					return
+				}
+			// Process incoming response chunks.
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					cliCancel()
+					return
+				}
+				if bytes.HasPrefix(chunk, []byte("data: ")) {
+					jsonData := chunk[6:]
+					data := gjson.ParseBytes(jsonData)
+					typeResult := data.Get("type")
+					if typeResult.String() == "response.completed" {
+						responseResult := data.Get("response")
+						openaiStr := translatorOpenAIToCodex.ConvertCodexResponseToOpenAIChatNonStream(responseResult.Raw, time.Now().Unix())
+						_, _ = c.Writer.Write([]byte(openaiStr))
+					}
+				}
+			// Handle errors from the backend.
+			case err, okError := <-errChan:
+				if okError {
+					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+						continue outLoop
+					} else {
+						c.Status(err.StatusCode)
+						_, _ = c.Writer.Write([]byte(err.Error.Error()))
+						cliCancel()
+					}
+					return
+				}
+			// Send a keep-alive signal to the client.
+			case <-time.After(500 * time.Millisecond):
+			}
+		}
+	}
+}
+
+// handleCodexStreamingResponse handles streaming responses for OpenAI models.
+// It establishes a streaming connection with the backend service and forwards
+// the response chunks to the client in real-time using Server-Sent Events.
+//
+// Parameters:
+//   - c: The Gin context containing the HTTP request and response
+//   - rawJSON: The raw JSON bytes of the OpenAI-compatible request
+func (h *OpenAIAPIHandlers) handleCodexStreamingResponse(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "text/event-stream")
+	c.Header("Cache-Control", "no-cache")
+	c.Header("Connection", "keep-alive")
+	c.Header("Access-Control-Allow-Origin", "*")
+
+	// Get the http.Flusher interface to manually flush the response.
+	flusher, ok := c.Writer.(http.Flusher)
+	if !ok {
+		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Streaming not supported",
+				Type:    "server_error",
+			},
+		})
+		return
+	}
+
+	// Prepare the request for the backend client.
+	newRequestJSON := translatorOpenAIToCodex.ConvertOpenAIChatRequestToCodex(rawJSON)
+	// log.Debugf("Request: %s", newRequestJSON)
+
+	modelName := gjson.GetBytes(rawJSON, "model")
+
+	cliCtx, cliCancel := context.WithCancel(context.Background())
+	var cliClient client.Client
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName.String())
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			flusher.Flush()
+			cliCancel()
+			return
+		}
+
+		log.Debugf("Request codex use account: %s", cliClient.GetEmail())
+
+		// Send the message and receive response chunks and errors via channels.
+		var params *translatorOpenAIToCodex.ConvertCliToOpenAIParams
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+		for {
+			select {
+			// Handle client disconnection.
+			case <-c.Request.Context().Done():
+				if c.Request.Context().Err().Error() == "context canceled" {
+					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
+					cliCancel() // Cancel the backend request.
+					return
+				}
+			// Process incoming response chunks.
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					_, _ = c.Writer.Write([]byte("[done]\n\n"))
+					flusher.Flush()
+					cliCancel()
+					return
+				}
+				// log.Debugf("Response: %s\n", string(chunk))
+				// Convert the chunk to OpenAI format and send it to the client.
+				if bytes.HasPrefix(chunk, []byte("data: ")) {
+					jsonData := chunk[6:]
+					data := gjson.ParseBytes(jsonData)
+					typeResult := data.Get("type")
+					if typeResult.String() != "" {
+						var openaiStr string
+						params, openaiStr = translatorOpenAIToCodex.ConvertCodexResponseToOpenAIChat(jsonData, params)
+						if openaiStr != "" {
+							_, _ = c.Writer.Write([]byte("data: "))
+							_, _ = c.Writer.Write([]byte(openaiStr))
+							_, _ = c.Writer.Write([]byte("\n\n"))
+						}
+					}
+					// log.Debugf(string(jsonData))
+				}
+				flusher.Flush()
+			// Handle errors from the backend.
+			case err, okError := <-errChan:
+				if okError {
+					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+						continue outLoop
+					} else {
+						c.Status(err.StatusCode)
+						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
+						flusher.Flush()
+						cliCancel()
+					}
+					return
+				}
+			// Send a keep-alive signal to the client.
+			case <-time.After(500 * time.Millisecond):
+			}
+		}
+	}
+}