Add claude code support

2026-02-03 04:50:52 +08:00 · 2025-08-19 01:16:52 +08:00
parent c5cc238308
commit d58cc55cb2
40 changed files with 4429 additions and 185 deletions
--- a/internal/api/handlers/claude/code_handlers.go
+++ b/internal/api/handlers/claude/code_handlers.go
@@ -11,7 +11,6 @@ import (
 	"context"
 	"fmt"
 	"net/http"
-	"strings"
 	"time"

 	"github.com/gin-gonic/gin"
@@ -60,10 +59,19 @@ func (h *ClaudeCodeAPIHandlers) ClaudeMessages(c *gin.Context) {
 	// h.handleCodexStreamingResponse(c, rawJSON)
 	modelName := gjson.GetBytes(rawJSON, "model")
 	provider := util.GetProviderName(modelName.String())
+
+	// Check if the client requested a streaming response.
+	streamResult := gjson.GetBytes(rawJSON, "stream")
+	if streamResult.Type == gjson.False {
+		return
+	}
+
 	if provider == "gemini" {
 		h.handleGeminiStreamingResponse(c, rawJSON)
 	} else if provider == "gpt" {
 		h.handleCodexStreamingResponse(c, rawJSON)
+	} else if provider == "claude" {
+		h.handleClaudeStreamingResponse(c, rawJSON)
 	} else {
 		h.handleGeminiStreamingResponse(c, rawJSON)
 	}
@@ -98,14 +106,6 @@ func (h *ClaudeCodeAPIHandlers) handleGeminiStreamingResponse(c *gin.Context, ra
 	// conversation contents, and available tools from the raw JSON
 	modelName, systemInstruction, contents, tools := translatorClaudeCodeToGeminiCli.ConvertClaudeCodeRequestToCli(rawJSON)

-	// Map Claude model names to corresponding Gemini models
-	// This allows the proxy to handle Claude API calls using Gemini backend
-	if modelName == "claude-sonnet-4-20250514" {
-		modelName = "gemini-2.5-pro"
-	} else if modelName == "claude-3-5-haiku-20241022" {
-		modelName = "gemini-2.5-flash"
-	}
-
 	// Create a cancellable context for the backend client request
 	// This allows proper cleanup and cancellation of ongoing requests
 	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
@@ -128,7 +128,7 @@ outLoop:
 		cliClient, errorResponse = h.GetClient(modelName)
 		if errorResponse != nil {
 			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
 			flusher.Flush()
 			cliCancel()
 			return
@@ -146,12 +146,7 @@ outLoop:
 		// Initiate streaming communication with the backend client
 		// This returns two channels: one for response chunks and one for errors

-		includeThoughts := false
-		if userAgent, hasKey := c.Request.Header["User-Agent"]; hasKey {
-			includeThoughts = !strings.Contains(userAgent[0], "claude-cli")
-		}
-
-		respChan, errChan := cliClient.SendMessageStream(cliCtx, rawJSON, modelName, systemInstruction, contents, tools, includeThoughts)
+		respChan, errChan := cliClient.SendMessageStream(cliCtx, rawJSON, modelName, systemInstruction, contents, tools, true)

 		// Track response state for proper Claude format conversion
 		hasFirstResponse := false
@@ -188,6 +183,7 @@ outLoop:
 				}

 				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n\n"))
 				// Convert the backend response to Claude-compatible format
 				// This translation layer ensures API compatibility
 				claudeFormat := translatorClaudeCodeToGeminiCli.ConvertCliResponseToClaudeCode(chunk, isGlAPIKey, hasFirstResponse, &responseType, &responseIndex)
@@ -221,12 +217,12 @@ outLoop:
 				if hasFirstResponse {
 					// Send a ping event to maintain the connection
 					// This is especially important for slow AI model responses
-					output := "event: ping\n"
-					output = output + `data: {"type": "ping"}`
-					output = output + "\n\n\n"
-					_, _ = c.Writer.Write([]byte(output))
-
-					flusher.Flush()
+					// output := "event: ping\n"
+					// output = output + `data: {"type": "ping"}`
+					// output = output + "\n\n\n"
+					// _, _ = c.Writer.Write([]byte(output))
+					//
+					// flusher.Flush()
 				}
 			}
 		}
@@ -262,13 +258,7 @@ func (h *ClaudeCodeAPIHandlers) handleCodexStreamingResponse(c *gin.Context, raw
 	// conversation contents, and available tools from the raw JSON
 	newRequestJSON := translatorClaudeCodeToCodex.ConvertClaudeCodeRequestToCodex(rawJSON)
 	modelName := gjson.GetBytes(rawJSON, "model").String()
-	// Map Claude model names to corresponding Gemini models
-	// This allows the proxy to handle Claude API calls using Gemini backend
-	if modelName == "claude-sonnet-4-20250514" {
-		modelName = "gpt-5"
-	} else if modelName == "claude-3-5-haiku-20241022" {
-		modelName = "gpt-5"
-	}
+
 	newRequestJSON, _ = sjson.Set(newRequestJSON, "model", modelName)
 	// log.Debugf(string(rawJSON))
 	// log.Debugf(newRequestJSON)
@@ -294,7 +284,7 @@ outLoop:
 		cliClient, errorResponse = h.GetClient(modelName)
 		if errorResponse != nil {
 			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
 			flusher.Flush()
 			cliCancel()
 			return
@@ -307,7 +297,7 @@ outLoop:
 		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")

 		// Track response state for proper Claude format conversion
-		hasFirstResponse := false
+		// hasFirstResponse := false
 		hasToolCall := false

 		// Main streaming loop - handles multiple concurrent events using Go channels
@@ -333,6 +323,7 @@ outLoop:
 				}

 				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n\n"))

 				// Convert the backend response to Claude-compatible format
 				// This translation layer ensures API compatibility
@@ -346,7 +337,7 @@ outLoop:
 						_, _ = c.Writer.Write([]byte("\n"))
 					}
 					flusher.Flush() // Immediately send the chunk to the client
-					hasFirstResponse = true
+					// hasFirstResponse = true
 				} else {
 					// log.Debugf("chunk: %s", string(chunk))
 				}
@@ -373,16 +364,156 @@ outLoop:
 			// Case 4: Send periodic keep-alive signals
 			// Prevents connection timeouts during long-running requests
 			case <-time.After(3000 * time.Millisecond):
-				if hasFirstResponse {
-					// Send a ping event to maintain the connection
-					// This is especially important for slow AI model responses
-					output := "event: ping\n"
-					output = output + `data: {"type": "ping"}`
-					output = output + "\n\n"
-					_, _ = c.Writer.Write([]byte(output))
-
-					flusher.Flush()
-				}
+				// if hasFirstResponse {
+				// 	// Send a ping event to maintain the connection
+				// 	// This is especially important for slow AI model responses
+				// 	output := "event: ping\n"
+				// 	output = output + `data: {"type": "ping"}`
+				// 	output = output + "\n\n"
+				// 	_, _ = c.Writer.Write([]byte(output))
+				//
+				// 	flusher.Flush()
+				// }
+			}
+		}
+	}
+}
+
+// handleClaudeStreamingResponse streams Claude-compatible responses backed by OpenAI.
+// It converts the Claude request into OpenAI responses format, establishes SSE,
+// and translates streaming chunks back into Claude Code events.
+func (h *ClaudeCodeAPIHandlers) handleClaudeStreamingResponse(c *gin.Context, rawJSON []byte) {
+
+	// Get the http.Flusher interface to manually flush the response.
+	// This is crucial for streaming as it allows immediate sending of data chunks
+	flusher, ok := c.Writer.(http.Flusher)
+	if !ok {
+		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Streaming not supported",
+				Type:    "server_error",
+			},
+		})
+		return
+	}
+
+	modelName := gjson.GetBytes(rawJSON, "model").String()
+
+	// Create a cancellable context for the backend client request
+	// This allows proper cleanup and cancellation of ongoing requests
+	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
+
+	var cliClient client.Client
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		// This prevents deadlocks and ensures proper resource cleanup
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+	// Main client rotation loop with quota management
+	// This loop implements a sophisticated load balancing and failover mechanism
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName)
+		if errorResponse != nil {
+
+			if errorResponse.StatusCode == 429 {
+				c.Header("Content-Type", "application/json")
+				c.Header("Content-Length", fmt.Sprintf("%d", len(errorResponse.Error.Error())))
+			}
+			c.Status(errorResponse.StatusCode)
+
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
+			flusher.Flush()
+			cliCancel()
+
+			return
+		}
+
+		if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
+			log.Debugf("Request claude use API Key: %s", apiKey)
+		} else {
+			log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
+		}
+
+		// Initiate streaming communication with the backend client
+		// This returns two channels: one for response chunks and one for errors
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, rawJSON, "")
+
+		hasFirstResponse := false
+		// Main streaming loop - handles multiple concurrent events using Go channels
+		// This select statement manages four different types of events simultaneously
+		for {
+			select {
+			// Case 1: Handle client disconnection
+			// Detects when the HTTP client has disconnected and cleans up resources
+			case <-c.Request.Context().Done():
+				if c.Request.Context().Err().Error() == "context canceled" {
+					log.Debugf("ClaudeClient disconnected: %v", c.Request.Context().Err())
+					cliCancel() // Cancel the backend request to prevent resource leaks
+					return
+				}
+
+			// Case 2: Process incoming response chunks from the backend
+			// This handles the actual streaming data from the AI model
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					flusher.Flush()
+					cliCancel()
+					return
+				}
+				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n\n"))
+
+				if !hasFirstResponse {
+					// Set up Server-Sent Events (SSE) headers for streaming response
+					// These headers are essential for maintaining a persistent connection
+					// and enabling real-time streaming of chat completions
+					c.Header("Content-Type", "text/event-stream")
+					c.Header("Cache-Control", "no-cache")
+					c.Header("Connection", "keep-alive")
+					c.Header("Access-Control-Allow-Origin", "*")
+					hasFirstResponse = true
+				}
+
+				_, _ = c.Writer.Write(chunk)
+				_, _ = c.Writer.Write([]byte("\n"))
+				flusher.Flush()
+
+			// Case 3: Handle errors from the backend
+			// This manages various error conditions and implements retry logic
+			case errInfo, okError := <-errChan:
+				if okError {
+					// log.Debugf("Code: %d, Error: %v", errInfo.StatusCode, errInfo.Error)
+					// Special handling for quota exceeded errors
+					// If configured, attempt to switch to a different project/client
+					// if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+					if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+						log.Debugf("quota exceeded, switch client")
+						continue outLoop // Restart the client selection process
+					} else {
+						// Forward other errors directly to the client
+						if errInfo.Addon != nil {
+							for key, val := range errInfo.Addon {
+								c.Header(key, val[0])
+							}
+						}
+
+						c.Status(errInfo.StatusCode)
+
+						_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
+						flusher.Flush()
+						cliCancel(errInfo.Error)
+					}
+					return
+				}
+
+			// Case 4: Send periodic keep-alive signals
+			// Prevents connection timeouts during long-running requests
+			case <-time.After(3000 * time.Millisecond):
 			}
 		}
 	}
--- a/internal/api/handlers/gemini/cli/cli_handlers.go
+++ b/internal/api/handlers/gemini/cli/cli_handlers.go
@@ -16,6 +16,7 @@ import (
 	"github.com/gin-gonic/gin"
 	"github.com/luispater/CLIProxyAPI/internal/api/handlers"
 	"github.com/luispater/CLIProxyAPI/internal/client"
+	translatorGeminiToClaude "github.com/luispater/CLIProxyAPI/internal/translator/claude/gemini"
 	translatorGeminiToCodex "github.com/luispater/CLIProxyAPI/internal/translator/codex/gemini"
 	"github.com/luispater/CLIProxyAPI/internal/util"
 	log "github.com/sirupsen/logrus"
@@ -61,12 +62,16 @@ func (h *GeminiCLIAPIHandlers) CLIHandler(c *gin.Context) {
 			h.handleInternalGenerateContent(c, rawJSON)
 		} else if provider == "gpt" {
 			h.handleCodexInternalGenerateContent(c, rawJSON)
+		} else if provider == "claude" {
+			h.handleClaudeInternalGenerateContent(c, rawJSON)
 		}
 	} else if requestRawURI == "/v1internal:streamGenerateContent" {
 		if provider == "gemini" || provider == "unknow" {
 			h.handleInternalStreamGenerateContent(c, rawJSON)
 		} else if provider == "gpt" {
 			h.handleCodexInternalStreamGenerateContent(c, rawJSON)
+		} else if provider == "claude" {
+			h.handleClaudeInternalStreamGenerateContent(c, rawJSON)
 		}
 	} else {
 		reqBody := bytes.NewBuffer(rawJSON)
@@ -172,7 +177,7 @@ outLoop:
 		cliClient, errorResponse = h.GetClient(modelName)
 		if errorResponse != nil {
 			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
 			flusher.Flush()
 			cliCancel()
 			return
@@ -204,6 +209,7 @@ outLoop:
 				}

 				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n\n"))

 				hasFirstResponse = true
 				if cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey() != "" {
@@ -258,7 +264,7 @@ func (h *GeminiCLIAPIHandlers) handleInternalGenerateContent(c *gin.Context, raw
 		cliClient, errorResponse = h.GetClient(modelName)
 		if errorResponse != nil {
 			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
 			cliCancel()
 			return
 		}
@@ -276,7 +282,7 @@ func (h *GeminiCLIAPIHandlers) handleInternalGenerateContent(c *gin.Context, raw
 			} else {
 				c.Status(err.StatusCode)
 				_, _ = c.Writer.Write([]byte(err.Error.Error()))
-				log.Debugf("code: %d, error: %s", err.StatusCode, err.Error.Error())
+				// log.Debugf("code: %d, error: %s", err.StatusCode, err.Error.Error())
 				cliCancel(err.Error)
 			}
 			break
@@ -337,7 +343,7 @@ outLoop:
 		cliClient, errorResponse = h.GetClient(modelName.String())
 		if errorResponse != nil {
 			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
 			flusher.Flush()
 			cliCancel()
 			return
@@ -373,6 +379,7 @@ outLoop:
 				// _, _ = logFile.Write(chunk)
 				// _, _ = logFile.Write([]byte("\n"))
 				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n\n"))

 				if bytes.HasPrefix(chunk, []byte("data: ")) {
 					jsonData := chunk[6:]
@@ -397,7 +404,7 @@ outLoop:
 					if errMessage.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
 						continue outLoop
 					} else {
-						log.Debugf("code: %d, error: %s", errMessage.StatusCode, errMessage.Error.Error())
+						// log.Debugf("code: %d, error: %s", errMessage.StatusCode, errMessage.Error.Error())
 						c.Status(errMessage.StatusCode)
 						_, _ = fmt.Fprint(c.Writer, errMessage.Error.Error())
 						flusher.Flush()
@@ -414,7 +421,7 @@ outLoop:

 func (h *GeminiCLIAPIHandlers) handleCodexInternalGenerateContent(c *gin.Context, rawJSON []byte) {
 	c.Header("Content-Type", "application/json")
-	orgRawJSON := rawJSON
+	// orgRawJSON := rawJSON
 	modelResult := gjson.GetBytes(rawJSON, "model")
 	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
 	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelResult.String())
@@ -443,7 +450,7 @@ outLoop:
 		cliClient, errorResponse = h.GetClient(modelName.String())
 		if errorResponse != nil {
 			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
 			cliCancel()
 			return
 		}
@@ -469,6 +476,7 @@ outLoop:
 				}

 				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n\n"))

 				if bytes.HasPrefix(chunk, []byte("data: ")) {
 					jsonData := chunk[6:]
@@ -490,9 +498,231 @@ outLoop:
 					} else {
 						c.Status(err.StatusCode)
 						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
-						log.Debugf("org: %s", string(orgRawJSON))
-						log.Debugf("raw: %s", string(rawJSON))
-						log.Debugf("newRequestJSON: %s", newRequestJSON)
+						// log.Debugf("org: %s", string(orgRawJSON))
+						// log.Debugf("raw: %s", string(rawJSON))
+						// log.Debugf("newRequestJSON: %s", newRequestJSON)
+						cliCancel(err.Error)
+					}
+					return
+				}
+			// Send a keep-alive signal to the client.
+			case <-time.After(500 * time.Millisecond):
+			}
+		}
+	}
+}
+
+func (h *GeminiCLIAPIHandlers) handleClaudeInternalStreamGenerateContent(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "text/event-stream")
+	c.Header("Cache-Control", "no-cache")
+	c.Header("Connection", "keep-alive")
+	c.Header("Access-Control-Allow-Origin", "*")
+
+	// Get the http.Flusher interface to manually flush the response.
+	flusher, ok := c.Writer.(http.Flusher)
+	if !ok {
+		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Streaming not supported",
+				Type:    "server_error",
+			},
+		})
+		return
+	}
+
+	modelResult := gjson.GetBytes(rawJSON, "model")
+	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
+	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelResult.String())
+	rawJSON, _ = sjson.SetRawBytes(rawJSON, "system_instruction", []byte(gjson.GetBytes(rawJSON, "systemInstruction").Raw))
+	rawJSON, _ = sjson.DeleteBytes(rawJSON, "systemInstruction")
+
+	// Prepare the request for the backend client.
+	newRequestJSON := translatorGeminiToClaude.ConvertGeminiRequestToAnthropic(rawJSON)
+	newRequestJSON, _ = sjson.Set(newRequestJSON, "stream", true)
+
+	modelName := gjson.GetBytes(rawJSON, "model")
+
+	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
+
+	var cliClient client.Client
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName.String())
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
+			flusher.Flush()
+			cliCancel()
+			return
+		}
+
+		if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
+			log.Debugf("Request claude use API Key: %s", apiKey)
+		} else {
+			log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
+		}
+
+		// Send the message and receive response chunks and errors via channels.
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+
+		params := &translatorGeminiToClaude.ConvertAnthropicResponseToGeminiParams{
+			Model:      modelName.String(),
+			CreatedAt:  0,
+			ResponseID: "",
+		}
+		for {
+			select {
+			// Handle client disconnection.
+			case <-c.Request.Context().Done():
+				if c.Request.Context().Err().Error() == "context canceled" {
+					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
+					cliCancel() // Cancel the backend request.
+					return
+				}
+			// Process incoming response chunks.
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					cliCancel()
+					return
+				}
+
+				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n\n"))
+
+				if bytes.HasPrefix(chunk, []byte("data: ")) {
+					jsonData := chunk[6:]
+					data := gjson.ParseBytes(jsonData)
+					typeResult := data.Get("type")
+					if typeResult.String() != "" {
+						// log.Debugf(string(jsonData))
+						outputs := translatorGeminiToClaude.ConvertAnthropicResponseToGemini(jsonData, params)
+						if len(outputs) > 0 {
+							for i := 0; i < len(outputs); i++ {
+								outputs[i], _ = sjson.SetRaw("{}", "response", outputs[i])
+								_, _ = c.Writer.Write([]byte("data: "))
+								_, _ = c.Writer.Write([]byte(outputs[i]))
+								_, _ = c.Writer.Write([]byte("\n\n"))
+							}
+						}
+					}
+					// log.Debugf(string(jsonData))
+				}
+				flusher.Flush()
+			// Handle errors from the backend.
+			case err, okError := <-errChan:
+				if okError {
+					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+						continue outLoop
+					} else {
+						c.Status(err.StatusCode)
+						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
+						flusher.Flush()
+						cliCancel(err.Error)
+					}
+					return
+				}
+			// Send a keep-alive signal to the client.
+			case <-time.After(500 * time.Millisecond):
+			}
+		}
+	}
+}
+
+func (h *GeminiCLIAPIHandlers) handleClaudeInternalGenerateContent(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "application/json")
+
+	modelResult := gjson.GetBytes(rawJSON, "model")
+	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
+	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelResult.String())
+	rawJSON, _ = sjson.SetRawBytes(rawJSON, "system_instruction", []byte(gjson.GetBytes(rawJSON, "systemInstruction").Raw))
+	rawJSON, _ = sjson.DeleteBytes(rawJSON, "systemInstruction")
+
+	// Prepare the request for the backend client.
+	newRequestJSON := translatorGeminiToClaude.ConvertGeminiRequestToAnthropic(rawJSON)
+	// log.Debugf("Request: %s", newRequestJSON)
+	newRequestJSON, _ = sjson.Set(newRequestJSON, "stream", true)
+
+	modelName := gjson.GetBytes(rawJSON, "model")
+
+	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
+
+	var cliClient client.Client
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName.String())
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
+			cliCancel()
+			return
+		}
+
+		if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
+			log.Debugf("Request claude use API Key: %s", apiKey)
+		} else {
+			log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
+		}
+
+		// Send the message and receive response chunks and errors via channels.
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+
+		var allChunks [][]byte
+		for {
+			select {
+			// Handle client disconnection.
+			case <-c.Request.Context().Done():
+				if c.Request.Context().Err().Error() == "context canceled" {
+					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
+					cliCancel() // Cancel the backend request.
+					return
+				}
+			// Process incoming response chunks.
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					if len(allChunks) > 0 {
+						// Use the last chunk which should contain the complete message
+						finalResponseStr := translatorGeminiToClaude.ConvertAnthropicResponseToGeminiNonStream(allChunks, modelName.String())
+						finalResponse := []byte(finalResponseStr)
+						_, _ = c.Writer.Write(finalResponse)
+					}
+
+					cliCancel()
+					return
+				}
+
+				// Store chunk for building final response
+				if bytes.HasPrefix(chunk, []byte("data: ")) {
+					jsonData := chunk[6:]
+					allChunks = append(allChunks, jsonData)
+				}
+
+				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n\n"))
+
+			// Handle errors from the backend.
+			case err, okError := <-errChan:
+				if okError {
+					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+						continue outLoop
+					} else {
+						c.Status(err.StatusCode)
+						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
 						cliCancel(err.Error)
 					}
 					return
--- a/internal/api/handlers/gemini/gemini_handlers.go
+++ b/internal/api/handlers/gemini/gemini_handlers.go
@@ -16,6 +16,7 @@ import (
 	"github.com/gin-gonic/gin"
 	"github.com/luispater/CLIProxyAPI/internal/api/handlers"
 	"github.com/luispater/CLIProxyAPI/internal/client"
+	translatorGeminiToClaude "github.com/luispater/CLIProxyAPI/internal/translator/claude/gemini"
 	translatorGeminiToCodex "github.com/luispater/CLIProxyAPI/internal/translator/codex/gemini"
 	translatorGeminiToGeminiCli "github.com/luispater/CLIProxyAPI/internal/translator/gemini-cli/gemini/cli"
 	"github.com/luispater/CLIProxyAPI/internal/util"
@@ -233,7 +234,13 @@ func (h *GeminiAPIHandlers) GeminiHandler(c *gin.Context) {
 		case "streamGenerateContent":
 			h.handleCodexStreamGenerateContent(c, rawJSON)
 		}
-
+	} else if provider == "claude" {
+		switch method {
+		case "generateContent":
+			h.handleClaudeGenerateContent(c, rawJSON)
+		case "streamGenerateContent":
+			h.handleClaudeStreamGenerateContent(c, rawJSON)
+		}
 	}
 }

@@ -278,7 +285,7 @@ outLoop:
 		cliClient, errorResponse = h.GetClient(modelName)
 		if errorResponse != nil {
 			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
 			flusher.Flush()
 			cliCancel()
 			return
@@ -340,6 +347,7 @@ outLoop:
 				}

 				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n\n"))

 				if cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey() == "" {
 					if alt == "" {
@@ -377,7 +385,7 @@ outLoop:
 						log.Debugf("quota exceeded, switch client")
 						continue outLoop
 					} else {
-						log.Debugf("error code :%d, error: %v", err.StatusCode, err.Error.Error())
+						// log.Debugf("error code :%d, error: %v", err.StatusCode, err.Error.Error())
 						c.Status(err.StatusCode)
 						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
 						flusher.Flush()
@@ -413,7 +421,7 @@ func (h *GeminiAPIHandlers) handleGeminiCountTokens(c *gin.Context, rawJSON []by
 		cliClient, errorResponse = h.GetClient(modelName, false)
 		if errorResponse != nil {
 			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
 			cliCancel()
 			return
 		}
@@ -482,7 +490,7 @@ func (h *GeminiAPIHandlers) handleGeminiGenerateContent(c *gin.Context, rawJSON
 		cliClient, errorResponse = h.GetClient(modelName)
 		if errorResponse != nil {
 			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
 			cliCancel()
 			return
 		}
@@ -587,7 +595,7 @@ outLoop:
 		cliClient, errorResponse = h.GetClient(modelName.String())
 		if errorResponse != nil {
 			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
 			flusher.Flush()
 			cliCancel()
 			return
@@ -621,6 +629,7 @@ outLoop:
 				}

 				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n\n"))

 				if bytes.HasPrefix(chunk, []byte("data: ")) {
 					jsonData := chunk[6:]
@@ -684,7 +693,7 @@ outLoop:
 		cliClient, errorResponse = h.GetClient(modelName.String())
 		if errorResponse != nil {
 			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
 			cliCancel()
 			return
 		}
@@ -710,6 +719,7 @@ outLoop:
 				}

 				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n\n"))

 				if bytes.HasPrefix(chunk, []byte("data: ")) {
 					jsonData := chunk[6:]
@@ -741,3 +751,213 @@ outLoop:
 		}
 	}
 }
+
+func (h *GeminiAPIHandlers) handleClaudeStreamGenerateContent(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "text/event-stream")
+	c.Header("Cache-Control", "no-cache")
+	c.Header("Connection", "keep-alive")
+	c.Header("Access-Control-Allow-Origin", "*")
+
+	// Get the http.Flusher interface to manually flush the response.
+	flusher, ok := c.Writer.(http.Flusher)
+	if !ok {
+		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Streaming not supported",
+				Type:    "server_error",
+			},
+		})
+		return
+	}
+
+	// Prepare the request for the backend client.
+	newRequestJSON := translatorGeminiToClaude.ConvertGeminiRequestToAnthropic(rawJSON)
+	newRequestJSON, _ = sjson.Set(newRequestJSON, "stream", true)
+	// log.Debugf("Request: %s", newRequestJSON)
+
+	modelName := gjson.GetBytes(rawJSON, "model")
+
+	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
+
+	var cliClient client.Client
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName.String())
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
+			flusher.Flush()
+			cliCancel()
+			return
+		}
+
+		if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
+			log.Debugf("Request claude use API Key: %s", apiKey)
+		} else {
+			log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
+		}
+
+		// Send the message and receive response chunks and errors via channels.
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+
+		params := &translatorGeminiToClaude.ConvertAnthropicResponseToGeminiParams{
+			Model:      modelName.String(),
+			CreatedAt:  0,
+			ResponseID: "",
+		}
+		for {
+			select {
+			// Handle client disconnection.
+			case <-c.Request.Context().Done():
+				if c.Request.Context().Err().Error() == "context canceled" {
+					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
+					cliCancel() // Cancel the backend request.
+					return
+				}
+			// Process incoming response chunks.
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					cliCancel()
+					return
+				}
+
+				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n\n"))
+
+				if bytes.HasPrefix(chunk, []byte("data: ")) {
+					jsonData := chunk[6:]
+					data := gjson.ParseBytes(jsonData)
+					typeResult := data.Get("type")
+					if typeResult.String() != "" {
+						// log.Debugf(string(jsonData))
+						outputs := translatorGeminiToClaude.ConvertAnthropicResponseToGemini(jsonData, params)
+						if len(outputs) > 0 {
+							for i := 0; i < len(outputs); i++ {
+								_, _ = c.Writer.Write([]byte("data: "))
+								_, _ = c.Writer.Write([]byte(outputs[i]))
+								_, _ = c.Writer.Write([]byte("\n\n"))
+							}
+						}
+					}
+					// log.Debugf(string(jsonData))
+				}
+				flusher.Flush()
+			// Handle errors from the backend.
+			case err, okError := <-errChan:
+				if okError {
+					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+						continue outLoop
+					} else {
+						c.Status(err.StatusCode)
+						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
+						flusher.Flush()
+						cliCancel(err.Error)
+					}
+					return
+				}
+			// Send a keep-alive signal to the client.
+			case <-time.After(500 * time.Millisecond):
+			}
+		}
+	}
+}
+
+func (h *GeminiAPIHandlers) handleClaudeGenerateContent(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "application/json")
+
+	// Prepare the request for the backend client.
+	newRequestJSON := translatorGeminiToClaude.ConvertGeminiRequestToAnthropic(rawJSON)
+	// log.Debugf("Request: %s", newRequestJSON)
+	newRequestJSON, _ = sjson.Set(newRequestJSON, "stream", true)
+
+	modelName := gjson.GetBytes(rawJSON, "model")
+
+	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
+
+	var cliClient client.Client
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName.String())
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
+			cliCancel()
+			return
+		}
+
+		if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
+			log.Debugf("Request claude use API Key: %s", apiKey)
+		} else {
+			log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
+		}
+
+		// Send the message and receive response chunks and errors via channels.
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+
+		var allChunks [][]byte
+		for {
+			select {
+			// Handle client disconnection.
+			case <-c.Request.Context().Done():
+				if c.Request.Context().Err().Error() == "context canceled" {
+					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
+					cliCancel() // Cancel the backend request.
+					return
+				}
+			// Process incoming response chunks.
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					if len(allChunks) > 0 {
+						// Use the last chunk which should contain the complete message
+						finalResponseStr := translatorGeminiToClaude.ConvertAnthropicResponseToGeminiNonStream(allChunks, modelName.String())
+						finalResponse := []byte(finalResponseStr)
+						_, _ = c.Writer.Write(finalResponse)
+					}
+
+					cliCancel()
+					return
+				}
+
+				// Store chunk for building final response
+				if bytes.HasPrefix(chunk, []byte("data: ")) {
+					jsonData := chunk[6:]
+					allChunks = append(allChunks, jsonData)
+				}
+
+				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n\n"))
+
+			// Handle errors from the backend.
+			case err, okError := <-errChan:
+				if okError {
+					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+						continue outLoop
+					} else {
+						c.Status(err.StatusCode)
+						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
+						cliCancel(err.Error)
+					}
+					return
+				}
+			// Send a keep-alive signal to the client.
+			case <-time.After(500 * time.Millisecond):
+			}
+		}
+	}
+}
--- a/internal/api/handlers/handlers.go
+++ b/internal/api/handlers/handlers.go
@@ -112,6 +112,12 @@ func (h *APIHandlers) GetClient(modelName string, isGenerateContent ...bool) (cl
 				clients = append(clients, cli)
 			}
 		}
+	} else if provider == "claude" {
+		for i := 0; i < len(h.CliClients); i++ {
+			if cli, ok := h.CliClients[i].(*client.ClaudeClient); ok {
+				clients = append(clients, cli)
+			}
+		}
 	}

 	if _, hasKey := h.LastUsedClientIndex[provider]; !hasKey {
@@ -142,6 +148,8 @@ func (h *APIHandlers) GetClient(modelName string, isGenerateContent ...bool) (cl
 				log.Debugf("Gemini Model %s is quota exceeded for account %s, project id: %s", modelName, cliClient.GetEmail(), cliClient.(*client.GeminiClient).GetProjectID())
 			} else if provider == "gpt" {
 				log.Debugf("Codex Model %s is quota exceeded for account %s", modelName, cliClient.GetEmail())
+			} else if provider == "claude" {
+				log.Debugf("Claude Model %s is quota exceeded for account %s", modelName, cliClient.GetEmail())
 			}
 			cliClient = nil
 			continue
@@ -151,6 +159,10 @@ func (h *APIHandlers) GetClient(modelName string, isGenerateContent ...bool) (cl
 	}

 	if len(reorderedClients) == 0 {
+		if provider == "claude" {
+			// log.Debugf("Claude Model %s is quota exceeded for all accounts", modelName)
+			return nil, &client.ErrorMessage{StatusCode: 429, Error: fmt.Errorf(`{"type":"error","error":{"type":"rate_limit_error","message":"This request would exceed your account's rate limit. Please try again later."}}`)}
+		}
 		return nil, &client.ErrorMessage{StatusCode: 429, Error: fmt.Errorf(`{"error":{"code":429,"message":"All the models of '%s' are quota exceeded","status":"RESOURCE_EXHAUSTED"}}`, modelName)}
 	}

--- a/internal/api/handlers/openai/openai_handlers.go
+++ b/internal/api/handlers/openai/openai_handlers.go
@@ -15,11 +15,13 @@ import (

 	"github.com/luispater/CLIProxyAPI/internal/api/handlers"
 	"github.com/luispater/CLIProxyAPI/internal/client"
+	translatorOpenAIToClaude "github.com/luispater/CLIProxyAPI/internal/translator/claude/openai"
 	translatorOpenAIToCodex "github.com/luispater/CLIProxyAPI/internal/translator/codex/openai"
 	translatorOpenAIToGeminiCli "github.com/luispater/CLIProxyAPI/internal/translator/gemini-cli/openai"
 	"github.com/luispater/CLIProxyAPI/internal/util"
 	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"

 	"github.com/gin-gonic/gin"
 )
@@ -107,6 +109,23 @@ func (h *OpenAIAPIHandlers) Models(c *gin.Context) {
 				"maxTemperature": 2,
 				"thinking":       true,
 			},
+			{
+				"id":                    "claude-opus-4-1-20250805",
+				"object":                "model",
+				"version":               "claude-opus-4-1-20250805",
+				"name":                  "Claude Opus 4.1",
+				"description":           "Anthropic's most capable model.",
+				"context_length":        200_000,
+				"max_completion_tokens": 32_000,
+				"supported_parameters": []string{
+					"tools",
+				},
+				"temperature":    1,
+				"topP":           0.95,
+				"topK":           64,
+				"maxTemperature": 2,
+				"thinking":       true,
+			},
 		},
 	})
 }
@@ -146,6 +165,12 @@ func (h *OpenAIAPIHandlers) ChatCompletions(c *gin.Context) {
 		} else {
 			h.handleCodexNonStreamingResponse(c, rawJSON)
 		}
+	} else if provider == "claude" {
+		if streamResult.Type == gjson.True {
+			h.handleClaudeStreamingResponse(c, rawJSON)
+		} else {
+			h.handleClaudeNonStreamingResponse(c, rawJSON)
+		}
 	}
 }

@@ -174,7 +199,7 @@ func (h *OpenAIAPIHandlers) handleGeminiNonStreamingResponse(c *gin.Context, raw
 		cliClient, errorResponse = h.GetClient(modelName)
 		if errorResponse != nil {
 			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
 			cliCancel()
 			return
 		}
@@ -251,7 +276,7 @@ outLoop:
 		cliClient, errorResponse = h.GetClient(modelName)
 		if errorResponse != nil {
 			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
 			flusher.Flush()
 			cliCancel()
 			return
@@ -288,6 +313,7 @@ outLoop:
 				}

 				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n\n"))

 				// Convert the chunk to OpenAI format and send it to the client.
 				hasFirstResponse = true
@@ -374,6 +400,7 @@ outLoop:
 				}

 				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n\n"))

 				if bytes.HasPrefix(chunk, []byte("data: ")) {
 					jsonData := chunk[6:]
@@ -451,7 +478,7 @@ outLoop:
 		cliClient, errorResponse = h.GetClient(modelName.String())
 		if errorResponse != nil {
 			c.Status(errorResponse.StatusCode)
-			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
 			flusher.Flush()
 			cliCancel()
 			return
@@ -481,6 +508,7 @@ outLoop:
 				}

 				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n\n"))

 				// log.Debugf("Response: %s\n", string(chunk))
 				// Convert the chunk to OpenAI format and send it to the client.
@@ -519,3 +547,217 @@ outLoop:
 		}
 	}
 }
+
+// handleClaudeNonStreamingResponse handles non-streaming chat completion responses
+// for anthropic models. It uses the streaming interface internally but aggregates
+// all responses before sending back a complete non-streaming response in OpenAI format.
+//
+// Parameters:
+//   - c: The Gin context containing the HTTP request and response
+//   - rawJSON: The raw JSON bytes of the OpenAI-compatible request
+func (h *OpenAIAPIHandlers) handleClaudeNonStreamingResponse(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "application/json")
+
+	// Force streaming in the request to use the streaming interface
+	newRequestJSON := translatorOpenAIToClaude.ConvertOpenAIRequestToAnthropic(rawJSON)
+	// Ensure stream is set to true for the backend request
+	newRequestJSON, _ = sjson.Set(newRequestJSON, "stream", true)
+
+	modelName := gjson.GetBytes(rawJSON, "model")
+	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
+
+	var cliClient client.Client
+	defer func() {
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName.String())
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
+			cliCancel()
+			return
+		}
+
+		if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
+			log.Debugf("Request claude use API Key: %s", apiKey)
+		} else {
+			log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
+		}
+
+		// Use streaming interface but collect all responses
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+
+		// Collect all streaming chunks to build the final response
+		var allChunks [][]byte
+
+		for {
+			select {
+			case <-c.Request.Context().Done():
+				if c.Request.Context().Err().Error() == "context canceled" {
+					log.Debugf("Client disconnected: %v", c.Request.Context().Err())
+					cliCancel()
+					return
+				}
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					// All chunks received, now build the final non-streaming response
+					if len(allChunks) > 0 {
+						// Use the last chunk which should contain the complete message
+						finalResponseStr := translatorOpenAIToClaude.ConvertAnthropicStreamingResponseToOpenAINonStream(allChunks)
+						finalResponse := []byte(finalResponseStr)
+						_, _ = c.Writer.Write(finalResponse)
+					}
+					cliCancel()
+					return
+				}
+
+				// Store chunk for building final response
+				if bytes.HasPrefix(chunk, []byte("data: ")) {
+					jsonData := chunk[6:]
+					allChunks = append(allChunks, jsonData)
+				}
+
+				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n\n"))
+
+			case err, okError := <-errChan:
+				if okError {
+					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+						continue outLoop
+					} else {
+						c.Status(err.StatusCode)
+						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
+						cliCancel(err.Error)
+					}
+					return
+				}
+			case <-time.After(30 * time.Second):
+			}
+		}
+	}
+}
+
+// handleClaudeStreamingResponse handles streaming responses for anthropic models.
+// It establishes a streaming connection with the backend service and forwards
+// the response chunks to the client in real-time using Server-Sent Events.
+//
+// Parameters:
+//   - c: The Gin context containing the HTTP request and response
+//   - rawJSON: The raw JSON bytes of the OpenAI-compatible request
+func (h *OpenAIAPIHandlers) handleClaudeStreamingResponse(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "text/event-stream")
+	c.Header("Cache-Control", "no-cache")
+	c.Header("Connection", "keep-alive")
+	c.Header("Access-Control-Allow-Origin", "*")
+
+	// Get the http.Flusher interface to manually flush the response.
+	flusher, ok := c.Writer.(http.Flusher)
+	if !ok {
+		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Streaming not supported",
+				Type:    "server_error",
+			},
+		})
+		return
+	}
+
+	// Prepare the request for the backend client.
+	newRequestJSON := translatorOpenAIToClaude.ConvertOpenAIRequestToAnthropic(rawJSON)
+	modelName := gjson.GetBytes(rawJSON, "model")
+	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
+
+	var cliClient client.Client
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName.String())
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
+			flusher.Flush()
+			cliCancel()
+			return
+		}
+
+		if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
+			log.Debugf("Request claude use API Key: %s", apiKey)
+		} else {
+			log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
+		}
+
+		// Send the message and receive response chunks and errors via channels.
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+		params := &translatorOpenAIToClaude.ConvertAnthropicResponseToOpenAIParams{
+			CreatedAt:    0,
+			ResponseID:   "",
+			FinishReason: "",
+		}
+
+		hasFirstResponse := false
+		for {
+			select {
+			// Handle client disconnection.
+			case <-c.Request.Context().Done():
+				if c.Request.Context().Err().Error() == "context canceled" {
+					log.Debugf("GeminiClient disconnected: %v", c.Request.Context().Err())
+					cliCancel() // Cancel the backend request.
+					return
+				}
+			// Process incoming response chunks.
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					flusher.Flush()
+					cliCancel()
+					return
+				}
+
+				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n\n"))
+
+				if bytes.HasPrefix(chunk, []byte("data: ")) {
+					jsonData := chunk[6:]
+					// Convert the chunk to OpenAI format and send it to the client.
+					hasFirstResponse = true
+					openAIFormats := translatorOpenAIToClaude.ConvertAnthropicResponseToOpenAI(jsonData, params)
+					for i := 0; i < len(openAIFormats); i++ {
+						_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", openAIFormats[i])
+						flusher.Flush()
+					}
+				}
+			// Handle errors from the backend.
+			case err, okError := <-errChan:
+				if okError {
+					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+						continue outLoop
+					} else {
+						c.Status(err.StatusCode)
+						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
+						flusher.Flush()
+						cliCancel(err.Error)
+					}
+					return
+				}
+			// Send a keep-alive signal to the client.
+			case <-time.After(500 * time.Millisecond):
+				if hasFirstResponse {
+					_, _ = c.Writer.Write([]byte(": CLI-PROXY-API PROCESSING\n\n"))
+					flusher.Flush()
+				}
+			}
+		}
+	}
+}