Add Qwen support

2026-02-18 04:10:51 +08:00 · 2025-08-21 05:11:21 +08:00
parent d58cc55cb2
commit aa2f37d54d
20 changed files with 2888 additions and 9 deletions
--- a/internal/api/handlers/claude/code_handlers.go
+++ b/internal/api/handlers/claude/code_handlers.go
@@ -11,6 +11,7 @@ import (
 	"context"
 	"fmt"
 	"net/http"
+	"strings"
 	"time"

 	"github.com/gin-gonic/gin"
@@ -18,6 +19,7 @@ import (
 	"github.com/luispater/CLIProxyAPI/internal/client"
 	translatorClaudeCodeToCodex "github.com/luispater/CLIProxyAPI/internal/translator/codex/claude/code"
 	translatorClaudeCodeToGeminiCli "github.com/luispater/CLIProxyAPI/internal/translator/gemini-cli/claude/code"
+	translatorClaudeCodeToQwen "github.com/luispater/CLIProxyAPI/internal/translator/openai/claude"
 	"github.com/luispater/CLIProxyAPI/internal/util"
 	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
@@ -62,7 +64,7 @@ func (h *ClaudeCodeAPIHandlers) ClaudeMessages(c *gin.Context) {

 	// Check if the client requested a streaming response.
 	streamResult := gjson.GetBytes(rawJSON, "stream")
-	if streamResult.Type == gjson.False {
+	if !streamResult.Exists() || streamResult.Type == gjson.False {
 		return
 	}

@@ -72,6 +74,8 @@ func (h *ClaudeCodeAPIHandlers) ClaudeMessages(c *gin.Context) {
 		h.handleCodexStreamingResponse(c, rawJSON)
 	} else if provider == "claude" {
 		h.handleClaudeStreamingResponse(c, rawJSON)
+	} else if provider == "qwen" {
+		h.handleQwenStreamingResponse(c, rawJSON)
 	} else {
 		h.handleGeminiStreamingResponse(c, rawJSON)
 	}
@@ -518,3 +522,149 @@ outLoop:
 		}
 	}
 }
+
+// handleQwenStreamingResponse streams a Claude-compatible response backed by a Qwen client.
+// It converts the Claude request into the OpenAI request format, establishes SSE,
+// and translates streaming chunks back into Claude Code events.
+func (h *ClaudeCodeAPIHandlers) handleQwenStreamingResponse(c *gin.Context, rawJSON []byte) {
+	// Set up Server-Sent Events (SSE) headers for streaming response
+	// These headers are essential for maintaining a persistent connection
+	// and enabling real-time streaming of chat completions
+	c.Header("Content-Type", "text/event-stream")
+	c.Header("Cache-Control", "no-cache")
+	c.Header("Connection", "keep-alive")
+	c.Header("Access-Control-Allow-Origin", "*")
+
+	// Get the http.Flusher interface to manually flush the response.
+	// This is crucial for streaming as it allows immediate sending of data chunks
+	flusher, ok := c.Writer.(http.Flusher)
+	if !ok {
+		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Streaming not supported",
+				Type:    "server_error",
+			},
+		})
+		return
+	}
+
+	// Translate the Claude request into the OpenAI-style payload sent to the backend,
+	// then set its model field to the model named in the original request
+	newRequestJSON := translatorClaudeCodeToQwen.ConvertAnthropicRequestToOpenAI(rawJSON)
+	modelName := gjson.GetBytes(rawJSON, "model").String()
+
+	newRequestJSON, _ = sjson.Set(newRequestJSON, "model", modelName)
+	// Create a cancellable context for the backend client request
+	// This allows proper cleanup and cancellation of ongoing requests
+	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
+
+	// cliClient is the client most recently returned by GetClient.
+	// NOTE(review): a 429 switch overwrites it without unlocking the previous
+	// client in this function; confirm that mutex is released elsewhere.
+	var cliClient client.Client
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		// This prevents deadlocks and ensures proper resource cleanup
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+	// Main client rotation loop with quota management
+	// Each pass selects a client; a 429 with SwitchProject enabled restarts it
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName)
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
+			flusher.Flush()
+			cliCancel()
+			return
+		}
+
+		log.Debugf("Request use qwen account: %s", cliClient.GetEmail())
+
+		// Initiate streaming communication with the backend client
+		// This returns two channels: one for response chunks and one for errors
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+
+		// Track response state for proper Claude format conversion
+		// A fresh state is built for every selected client
+		params := &translatorClaudeCodeToQwen.ConvertOpenAIResponseToAnthropicParams{
+			MessageID:            "",
+			Model:                "",
+			CreatedAt:            0,
+			ContentAccumulator:   strings.Builder{},
+			ToolCallsAccumulator: nil,
+		}
+
+		// Main streaming loop - waits on the request context, the two backend
+		// channels, and an idle timer
+		for {
+			select {
+			// Case 1: Handle the request context ending
+			// Done stays closed once it fires, so this case is ready on every pass.
+			// Returning only for "context canceled" would leave any other cause
+			// spinning in this loop, so stop for every cause.
+			case <-c.Request.Context().Done():
+				log.Debugf("QwenClient disconnected: %v", c.Request.Context().Err())
+				cliCancel() // Cancel the backend request to prevent resource leaks
+				return
+
+			// Case 2: Process incoming response chunks from the backend
+			// This handles the actual streaming data from the AI model
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					flusher.Flush()
+					cliCancel()
+					return
+				}
+
+				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n"))
+
+				// Convert the backend response to Claude-compatible format
+				// Only chunks starting with "data: " are translated and forwarded;
+				// other chunks are passed to AddAPIResponseData above and then dropped
+				if bytes.HasPrefix(chunk, []byte("data: ")) {
+					jsonData := chunk[6:] // drop the "data: " prefix
+					outputs := translatorClaudeCodeToQwen.ConvertOpenAIResponseToAnthropic(jsonData, params)
+					// Forward every translated event, each prefixed with "data: ".
+					// NOTE(review): no separator is written after an event here;
+					// presumably the translator output already ends with one - confirm.
+					for i := range outputs {
+						_, _ = c.Writer.Write([]byte("data: "))
+						_, _ = c.Writer.Write([]byte(outputs[i]))
+					}
+					flusher.Flush() // Immediately send the chunk to the client
+				}
+			// Case 3: Handle errors from the backend
+			// Quota errors may restart client selection; all others end the response
+			case errInfo, okError := <-errChan:
+				if !okError {
+					// A closed channel is always ready; nil disables this case instead of spinning
+					errChan = nil
+					continue
+				}
+				// Special handling for quota exceeded errors
+				// If configured, attempt to switch to a different project/client
+				if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+					log.Debugf("quota exceeded, switch client")
+					continue outLoop // Restart the client selection process
+				}
+				// Forward other errors directly to the client
+				c.Status(errInfo.StatusCode)
+				_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
+				flusher.Flush()
+				cliCancel(errInfo.Error)
+				return
+
+			// Case 4: Idle timer
+			// Nothing is written on timeout; the loop simply waits again
+			case <-time.After(3000 * time.Millisecond):
+			}
+		}
+	}
+}
--- a/internal/api/handlers/gemini/cli/cli_handlers.go
+++ b/internal/api/handlers/gemini/cli/cli_handlers.go
@@ -18,6 +18,7 @@ import (
 	"github.com/luispater/CLIProxyAPI/internal/client"
 	translatorGeminiToClaude "github.com/luispater/CLIProxyAPI/internal/translator/claude/gemini"
 	translatorGeminiToCodex "github.com/luispater/CLIProxyAPI/internal/translator/codex/gemini"
+	translatorGeminiToQwen "github.com/luispater/CLIProxyAPI/internal/translator/openai/gemini"
 	"github.com/luispater/CLIProxyAPI/internal/util"
 	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
@@ -64,6 +65,8 @@ func (h *GeminiCLIAPIHandlers) CLIHandler(c *gin.Context) {
 			h.handleCodexInternalGenerateContent(c, rawJSON)
 		} else if provider == "claude" {
 			h.handleClaudeInternalGenerateContent(c, rawJSON)
+		} else if provider == "qwen" {
+			h.handleQwenInternalGenerateContent(c, rawJSON)
 		}
 	} else if requestRawURI == "/v1internal:streamGenerateContent" {
 		if provider == "gemini" || provider == "unknow" {
@@ -72,6 +75,8 @@ func (h *GeminiCLIAPIHandlers) CLIHandler(c *gin.Context) {
 			h.handleCodexInternalStreamGenerateContent(c, rawJSON)
 		} else if provider == "claude" {
 			h.handleClaudeInternalStreamGenerateContent(c, rawJSON)
+		} else if provider == "qwen" {
+			h.handleQwenInternalStreamGenerateContent(c, rawJSON)
 		}
 	} else {
 		reqBody := bytes.NewBuffer(rawJSON)
@@ -733,3 +738,180 @@ outLoop:
 		}
 	}
 }
+
+// handleQwenInternalStreamGenerateContent serves a streaming Gemini CLI request with a Qwen client.
+// It unwraps the CLI envelope, sends the OpenAI-style translation to the backend, and relays each
+// translated chunk as an SSE event wrapped in a {"response": ...} object.
+func (h *GeminiCLIAPIHandlers) handleQwenInternalStreamGenerateContent(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "text/event-stream")
+	c.Header("Cache-Control", "no-cache")
+	c.Header("Connection", "keep-alive")
+	c.Header("Access-Control-Allow-Origin", "*")
+
+	// Get the http.Flusher interface to manually flush the response.
+	flusher, ok := c.Writer.(http.Flusher)
+	if !ok {
+		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Streaming not supported",
+				Type:    "server_error",
+			},
+		})
+		return
+	}
+
+	// Unwrap the envelope: "request" becomes the body, gets the outer model, and systemInstruction is renamed.
+	// NOTE(review): Raw is empty when systemInstruction is absent; confirm sjson tolerates an empty raw value.
+	modelResult := gjson.GetBytes(rawJSON, "model")
+	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
+	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelResult.String())
+	rawJSON, _ = sjson.SetRawBytes(rawJSON, "system_instruction", []byte(gjson.GetBytes(rawJSON, "systemInstruction").Raw))
+	rawJSON, _ = sjson.DeleteBytes(rawJSON, "systemInstruction")
+
+	// Prepare the request for the backend client.
+	newRequestJSON := translatorGeminiToQwen.ConvertGeminiRequestToOpenAI(rawJSON)
+	newRequestJSON, _ = sjson.Set(newRequestJSON, "stream", true)
+
+	modelName := gjson.GetBytes(rawJSON, "model")
+
+	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
+
+	var cliClient client.Client
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName.String())
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
+			flusher.Flush()
+			cliCancel()
+			return
+		}
+
+		log.Debugf("Request qwen use account: %s", cliClient.GetEmail())
+
+		// Send the message and receive response chunks and errors via channels.
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+
+		// Translator state for this attempt; it is rebuilt for every selected client.
+		params := &translatorGeminiToQwen.ConvertOpenAIResponseToGeminiParams{
+			ToolCallsAccumulator: nil,
+			ContentAccumulator:   strings.Builder{},
+			IsFirstChunk:         false,
+		}
+		for {
+			select {
+			// Handle the request context ending. Done stays closed once it fires, so
+			// return for every cause instead of spinning on a non-cancel error.
+			case <-c.Request.Context().Done():
+				log.Debugf("QwenClient disconnected: %v", c.Request.Context().Err())
+				cliCancel() // Cancel the backend request.
+				return
+			// Process incoming response chunks.
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					cliCancel()
+					return
+				}
+
+				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n\n"))
+
+				if bytes.HasPrefix(chunk, []byte("data: ")) {
+					jsonData := chunk[6:]
+					outputs := translatorGeminiToQwen.ConvertOpenAIResponseToGemini(jsonData, params)
+					for i := range outputs {
+						// Wrap each translated chunk as {"response": <chunk>} before sending.
+						wrapped, _ := sjson.SetRaw("{}", "response", outputs[i])
+						_, _ = c.Writer.Write([]byte("data: "))
+						_, _ = c.Writer.Write([]byte(wrapped))
+						_, _ = c.Writer.Write([]byte("\n\n"))
+					}
+				}
+				flusher.Flush()
+			// Handle errors from the backend.
+			case err, okError := <-errChan:
+				if !okError {
+					errChan = nil // a closed channel is always ready; nil disables this case
+					continue
+				}
+				if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+					continue outLoop
+				}
+				c.Status(err.StatusCode)
+				_, _ = fmt.Fprint(c.Writer, err.Error.Error())
+				flusher.Flush()
+				cliCancel(err.Error)
+				return
+			// Idle timer: nothing is sent on timeout; the loop just waits again.
+			case <-time.After(500 * time.Millisecond):
+			}
+		}
+	}
+}
+
+// handleQwenInternalGenerateContent answers a non-streaming Gemini CLI request with a Qwen client.
+// It unwraps the CLI envelope, sends the OpenAI-style translation to the backend, and writes the
+// reply back after converting it to the Gemini format.
+func (h *GeminiCLIAPIHandlers) handleQwenInternalGenerateContent(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "application/json")
+
+	// Unwrap the envelope: "request" becomes the body and inherits the outer model.
+	outerModel := gjson.GetBytes(rawJSON, "model").String()
+	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
+	rawJSON, _ = sjson.SetBytes(rawJSON, "model", outerModel)
+	systemInstruction := gjson.GetBytes(rawJSON, "systemInstruction").Raw
+	rawJSON, _ = sjson.SetRawBytes(rawJSON, "system_instruction", []byte(systemInstruction))
+	rawJSON, _ = sjson.DeleteBytes(rawJSON, "systemInstruction")
+
+	payload := translatorGeminiToQwen.ConvertGeminiRequestToOpenAI(rawJSON)
+	model := gjson.GetBytes(rawJSON, "model").String()
+
+	backendCtx, finish := h.GetContextWithCancel(c, context.Background())
+
+	var selected client.Client
+	defer func() {
+		if selected == nil {
+			return
+		}
+		selected.GetRequestMutex().Unlock()
+	}()
+
+	for {
+		var selectErr *client.ErrorMessage
+		if selected, selectErr = h.GetClient(model); selectErr != nil {
+			c.Status(selectErr.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, selectErr.Error.Error())
+			finish()
+			return
+		}
+
+		log.Debugf("Request use qwen account: %s", selected.GetEmail())
+
+		reply, sendErr := selected.SendRawMessage(backendCtx, []byte(payload), "")
+		switch {
+		case sendErr == nil:
+			h.AddAPIResponseData(c, reply)
+			h.AddAPIResponseData(c, []byte("\n"))
+
+			converted := translatorGeminiToQwen.ConvertOpenAINonStreamResponseToGemini(reply)
+			_, _ = c.Writer.Write([]byte(converted))
+			finish(reply)
+		case sendErr.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject:
+			continue // quota exceeded: select another client
+		default:
+			c.Status(sendErr.StatusCode)
+			_, _ = c.Writer.Write([]byte(sendErr.Error.Error()))
+			finish(sendErr.Error)
+		}
+		return
+	}
+}
--- a/internal/api/handlers/gemini/gemini_handlers.go
+++ b/internal/api/handlers/gemini/gemini_handlers.go
@@ -19,6 +19,7 @@ import (
 	translatorGeminiToClaude "github.com/luispater/CLIProxyAPI/internal/translator/claude/gemini"
 	translatorGeminiToCodex "github.com/luispater/CLIProxyAPI/internal/translator/codex/gemini"
 	translatorGeminiToGeminiCli "github.com/luispater/CLIProxyAPI/internal/translator/gemini-cli/gemini/cli"
+	translatorGeminiToQwen "github.com/luispater/CLIProxyAPI/internal/translator/openai/gemini"
 	"github.com/luispater/CLIProxyAPI/internal/util"
 	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
@@ -241,6 +242,13 @@ func (h *GeminiAPIHandlers) GeminiHandler(c *gin.Context) {
 		case "streamGenerateContent":
 			h.handleClaudeStreamGenerateContent(c, rawJSON)
 		}
+	} else if provider == "qwen" {
+		switch method {
+		case "generateContent":
+			h.handleQwenGenerateContent(c, rawJSON)
+		case "streamGenerateContent":
+			h.handleQwenStreamGenerateContent(c, rawJSON)
+		}
 	}
 }

@@ -961,3 +969,163 @@ outLoop:
 		}
 	}
 }
+
+// handleQwenStreamGenerateContent serves a streaming Gemini API request with a Qwen client.
+// It sends the OpenAI-style translation of the request with "stream" set to true and relays
+// each translated chunk to the caller as an SSE "data: " event.
+func (h *GeminiAPIHandlers) handleQwenStreamGenerateContent(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "text/event-stream")
+	c.Header("Cache-Control", "no-cache")
+	c.Header("Connection", "keep-alive")
+	c.Header("Access-Control-Allow-Origin", "*")
+
+	// Get the http.Flusher interface to manually flush the response.
+	flusher, ok := c.Writer.(http.Flusher)
+	if !ok {
+		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Streaming not supported",
+				Type:    "server_error",
+			},
+		})
+		return
+	}
+
+	// Prepare the request for the backend client.
+	newRequestJSON := translatorGeminiToQwen.ConvertGeminiRequestToOpenAI(rawJSON)
+	newRequestJSON, _ = sjson.Set(newRequestJSON, "stream", true)
+
+	modelName := gjson.GetBytes(rawJSON, "model")
+
+	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
+
+	var cliClient client.Client
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName.String())
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
+			flusher.Flush()
+			cliCancel()
+			return
+		}
+
+		log.Debugf("Request use qwen account: %s", cliClient.GetEmail())
+
+		// Send the message and receive response chunks and errors via channels.
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+
+		// Translator state for this attempt; it is rebuilt for every selected client.
+		params := &translatorGeminiToQwen.ConvertOpenAIResponseToGeminiParams{
+			ToolCallsAccumulator: nil,
+			ContentAccumulator:   strings.Builder{},
+			IsFirstChunk:         false,
+		}
+		for {
+			select {
+			// Handle the request context ending. Done stays closed once it fires, so
+			// return for every cause instead of spinning on a non-cancel error.
+			case <-c.Request.Context().Done():
+				log.Debugf("QwenClient disconnected: %v", c.Request.Context().Err())
+				cliCancel() // Cancel the backend request.
+				return
+			// Process incoming response chunks.
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					cliCancel()
+					return
+				}
+
+				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n\n"))
+				if bytes.HasPrefix(chunk, []byte("data: ")) {
+					jsonData := chunk[6:]
+					outputs := translatorGeminiToQwen.ConvertOpenAIResponseToGemini(jsonData, params)
+					for i := range outputs {
+						_, _ = c.Writer.Write([]byte("data: "))
+						_, _ = c.Writer.Write([]byte(outputs[i]))
+						_, _ = c.Writer.Write([]byte("\n\n"))
+					}
+				}
+				flusher.Flush()
+			// Handle errors from the backend.
+			case err, okError := <-errChan:
+				if !okError {
+					errChan = nil // a closed channel is always ready; nil disables this case
+					continue
+				}
+				if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+					continue outLoop
+				}
+				c.Status(err.StatusCode)
+				_, _ = fmt.Fprint(c.Writer, err.Error.Error())
+				flusher.Flush()
+				cliCancel(err.Error)
+				return
+			// Idle timer: nothing is sent on timeout; the loop just waits again.
+			case <-time.After(500 * time.Millisecond):
+			}
+		}
+	}
+}
+
+// handleQwenGenerateContent answers a non-streaming Gemini API request with a Qwen client.
+// It sends the OpenAI-style translation of the request to the backend and writes the reply
+// back after converting it to the Gemini format.
+func (h *GeminiAPIHandlers) handleQwenGenerateContent(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "application/json")
+
+	// Build the backend payload and read the model used for client selection.
+	payload := translatorGeminiToQwen.ConvertGeminiRequestToOpenAI(rawJSON)
+	model := gjson.GetBytes(rawJSON, "model").String()
+
+	backendCtx, finish := h.GetContextWithCancel(c, context.Background())
+
+	// The client selected last is unlocked when the handler returns.
+	var selected client.Client
+	defer func() {
+		if selected == nil {
+			return
+		}
+		selected.GetRequestMutex().Unlock()
+	}()
+
+	for {
+		var selectErr *client.ErrorMessage
+		if selected, selectErr = h.GetClient(model); selectErr != nil {
+			c.Status(selectErr.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, selectErr.Error.Error())
+			finish()
+			return
+		}
+
+		log.Debugf("Request use qwen account: %s", selected.GetEmail())
+
+		reply, sendErr := selected.SendRawMessage(backendCtx, []byte(payload), "")
+		switch {
+		case sendErr == nil:
+			h.AddAPIResponseData(c, reply)
+			h.AddAPIResponseData(c, []byte("\n"))
+
+			converted := translatorGeminiToQwen.ConvertOpenAINonStreamResponseToGemini(reply)
+			_, _ = c.Writer.Write([]byte(converted))
+			finish(reply)
+		case sendErr.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject:
+			continue // quota exceeded: select another client
+		default:
+			c.Status(sendErr.StatusCode)
+			_, _ = c.Writer.Write([]byte(sendErr.Error.Error()))
+			finish(sendErr.Error)
+		}
+		return
+	}
+}
--- a/internal/api/handlers/handlers.go
+++ b/internal/api/handlers/handlers.go
@@ -118,6 +118,12 @@ func (h *APIHandlers) GetClient(modelName string, isGenerateContent ...bool) (cl
 				clients = append(clients, cli)
 			}
 		}
+	} else if provider == "qwen" {
+		for i := 0; i < len(h.CliClients); i++ {
+			if cli, ok := h.CliClients[i].(*client.QwenClient); ok {
+				clients = append(clients, cli)
+			}
+		}
 	}

 	if _, hasKey := h.LastUsedClientIndex[provider]; !hasKey {
@@ -150,6 +156,8 @@ func (h *APIHandlers) GetClient(modelName string, isGenerateContent ...bool) (cl
 				log.Debugf("Codex Model %s is quota exceeded for account %s", modelName, cliClient.GetEmail())
 			} else if provider == "claude" {
 				log.Debugf("Claude Model %s is quota exceeded for account %s", modelName, cliClient.GetEmail())
+			} else if provider == "qwen" {
+				log.Debugf("Qwen Model %s is quota exceeded for account %s", modelName, cliClient.GetEmail())
 			}
 			cliClient = nil
 			continue
--- a/internal/api/handlers/openai/openai_handlers.go
+++ b/internal/api/handlers/openai/openai_handlers.go
@@ -171,6 +171,13 @@ func (h *OpenAIAPIHandlers) ChatCompletions(c *gin.Context) {
 		} else {
 			h.handleClaudeNonStreamingResponse(c, rawJSON)
 		}
+	} else if provider == "qwen" {
+		// qwen3-coder-plus / qwen3-coder-flash
+		if streamResult.Type == gjson.True {
+			h.handleQwenStreamingResponse(c, rawJSON)
+		} else {
+			h.handleQwenNonStreamingResponse(c, rawJSON)
+		}
 	}
 }

@@ -761,3 +768,155 @@ outLoop:
 		}
 	}
 }
+
+// handleQwenNonStreamingResponse handles non-streaming chat completion responses
+// for Qwen models. It selects a client from the pool, sends the request unchanged,
+// and writes the backend's reply body back to the caller as-is.
+//
+// Parameters:
+//   - c: The Gin context containing the HTTP request and response
+//   - rawJSON: The raw JSON bytes of the OpenAI-compatible request
+func (h *OpenAIAPIHandlers) handleQwenNonStreamingResponse(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "application/json")
+
+	modelResult := gjson.GetBytes(rawJSON, "model")
+	modelName := modelResult.String()
+	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
+
+	var cliClient client.Client
+	defer func() {
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName)
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
+			cliCancel()
+			return
+		}
+
+		log.Debugf("Request qwen use account: %s", cliClient.GetEmail())
+
+		resp, err := cliClient.SendRawMessage(cliCtx, rawJSON, modelName)
+		if err != nil {
+			// A 429 with SwitchProject enabled retries with another client.
+			if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+				continue
+			}
+			c.Status(err.StatusCode)
+			_, _ = c.Writer.Write([]byte(err.Error.Error()))
+			cliCancel(err.Error)
+			return
+		}
+		// Success: forward the backend body without translation.
+		_, _ = c.Writer.Write(resp)
+		cliCancel(resp)
+		return
+	}
+}
+
+// handleQwenStreamingResponse handles streaming responses for Qwen models.
+// It opens a streaming request on a selected client and forwards every
+// backend chunk to the caller unchanged, one line per chunk, as it arrives.
+//
+// Parameters:
+//   - c: The Gin context containing the HTTP request and response
+//   - rawJSON: The raw JSON bytes of the OpenAI-compatible request
+func (h *OpenAIAPIHandlers) handleQwenStreamingResponse(c *gin.Context, rawJSON []byte) {
+	c.Header("Content-Type", "text/event-stream")
+	c.Header("Cache-Control", "no-cache")
+	c.Header("Connection", "keep-alive")
+	c.Header("Access-Control-Allow-Origin", "*")
+
+	// Get the http.Flusher interface to manually flush the response.
+	flusher, ok := c.Writer.(http.Flusher)
+	if !ok {
+		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Streaming not supported",
+				Type:    "server_error",
+			},
+		})
+		return
+	}
+
+	// Read the model name used for client selection; the request body is sent as-is.
+	modelResult := gjson.GetBytes(rawJSON, "model")
+	modelName := modelResult.String()
+
+	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
+
+	var cliClient client.Client
+	defer func() {
+		// Ensure the client's mutex is unlocked on function exit.
+		if cliClient != nil {
+			cliClient.GetRequestMutex().Unlock()
+		}
+	}()
+
+outLoop:
+	for {
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.GetClient(modelName)
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
+			flusher.Flush()
+			cliCancel()
+			return
+		}
+
+		log.Debugf("Request qwen use account: %s", cliClient.GetEmail())
+
+		// Send the message and receive response chunks and errors via channels.
+		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, rawJSON, modelName)
+
+		for {
+			select {
+			// Handle the request context ending. Done stays closed once it fires,
+			// so this case is ready on every pass; return for every cause instead
+			// of spinning when the error is not "context canceled".
+			case <-c.Request.Context().Done():
+				log.Debugf("QwenClient disconnected: %v", c.Request.Context().Err())
+				cliCancel() // Cancel the backend request.
+				return
+			// Process incoming response chunks.
+			case chunk, okStream := <-respChan:
+				if !okStream {
+					flusher.Flush()
+					cliCancel()
+					return
+				}
+
+				h.AddAPIResponseData(c, chunk)
+				h.AddAPIResponseData(c, []byte("\n"))
+
+				// Forward the backend chunk unchanged, followed by a newline.
+				_, _ = c.Writer.Write(chunk)
+				_, _ = c.Writer.Write([]byte("\n"))
+				flusher.Flush()
+			// Handle errors from the backend.
+			case err, okError := <-errChan:
+				if !okError {
+					errChan = nil // a closed channel is always ready; nil disables this case
+					continue
+				}
+				if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+					continue outLoop
+				}
+				c.Status(err.StatusCode)
+				_, _ = fmt.Fprint(c.Writer, err.Error.Error())
+				flusher.Flush()
+				cliCancel(err.Error)
+				return
+			// Idle timer: nothing is sent on timeout; the loop just waits again.
+			case <-time.After(500 * time.Millisecond):
+			}
+		}
+	}
+}