mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-02-18 12:20:52 +08:00
671 lines
25 KiB
Go
671 lines
25 KiB
Go
// Package claude provides HTTP handlers for Claude API code-related functionality.
|
|
// This package implements Claude-compatible streaming chat completions with sophisticated
|
|
// client rotation and quota management systems to ensure high availability and optimal
|
|
// resource utilization across multiple backend clients. It handles request translation
|
|
// between Claude API format and the underlying Gemini backend, providing seamless
|
|
// API compatibility while maintaining robust error handling and connection management.
|
|
package claude
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"fmt"
|
|
"net/http"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/gin-gonic/gin"
|
|
"github.com/luispater/CLIProxyAPI/internal/api/handlers"
|
|
"github.com/luispater/CLIProxyAPI/internal/client"
|
|
translatorClaudeCodeToCodex "github.com/luispater/CLIProxyAPI/internal/translator/codex/claude/code"
|
|
translatorClaudeCodeToGeminiCli "github.com/luispater/CLIProxyAPI/internal/translator/gemini-cli/claude/code"
|
|
translatorClaudeCodeToQwen "github.com/luispater/CLIProxyAPI/internal/translator/openai/claude"
|
|
"github.com/luispater/CLIProxyAPI/internal/util"
|
|
log "github.com/sirupsen/logrus"
|
|
"github.com/tidwall/gjson"
|
|
"github.com/tidwall/sjson"
|
|
)
|
|
|
|
// ClaudeCodeAPIHandlers contains the handlers for Claude API endpoints.
// It embeds the shared *handlers.APIHandlers, which supplies the backend
// client pool, configuration, and request-tracking helpers used by all
// streaming handlers in this file.
type ClaudeCodeAPIHandlers struct {
	*handlers.APIHandlers
}
|
|
|
|
// NewClaudeCodeAPIHandlers creates a new Claude API handlers instance.
|
|
// It takes an APIHandlers instance as input and returns a ClaudeCodeAPIHandlers.
|
|
func NewClaudeCodeAPIHandlers(apiHandlers *handlers.APIHandlers) *ClaudeCodeAPIHandlers {
|
|
return &ClaudeCodeAPIHandlers{
|
|
APIHandlers: apiHandlers,
|
|
}
|
|
}
|
|
|
|
// ClaudeMessages handles Claude-compatible streaming chat completions.
|
|
// This function implements a sophisticated client rotation and quota management system
|
|
// to ensure high availability and optimal resource utilization across multiple backend clients.
|
|
func (h *ClaudeCodeAPIHandlers) ClaudeMessages(c *gin.Context) {
|
|
// Extract raw JSON data from the incoming request
|
|
rawJSON, err := c.GetRawData()
|
|
// If data retrieval fails, return a 400 Bad Request error.
|
|
if err != nil {
|
|
c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
|
|
Error: handlers.ErrorDetail{
|
|
Message: fmt.Sprintf("Invalid request: %v", err),
|
|
Type: "invalid_request_error",
|
|
},
|
|
})
|
|
return
|
|
}
|
|
|
|
// h.handleGeminiStreamingResponse(c, rawJSON)
|
|
// h.handleCodexStreamingResponse(c, rawJSON)
|
|
modelName := gjson.GetBytes(rawJSON, "model")
|
|
provider := util.GetProviderName(modelName.String())
|
|
|
|
// Check if the client requested a streaming response.
|
|
streamResult := gjson.GetBytes(rawJSON, "stream")
|
|
if !streamResult.Exists() || streamResult.Type == gjson.False {
|
|
return
|
|
}
|
|
|
|
if provider == "gemini" {
|
|
h.handleGeminiStreamingResponse(c, rawJSON)
|
|
} else if provider == "gpt" {
|
|
h.handleCodexStreamingResponse(c, rawJSON)
|
|
} else if provider == "claude" {
|
|
h.handleClaudeStreamingResponse(c, rawJSON)
|
|
} else if provider == "qwen" {
|
|
h.handleQwenStreamingResponse(c, rawJSON)
|
|
} else {
|
|
h.handleGeminiStreamingResponse(c, rawJSON)
|
|
}
|
|
}
|
|
|
|
// handleGeminiStreamingResponse streams Claude-compatible responses backed by Gemini.
|
|
// It sets up SSE, selects a backend client with rotation/quota logic,
|
|
// forwards chunks, and translates them to Claude CLI format.
|
|
func (h *ClaudeCodeAPIHandlers) handleGeminiStreamingResponse(c *gin.Context, rawJSON []byte) {
|
|
// Set up Server-Sent Events (SSE) headers for streaming response
|
|
// These headers are essential for maintaining a persistent connection
|
|
// and enabling real-time streaming of chat completions
|
|
c.Header("Content-Type", "text/event-stream")
|
|
c.Header("Cache-Control", "no-cache")
|
|
c.Header("Connection", "keep-alive")
|
|
c.Header("Access-Control-Allow-Origin", "*")
|
|
|
|
// Get the http.Flusher interface to manually flush the response.
|
|
// This is crucial for streaming as it allows immediate sending of data chunks
|
|
flusher, ok := c.Writer.(http.Flusher)
|
|
if !ok {
|
|
c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
|
|
Error: handlers.ErrorDetail{
|
|
Message: "Streaming not supported",
|
|
Type: "server_error",
|
|
},
|
|
})
|
|
return
|
|
}
|
|
|
|
// Parse and prepare the Claude request, extracting model name, system instructions,
|
|
// conversation contents, and available tools from the raw JSON
|
|
modelName, systemInstruction, contents, tools := translatorClaudeCodeToGeminiCli.ConvertClaudeCodeRequestToCli(rawJSON)
|
|
|
|
// Create a cancellable context for the backend client request
|
|
// This allows proper cleanup and cancellation of ongoing requests
|
|
cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
|
|
|
|
var cliClient client.Client
|
|
cliClient = client.NewGeminiClient(nil, nil, nil)
|
|
defer func() {
|
|
// Ensure the client's mutex is unlocked on function exit.
|
|
// This prevents deadlocks and ensures proper resource cleanup
|
|
if cliClient != nil {
|
|
cliClient.GetRequestMutex().Unlock()
|
|
}
|
|
}()
|
|
|
|
// Main client rotation loop with quota management
|
|
// This loop implements a sophisticated load balancing and failover mechanism
|
|
outLoop:
|
|
for {
|
|
var errorResponse *client.ErrorMessage
|
|
cliClient, errorResponse = h.GetClient(modelName)
|
|
if errorResponse != nil {
|
|
c.Status(errorResponse.StatusCode)
|
|
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
|
flusher.Flush()
|
|
cliCancel()
|
|
return
|
|
}
|
|
|
|
// Determine the authentication method being used by the selected client
|
|
// This affects how responses are formatted and logged
|
|
isGlAPIKey := false
|
|
if glAPIKey := cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey(); glAPIKey != "" {
|
|
log.Debugf("Request use gemini generative language API Key: %s", glAPIKey)
|
|
isGlAPIKey = true
|
|
} else {
|
|
log.Debugf("Request use gemini account: %s, project id: %s", cliClient.GetEmail(), cliClient.(*client.GeminiClient).GetProjectID())
|
|
}
|
|
// Initiate streaming communication with the backend client
|
|
// This returns two channels: one for response chunks and one for errors
|
|
|
|
respChan, errChan := cliClient.SendMessageStream(cliCtx, rawJSON, modelName, systemInstruction, contents, tools, true)
|
|
|
|
// Track response state for proper Claude format conversion
|
|
hasFirstResponse := false
|
|
responseType := 0
|
|
responseIndex := 0
|
|
|
|
// Main streaming loop - handles multiple concurrent events using Go channels
|
|
// This select statement manages four different types of events simultaneously
|
|
for {
|
|
select {
|
|
// Case 1: Handle client disconnection
|
|
// Detects when the HTTP client has disconnected and cleans up resources
|
|
case <-c.Request.Context().Done():
|
|
if c.Request.Context().Err().Error() == "context canceled" {
|
|
log.Debugf("GeminiClient disconnected: %v", c.Request.Context().Err())
|
|
cliCancel() // Cancel the backend request to prevent resource leaks
|
|
return
|
|
}
|
|
|
|
// Case 2: Process incoming response chunks from the backend
|
|
// This handles the actual streaming data from the AI model
|
|
case chunk, okStream := <-respChan:
|
|
if !okStream {
|
|
// Stream has ended - send the final message_stop event
|
|
// This follows the Claude API specification for stream termination
|
|
_, _ = c.Writer.Write([]byte(`event: message_stop`))
|
|
_, _ = c.Writer.Write([]byte("\n"))
|
|
_, _ = c.Writer.Write([]byte(`data: {"type":"message_stop"}`))
|
|
_, _ = c.Writer.Write([]byte("\n\n\n"))
|
|
|
|
flusher.Flush()
|
|
cliCancel()
|
|
return
|
|
}
|
|
|
|
h.AddAPIResponseData(c, chunk)
|
|
h.AddAPIResponseData(c, []byte("\n\n"))
|
|
// Convert the backend response to Claude-compatible format
|
|
// This translation layer ensures API compatibility
|
|
claudeFormat := translatorClaudeCodeToGeminiCli.ConvertCliResponseToClaudeCode(chunk, isGlAPIKey, hasFirstResponse, &responseType, &responseIndex)
|
|
if claudeFormat != "" {
|
|
_, _ = c.Writer.Write([]byte(claudeFormat))
|
|
flusher.Flush() // Immediately send the chunk to the client
|
|
}
|
|
hasFirstResponse = true
|
|
|
|
// Case 3: Handle errors from the backend
|
|
// This manages various error conditions and implements retry logic
|
|
case errInfo, okError := <-errChan:
|
|
if okError {
|
|
// Special handling for quota exceeded errors
|
|
// If configured, attempt to switch to a different project/client
|
|
if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
|
|
continue outLoop // Restart the client selection process
|
|
} else {
|
|
// Forward other errors directly to the client
|
|
c.Status(errInfo.StatusCode)
|
|
_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
|
|
flusher.Flush()
|
|
cliCancel(errInfo.Error)
|
|
}
|
|
return
|
|
}
|
|
|
|
// Case 4: Send periodic keep-alive signals
|
|
// Prevents connection timeouts during long-running requests
|
|
case <-time.After(500 * time.Millisecond):
|
|
if hasFirstResponse {
|
|
// Send a ping event to maintain the connection
|
|
// This is especially important for slow AI model responses
|
|
// output := "event: ping\n"
|
|
// output = output + `data: {"type": "ping"}`
|
|
// output = output + "\n\n\n"
|
|
// _, _ = c.Writer.Write([]byte(output))
|
|
//
|
|
// flusher.Flush()
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// handleCodexStreamingResponse streams Claude-compatible responses backed by OpenAI.
// It converts the Claude request into Codex/OpenAI responses format, establishes SSE,
// and translates streaming chunks back into Claude CLI events.
func (h *ClaudeCodeAPIHandlers) handleCodexStreamingResponse(c *gin.Context, rawJSON []byte) {
	// Set up Server-Sent Events (SSE) headers for the streaming response.
	c.Header("Content-Type", "text/event-stream")
	c.Header("Cache-Control", "no-cache")
	c.Header("Connection", "keep-alive")
	c.Header("Access-Control-Allow-Origin", "*")

	// The http.Flusher interface is required to push chunks to the client
	// immediately instead of waiting for the response to complete.
	flusher, ok := c.Writer.(http.Flusher)
	if !ok {
		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
			Error: handlers.ErrorDetail{
				Message: "Streaming not supported",
				Type:    "server_error",
			},
		})
		return
	}

	// Convert the Claude request into Codex/OpenAI format, then force the
	// model field back to the originally requested model name.
	newRequestJSON := translatorClaudeCodeToCodex.ConvertClaudeCodeRequestToCodex(rawJSON)
	modelName := gjson.GetBytes(rawJSON, "model").String()

	newRequestJSON, _ = sjson.Set(newRequestJSON, "model", modelName)

	// Create a cancellable context for the backend request so it can be
	// torn down on client disconnect or error.
	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())

	var cliClient client.Client
	defer func() {
		// Release the selected client's request mutex on exit so the client
		// can be reused by other requests.
		if cliClient != nil {
			cliClient.GetRequestMutex().Unlock()
		}
	}()

	// Client rotation loop: on quota exhaustion (HTTP 429) with project
	// switching enabled, re-select a client and retry the whole request.
outLoop:
	for {
		var errorResponse *client.ErrorMessage
		cliClient, errorResponse = h.GetClient(modelName)
		if errorResponse != nil {
			// No usable client available; forward the selection error as-is.
			c.Status(errorResponse.StatusCode)
			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
			flusher.Flush()
			cliCancel()
			return
		}

		log.Debugf("Request use codex account: %s", cliClient.GetEmail())

		// Start the backend stream: one channel for response chunks, one for errors.
		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")

		// Translator state: tracks whether a tool call is being accumulated
		// across chunks so the converter emits correctly paired events.
		hasToolCall := false

		// Streaming loop: multiplexes client disconnect, response chunks,
		// backend errors, and a periodic keep-alive tick.
		for {
			select {
			// Case 1: the HTTP client disconnected — cancel the backend
			// request and stop. (Other context errors fall through and the
			// select simply continues.)
			case <-c.Request.Context().Done():
				if c.Request.Context().Err().Error() == "context canceled" {
					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
					cliCancel() // Cancel the backend request to prevent resource leaks.
					return
				}

			// Case 2: a response chunk arrived from the backend.
			case chunk, okStream := <-respChan:
				if !okStream {
					// Stream ended; flush whatever is buffered and stop.
					flusher.Flush()
					cliCancel()
					return
				}

				// Record the raw backend payload for request logging/auditing.
				h.AddAPIResponseData(c, chunk)
				h.AddAPIResponseData(c, []byte("\n\n"))

				// Only SSE data lines are translated; the "data: " prefix
				// (6 bytes) is stripped before conversion.
				if bytes.HasPrefix(chunk, []byte("data: ")) {
					jsonData := chunk[6:]
					var claudeFormat string
					claudeFormat, hasToolCall = translatorClaudeCodeToCodex.ConvertCodexResponseToClaude(jsonData, hasToolCall)
					// An empty result means the chunk produced no client-visible event.
					if claudeFormat != "" {
						_, _ = c.Writer.Write([]byte(claudeFormat))
						_, _ = c.Writer.Write([]byte("\n"))
					}
					flusher.Flush() // Immediately send the chunk to the client.
				} else {
					// Non-data lines (e.g. event names, comments) are ignored.
				}
			// Case 3: the backend reported an error.
			case errInfo, okError := <-errChan:
				if okError {
					// On quota exhaustion, optionally rotate to another
					// client/project and retry from the top.
					if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
						log.Debugf("quota exceeded, switch client")
						continue outLoop // Restart the client selection process.
					} else {
						// Forward all other errors directly to the client.
						c.Status(errInfo.StatusCode)
						_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
						flusher.Flush()
						cliCancel(errInfo.Error)
					}
					return
				}

			// Case 4: periodic keep-alive tick; ping emission is currently
			// disabled, so this only prevents the select from blocking forever.
			case <-time.After(3000 * time.Millisecond):
			}
		}
	}
}
|
|
|
|
// handleClaudeStreamingResponse streams responses backed by the Claude API itself.
// The request is already in Claude format, so it is forwarded as-is; SSE headers
// are set lazily on the first chunk, and chunks are passed through unmodified.
func (h *ClaudeCodeAPIHandlers) handleClaudeStreamingResponse(c *gin.Context, rawJSON []byte) {

	// The http.Flusher interface is required to push chunks to the client
	// immediately instead of waiting for the response to complete.
	flusher, ok := c.Writer.(http.Flusher)
	if !ok {
		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
			Error: handlers.ErrorDetail{
				Message: "Streaming not supported",
				Type:    "server_error",
			},
		})
		return
	}

	modelName := gjson.GetBytes(rawJSON, "model").String()

	// Create a cancellable context for the backend request so it can be
	// torn down on client disconnect or error.
	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())

	var cliClient client.Client
	defer func() {
		// Release the selected client's request mutex on exit so the client
		// can be reused by other requests.
		if cliClient != nil {
			cliClient.GetRequestMutex().Unlock()
		}
	}()

	// Client rotation loop: on quota exhaustion (HTTP 429) with project
	// switching enabled, re-select a client and retry the whole request.
outLoop:
	for {
		var errorResponse *client.ErrorMessage
		cliClient, errorResponse = h.GetClient(modelName)
		if errorResponse != nil {

			// For 429s, send the error body as JSON with an explicit length
			// so Claude clients can parse the quota error reliably.
			if errorResponse.StatusCode == 429 {
				c.Header("Content-Type", "application/json")
				c.Header("Content-Length", fmt.Sprintf("%d", len(errorResponse.Error.Error())))
			}
			c.Status(errorResponse.StatusCode)

			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
			flusher.Flush()
			cliCancel()

			return
		}

		// Record which credential the selected client authenticates with.
		if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
			log.Debugf("Request claude use API Key: %s", apiKey)
		} else {
			log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
		}

		// Start the backend stream: one channel for response chunks, one for errors.
		// The original Claude-format request is forwarded without translation.
		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, rawJSON, "")

		hasFirstResponse := false
		// Streaming loop: multiplexes client disconnect, response chunks,
		// backend errors, and a periodic keep-alive tick.
		for {
			select {
			// Case 1: the HTTP client disconnected — cancel the backend
			// request and stop. (Other context errors fall through and the
			// select simply continues.)
			case <-c.Request.Context().Done():
				if c.Request.Context().Err().Error() == "context canceled" {
					log.Debugf("ClaudeClient disconnected: %v", c.Request.Context().Err())
					cliCancel() // Cancel the backend request to prevent resource leaks.
					return
				}

			// Case 2: a response chunk arrived from the backend.
			case chunk, okStream := <-respChan:
				if !okStream {
					// Stream ended; flush whatever is buffered and stop.
					flusher.Flush()
					cliCancel()
					return
				}
				// Record the raw backend payload for request logging/auditing.
				h.AddAPIResponseData(c, chunk)
				h.AddAPIResponseData(c, []byte("\n\n"))

				// SSE headers are set only once a successful chunk arrives,
				// so earlier error paths can still send plain/JSON responses.
				if !hasFirstResponse {
					c.Header("Content-Type", "text/event-stream")
					c.Header("Cache-Control", "no-cache")
					c.Header("Connection", "keep-alive")
					c.Header("Access-Control-Allow-Origin", "*")
					hasFirstResponse = true
				}

				// Pass the Claude chunk through unmodified.
				_, _ = c.Writer.Write(chunk)
				_, _ = c.Writer.Write([]byte("\n"))
				flusher.Flush()

			// Case 3: the backend reported an error.
			case errInfo, okError := <-errChan:
				if okError {
					// On quota exhaustion, optionally rotate to another
					// client/project and retry from the top.
					if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
						log.Debugf("quota exceeded, switch client")
						continue outLoop // Restart the client selection process.
					} else {
						// Forward any extra response headers supplied with the
						// error (e.g. rate-limit metadata) before the status.
						if errInfo.Addon != nil {
							for key, val := range errInfo.Addon {
								c.Header(key, val[0])
							}
						}

						c.Status(errInfo.StatusCode)

						_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
						flusher.Flush()
						cliCancel(errInfo.Error)
					}
					return
				}

			// Case 4: periodic keep-alive tick; prevents the select from
			// blocking indefinitely between events.
			case <-time.After(3000 * time.Millisecond):
			}
		}
	}
}
|
|
|
|
// handleQwenStreamingResponse streams Claude-compatible responses backed by Qwen's
// OpenAI-compatible API. It converts the Claude request into OpenAI chat format,
// establishes SSE, and translates streaming chunks back into Claude Code events.
func (h *ClaudeCodeAPIHandlers) handleQwenStreamingResponse(c *gin.Context, rawJSON []byte) {
	// Set up Server-Sent Events (SSE) headers for the streaming response.
	c.Header("Content-Type", "text/event-stream")
	c.Header("Cache-Control", "no-cache")
	c.Header("Connection", "keep-alive")
	c.Header("Access-Control-Allow-Origin", "*")

	// The http.Flusher interface is required to push chunks to the client
	// immediately instead of waiting for the response to complete.
	flusher, ok := c.Writer.(http.Flusher)
	if !ok {
		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
			Error: handlers.ErrorDetail{
				Message: "Streaming not supported",
				Type:    "server_error",
			},
		})
		return
	}

	// Convert the Claude request into OpenAI format, then force the model
	// field back to the originally requested model name.
	newRequestJSON := translatorClaudeCodeToQwen.ConvertAnthropicRequestToOpenAI(rawJSON)
	modelName := gjson.GetBytes(rawJSON, "model").String()

	newRequestJSON, _ = sjson.Set(newRequestJSON, "model", modelName)

	// Create a cancellable context for the backend request so it can be
	// torn down on client disconnect or error.
	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())

	var cliClient client.Client
	defer func() {
		// Release the selected client's request mutex on exit so the client
		// can be reused by other requests.
		if cliClient != nil {
			cliClient.GetRequestMutex().Unlock()
		}
	}()

	// Client rotation loop: on quota exhaustion (HTTP 429) with project
	// switching enabled, re-select a client and retry the whole request.
outLoop:
	for {
		var errorResponse *client.ErrorMessage
		cliClient, errorResponse = h.GetClient(modelName)
		if errorResponse != nil {
			// No usable client available; forward the selection error as-is.
			c.Status(errorResponse.StatusCode)
			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
			flusher.Flush()
			cliCancel()
			return
		}

		log.Debugf("Request use qwen account: %s", cliClient.GetEmail())

		// Start the backend stream: one channel for response chunks, one for errors.
		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")

		// Translator state carried across chunks: message identity plus
		// accumulators for streamed text content and tool calls.
		params := &translatorClaudeCodeToQwen.ConvertOpenAIResponseToAnthropicParams{
			MessageID:            "",
			Model:                "",
			CreatedAt:            0,
			ContentAccumulator:   strings.Builder{},
			ToolCallsAccumulator: nil,
		}

		// Streaming loop: multiplexes client disconnect, response chunks,
		// backend errors, and a periodic keep-alive tick.
		for {
			select {
			// Case 1: the HTTP client disconnected — cancel the backend
			// request and stop. (Other context errors fall through and the
			// select simply continues.)
			case <-c.Request.Context().Done():
				if c.Request.Context().Err().Error() == "context canceled" {
					// NOTE(review): log message says "CodexClient" — looks
					// copy-pasted from the codex handler; should likely read
					// "QwenClient". Left unchanged (runtime string).
					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
					cliCancel() // Cancel the backend request to prevent resource leaks.
					return
				}

			// Case 2: a response chunk arrived from the backend.
			case chunk, okStream := <-respChan:
				if !okStream {
					// Stream ended; flush whatever is buffered and stop.
					flusher.Flush()
					cliCancel()
					return
				}

				// Record the raw backend payload for request logging/auditing.
				// NOTE(review): siblings append "\n\n" here; this handler
				// appends a single "\n" — confirm which the log format expects.
				h.AddAPIResponseData(c, chunk)
				h.AddAPIResponseData(c, []byte("\n"))

				// Only SSE data lines are translated; the "data: " prefix
				// (6 bytes) is stripped before conversion.
				if bytes.HasPrefix(chunk, []byte("data: ")) {
					jsonData := chunk[6:]
					outputs := translatorClaudeCodeToQwen.ConvertOpenAIResponseToAnthropic(jsonData, params)
					if len(outputs) > 0 {
						for i := 0; i < len(outputs); i++ {
							// NOTE(review): no "\n\n" is written after each
							// event here, unlike the other handlers — this
							// assumes each output already ends with newlines;
							// verify against the translator's output.
							_, _ = c.Writer.Write([]byte("data: "))
							_, _ = c.Writer.Write([]byte(outputs[i]))
						}
					}
					flusher.Flush() // Immediately send the chunk to the client.
				} else {
					// Non-data lines (e.g. event names, comments) are ignored.
				}
			// Case 3: the backend reported an error.
			case errInfo, okError := <-errChan:
				if okError {
					// On quota exhaustion, optionally rotate to another
					// client/project and retry from the top.
					if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
						log.Debugf("quota exceeded, switch client")
						continue outLoop // Restart the client selection process.
					} else {
						// Forward all other errors directly to the client.
						c.Status(errInfo.StatusCode)
						_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
						flusher.Flush()
						cliCancel(errInfo.Error)
					}
					return
				}

			// Case 4: periodic keep-alive tick; prevents the select from
			// blocking indefinitely between events.
			case <-time.After(3000 * time.Millisecond):
			}
		}
	}
}
|