mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-02-19 04:40:52 +08:00
Add ClaudeMessages handler for SSE-compatible chat completions
- Introduced `ClaudeMessages` to handle Claude-compatible streaming chat completions.
- Implemented client rotation, quota management, and dynamic model-name mapping for better load balancing and resource utilization.
- Enhanced response streaming with real-time chunking and Claude-format conversion.
- Added error handling for quota exhaustion, client disconnections, and backend failures.
This commit is contained in:
234
internal/api/claude-code-handlers.go
Normal file
234
internal/api/claude-code-handlers.go
Normal file
@@ -0,0 +1,234 @@
|
|||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"github.com/gin-gonic/gin"
|
||||||
|
"github.com/luispater/CLIProxyAPI/internal/api/translator"
|
||||||
|
"github.com/luispater/CLIProxyAPI/internal/client"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ClaudeMessages handles Claude-compatible streaming chat completions.
|
||||||
|
// This function implements a sophisticated client rotation and quota management system
|
||||||
|
// to ensure high availability and optimal resource utilization across multiple backend clients.
|
||||||
|
func (h *APIHandlers) ClaudeMessages(c *gin.Context) {
|
||||||
|
// Extract raw JSON data from the incoming request
|
||||||
|
rawJson, err := c.GetRawData()
|
||||||
|
// If data retrieval fails, return a 400 Bad Request error.
|
||||||
|
if err != nil {
|
||||||
|
c.JSON(http.StatusBadRequest, ErrorResponse{
|
||||||
|
Error: ErrorDetail{
|
||||||
|
Message: fmt.Sprintf("Invalid request: %v", err),
|
||||||
|
Type: "invalid_request_error",
|
||||||
|
},
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set up Server-Sent Events (SSE) headers for streaming response
|
||||||
|
// These headers are essential for maintaining a persistent connection
|
||||||
|
// and enabling real-time streaming of chat completions
|
||||||
|
c.Header("Content-Type", "text/event-stream")
|
||||||
|
c.Header("Cache-Control", "no-cache")
|
||||||
|
c.Header("Connection", "keep-alive")
|
||||||
|
c.Header("Access-Control-Allow-Origin", "*")
|
||||||
|
|
||||||
|
// Get the http.Flusher interface to manually flush the response.
|
||||||
|
// This is crucial for streaming as it allows immediate sending of data chunks
|
||||||
|
flusher, ok := c.Writer.(http.Flusher)
|
||||||
|
if !ok {
|
||||||
|
c.JSON(http.StatusInternalServerError, ErrorResponse{
|
||||||
|
Error: ErrorDetail{
|
||||||
|
Message: "Streaming not supported",
|
||||||
|
Type: "server_error",
|
||||||
|
},
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse and prepare the Claude request, extracting model name, system instructions,
|
||||||
|
// conversation contents, and available tools from the raw JSON
|
||||||
|
modelName, systemInstruction, contents, tools := translator.PrepareClaudeRequest(rawJson)
|
||||||
|
|
||||||
|
// Map Claude model names to corresponding Gemini models
|
||||||
|
// This allows the proxy to handle Claude API calls using Gemini backend
|
||||||
|
if modelName == "claude-sonnet-4-20250514" {
|
||||||
|
modelName = "gemini-2.5-pro"
|
||||||
|
} else if modelName == "claude-3-5-haiku-20241022" {
|
||||||
|
modelName = "gemini-2.5-flash"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a cancellable context for the backend client request
|
||||||
|
// This allows proper cleanup and cancellation of ongoing requests
|
||||||
|
cliCtx, cliCancel := context.WithCancel(context.Background())
|
||||||
|
var cliClient *client.Client
|
||||||
|
defer func() {
|
||||||
|
// Ensure the client's mutex is unlocked on function exit.
|
||||||
|
// This prevents deadlocks and ensures proper resource cleanup
|
||||||
|
if cliClient != nil {
|
||||||
|
cliClient.RequestMutex.Unlock()
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Main client rotation loop with quota management
|
||||||
|
// This loop implements a sophisticated load balancing and failover mechanism
|
||||||
|
outLoop:
|
||||||
|
for {
|
||||||
|
// Thread-safe client index rotation to distribute load evenly
|
||||||
|
// This ensures fair usage across all available clients
|
||||||
|
mutex.Lock()
|
||||||
|
startIndex := lastUsedClientIndex
|
||||||
|
currentIndex := (startIndex + 1) % len(h.cliClients)
|
||||||
|
lastUsedClientIndex = currentIndex
|
||||||
|
mutex.Unlock()
|
||||||
|
|
||||||
|
// Build a list of available clients, starting from the next client in rotation
|
||||||
|
// This implements round-robin load balancing while filtering out quota-exceeded clients
|
||||||
|
reorderedClients := make([]*client.Client, 0)
|
||||||
|
for i := 0; i < len(h.cliClients); i++ {
|
||||||
|
cliClient = h.cliClients[(startIndex+1+i)%len(h.cliClients)]
|
||||||
|
|
||||||
|
// Skip clients that have exceeded their quota for the requested model
|
||||||
|
if cliClient.IsModelQuotaExceeded(modelName) {
|
||||||
|
// Log different messages based on authentication method (API key vs account)
|
||||||
|
if cliClient.GetGenerativeLanguageAPIKey() == "" {
|
||||||
|
log.Debugf("Model %s is quota exceeded for account %s, project id: %s", modelName, cliClient.GetEmail(), cliClient.GetProjectID())
|
||||||
|
} else {
|
||||||
|
log.Debugf("Model %s is quota exceeded for generative language API Key: %s", modelName, cliClient.GetGenerativeLanguageAPIKey())
|
||||||
|
}
|
||||||
|
|
||||||
|
cliClient = nil
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
reorderedClients = append(reorderedClients, cliClient)
|
||||||
|
}
|
||||||
|
|
||||||
|
// If all clients have exceeded quota, return a 429 Too Many Requests error
|
||||||
|
if len(reorderedClients) == 0 {
|
||||||
|
c.Status(429)
|
||||||
|
_, _ = fmt.Fprint(c.Writer, fmt.Sprintf(`{"error":{"code":429,"message":"All the models of '%s' are quota exceeded","status":"RESOURCE_EXHAUSTED"}}`, modelName))
|
||||||
|
flusher.Flush()
|
||||||
|
cliCancel()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Attempt to acquire a lock on an available client using non-blocking TryLock
|
||||||
|
// This prevents blocking when a client is busy with another request
|
||||||
|
locked := false
|
||||||
|
for i := 0; i < len(reorderedClients); i++ {
|
||||||
|
cliClient = reorderedClients[i]
|
||||||
|
if cliClient.RequestMutex.TryLock() {
|
||||||
|
locked = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If no client is immediately available, fall back to blocking on the first client
|
||||||
|
// This ensures the request will eventually be processed
|
||||||
|
if !locked {
|
||||||
|
cliClient = h.cliClients[0]
|
||||||
|
cliClient.RequestMutex.Lock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determine the authentication method being used by the selected client
|
||||||
|
// This affects how responses are formatted and logged
|
||||||
|
isGlAPIKey := false
|
||||||
|
if glAPIKey := cliClient.GetGenerativeLanguageAPIKey(); glAPIKey != "" {
|
||||||
|
log.Debugf("Request use generative language API Key: %s", glAPIKey)
|
||||||
|
isGlAPIKey = true
|
||||||
|
} else {
|
||||||
|
log.Debugf("Request use account: %s, project id: %s", cliClient.GetEmail(), cliClient.GetProjectID())
|
||||||
|
}
|
||||||
|
// Initiate streaming communication with the backend client
|
||||||
|
// This returns two channels: one for response chunks and one for errors
|
||||||
|
|
||||||
|
includeThoughts := false
|
||||||
|
if userAgent, hasKey := c.Request.Header["User-Agent"]; hasKey {
|
||||||
|
includeThoughts = !strings.Contains(userAgent[0], "claude-cli")
|
||||||
|
}
|
||||||
|
|
||||||
|
respChan, errChan := cliClient.SendMessageStream(cliCtx, rawJson, modelName, systemInstruction, contents, tools, includeThoughts)
|
||||||
|
|
||||||
|
// Track response state for proper Claude format conversion
|
||||||
|
hasFirstResponse := false
|
||||||
|
responseType := 0
|
||||||
|
responseIndex := 0
|
||||||
|
|
||||||
|
// Main streaming loop - handles multiple concurrent events using Go channels
|
||||||
|
// This select statement manages four different types of events simultaneously
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
// Case 1: Handle client disconnection
|
||||||
|
// Detects when the HTTP client has disconnected and cleans up resources
|
||||||
|
case <-c.Request.Context().Done():
|
||||||
|
if c.Request.Context().Err().Error() == "context canceled" {
|
||||||
|
log.Debugf("Client disconnected: %v", c.Request.Context().Err())
|
||||||
|
cliCancel() // Cancel the backend request to prevent resource leaks
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Case 2: Process incoming response chunks from the backend
|
||||||
|
// This handles the actual streaming data from the AI model
|
||||||
|
case chunk, okStream := <-respChan:
|
||||||
|
if !okStream {
|
||||||
|
// Stream has ended - send the final message_stop event
|
||||||
|
// This follows the Claude API specification for stream termination
|
||||||
|
_, _ = c.Writer.Write([]byte(`event: message_stop`))
|
||||||
|
_, _ = c.Writer.Write([]byte("\n"))
|
||||||
|
_, _ = c.Writer.Write([]byte(`data: {"type":"message_stop"}`))
|
||||||
|
_, _ = c.Writer.Write([]byte("\n\n\n"))
|
||||||
|
|
||||||
|
flusher.Flush()
|
||||||
|
cliCancel()
|
||||||
|
return
|
||||||
|
} else {
|
||||||
|
// Convert the backend response to Claude-compatible format
|
||||||
|
// This translation layer ensures API compatibility
|
||||||
|
claudeFormat := translator.ConvertCliToClaude(chunk, isGlAPIKey, hasFirstResponse, &responseType, &responseIndex)
|
||||||
|
if claudeFormat != "" {
|
||||||
|
_, _ = c.Writer.Write([]byte(claudeFormat))
|
||||||
|
flusher.Flush() // Immediately send the chunk to the client
|
||||||
|
}
|
||||||
|
hasFirstResponse = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Case 3: Handle errors from the backend
|
||||||
|
// This manages various error conditions and implements retry logic
|
||||||
|
case errInfo, okError := <-errChan:
|
||||||
|
if okError {
|
||||||
|
// Special handling for quota exceeded errors
|
||||||
|
// If configured, attempt to switch to a different project/client
|
||||||
|
if errInfo.StatusCode == 429 && h.cfg.QuotaExceeded.SwitchProject {
|
||||||
|
continue outLoop // Restart the client selection process
|
||||||
|
} else {
|
||||||
|
// Forward other errors directly to the client
|
||||||
|
c.Status(errInfo.StatusCode)
|
||||||
|
_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
|
||||||
|
flusher.Flush()
|
||||||
|
cliCancel()
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Case 4: Send periodic keep-alive signals
|
||||||
|
// Prevents connection timeouts during long-running requests
|
||||||
|
case <-time.After(500 * time.Millisecond):
|
||||||
|
if hasFirstResponse {
|
||||||
|
// Send a ping event to maintain the connection
|
||||||
|
// This is especially important for slow AI model responses
|
||||||
|
output := "event: ping\n"
|
||||||
|
output = output + `data: {"type": "ping"}`
|
||||||
|
output = output + "\n\n\n"
|
||||||
|
_, _ = c.Writer.Write([]byte(output))
|
||||||
|
|
||||||
|
flusher.Flush()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user