mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-02-03 04:50:52 +08:00
Add claude code support
This commit is contained in:
@@ -11,7 +11,6 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
@@ -60,10 +59,19 @@ func (h *ClaudeCodeAPIHandlers) ClaudeMessages(c *gin.Context) {
|
||||
// h.handleCodexStreamingResponse(c, rawJSON)
|
||||
modelName := gjson.GetBytes(rawJSON, "model")
|
||||
provider := util.GetProviderName(modelName.String())
|
||||
|
||||
// Check if the client requested a streaming response.
|
||||
streamResult := gjson.GetBytes(rawJSON, "stream")
|
||||
if streamResult.Type == gjson.False {
|
||||
return
|
||||
}
|
||||
|
||||
if provider == "gemini" {
|
||||
h.handleGeminiStreamingResponse(c, rawJSON)
|
||||
} else if provider == "gpt" {
|
||||
h.handleCodexStreamingResponse(c, rawJSON)
|
||||
} else if provider == "claude" {
|
||||
h.handleClaudeStreamingResponse(c, rawJSON)
|
||||
} else {
|
||||
h.handleGeminiStreamingResponse(c, rawJSON)
|
||||
}
|
||||
@@ -98,14 +106,6 @@ func (h *ClaudeCodeAPIHandlers) handleGeminiStreamingResponse(c *gin.Context, ra
|
||||
// conversation contents, and available tools from the raw JSON
|
||||
modelName, systemInstruction, contents, tools := translatorClaudeCodeToGeminiCli.ConvertClaudeCodeRequestToCli(rawJSON)
|
||||
|
||||
// Map Claude model names to corresponding Gemini models
|
||||
// This allows the proxy to handle Claude API calls using Gemini backend
|
||||
if modelName == "claude-sonnet-4-20250514" {
|
||||
modelName = "gemini-2.5-pro"
|
||||
} else if modelName == "claude-3-5-haiku-20241022" {
|
||||
modelName = "gemini-2.5-flash"
|
||||
}
|
||||
|
||||
// Create a cancellable context for the backend client request
|
||||
// This allows proper cleanup and cancellation of ongoing requests
|
||||
cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
|
||||
@@ -128,7 +128,7 @@ outLoop:
|
||||
cliClient, errorResponse = h.GetClient(modelName)
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
return
|
||||
@@ -146,12 +146,7 @@ outLoop:
|
||||
// Initiate streaming communication with the backend client
|
||||
// This returns two channels: one for response chunks and one for errors
|
||||
|
||||
includeThoughts := false
|
||||
if userAgent, hasKey := c.Request.Header["User-Agent"]; hasKey {
|
||||
includeThoughts = !strings.Contains(userAgent[0], "claude-cli")
|
||||
}
|
||||
|
||||
respChan, errChan := cliClient.SendMessageStream(cliCtx, rawJSON, modelName, systemInstruction, contents, tools, includeThoughts)
|
||||
respChan, errChan := cliClient.SendMessageStream(cliCtx, rawJSON, modelName, systemInstruction, contents, tools, true)
|
||||
|
||||
// Track response state for proper Claude format conversion
|
||||
hasFirstResponse := false
|
||||
@@ -188,6 +183,7 @@ outLoop:
|
||||
}
|
||||
|
||||
h.AddAPIResponseData(c, chunk)
|
||||
h.AddAPIResponseData(c, []byte("\n\n"))
|
||||
// Convert the backend response to Claude-compatible format
|
||||
// This translation layer ensures API compatibility
|
||||
claudeFormat := translatorClaudeCodeToGeminiCli.ConvertCliResponseToClaudeCode(chunk, isGlAPIKey, hasFirstResponse, &responseType, &responseIndex)
|
||||
@@ -221,12 +217,12 @@ outLoop:
|
||||
if hasFirstResponse {
|
||||
// Send a ping event to maintain the connection
|
||||
// This is especially important for slow AI model responses
|
||||
output := "event: ping\n"
|
||||
output = output + `data: {"type": "ping"}`
|
||||
output = output + "\n\n\n"
|
||||
_, _ = c.Writer.Write([]byte(output))
|
||||
|
||||
flusher.Flush()
|
||||
// output := "event: ping\n"
|
||||
// output = output + `data: {"type": "ping"}`
|
||||
// output = output + "\n\n\n"
|
||||
// _, _ = c.Writer.Write([]byte(output))
|
||||
//
|
||||
// flusher.Flush()
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -262,13 +258,7 @@ func (h *ClaudeCodeAPIHandlers) handleCodexStreamingResponse(c *gin.Context, raw
|
||||
// conversation contents, and available tools from the raw JSON
|
||||
newRequestJSON := translatorClaudeCodeToCodex.ConvertClaudeCodeRequestToCodex(rawJSON)
|
||||
modelName := gjson.GetBytes(rawJSON, "model").String()
|
||||
// Map Claude model names to corresponding Gemini models
|
||||
// This allows the proxy to handle Claude API calls using Gemini backend
|
||||
if modelName == "claude-sonnet-4-20250514" {
|
||||
modelName = "gpt-5"
|
||||
} else if modelName == "claude-3-5-haiku-20241022" {
|
||||
modelName = "gpt-5"
|
||||
}
|
||||
|
||||
newRequestJSON, _ = sjson.Set(newRequestJSON, "model", modelName)
|
||||
// log.Debugf(string(rawJSON))
|
||||
// log.Debugf(newRequestJSON)
|
||||
@@ -294,7 +284,7 @@ outLoop:
|
||||
cliClient, errorResponse = h.GetClient(modelName)
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
return
|
||||
@@ -307,7 +297,7 @@ outLoop:
|
||||
respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
|
||||
|
||||
// Track response state for proper Claude format conversion
|
||||
hasFirstResponse := false
|
||||
// hasFirstResponse := false
|
||||
hasToolCall := false
|
||||
|
||||
// Main streaming loop - handles multiple concurrent events using Go channels
|
||||
@@ -333,6 +323,7 @@ outLoop:
|
||||
}
|
||||
|
||||
h.AddAPIResponseData(c, chunk)
|
||||
h.AddAPIResponseData(c, []byte("\n\n"))
|
||||
|
||||
// Convert the backend response to Claude-compatible format
|
||||
// This translation layer ensures API compatibility
|
||||
@@ -346,7 +337,7 @@ outLoop:
|
||||
_, _ = c.Writer.Write([]byte("\n"))
|
||||
}
|
||||
flusher.Flush() // Immediately send the chunk to the client
|
||||
hasFirstResponse = true
|
||||
// hasFirstResponse = true
|
||||
} else {
|
||||
// log.Debugf("chunk: %s", string(chunk))
|
||||
}
|
||||
@@ -373,16 +364,156 @@ outLoop:
|
||||
// Case 4: Send periodic keep-alive signals
|
||||
// Prevents connection timeouts during long-running requests
|
||||
case <-time.After(3000 * time.Millisecond):
|
||||
if hasFirstResponse {
|
||||
// Send a ping event to maintain the connection
|
||||
// This is especially important for slow AI model responses
|
||||
output := "event: ping\n"
|
||||
output = output + `data: {"type": "ping"}`
|
||||
output = output + "\n\n"
|
||||
_, _ = c.Writer.Write([]byte(output))
|
||||
|
||||
flusher.Flush()
|
||||
}
|
||||
// if hasFirstResponse {
|
||||
// // Send a ping event to maintain the connection
|
||||
// // This is especially important for slow AI model responses
|
||||
// output := "event: ping\n"
|
||||
// output = output + `data: {"type": "ping"}`
|
||||
// output = output + "\n\n"
|
||||
// _, _ = c.Writer.Write([]byte(output))
|
||||
//
|
||||
// flusher.Flush()
|
||||
// }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handleClaudeStreamingResponse streams Claude-compatible responses backed by OpenAI.
|
||||
// It converts the Claude request into OpenAI responses format, establishes SSE,
|
||||
// and translates streaming chunks back into Claude Code events.
|
||||
func (h *ClaudeCodeAPIHandlers) handleClaudeStreamingResponse(c *gin.Context, rawJSON []byte) {
|
||||
|
||||
// Get the http.Flusher interface to manually flush the response.
|
||||
// This is crucial for streaming as it allows immediate sending of data chunks
|
||||
flusher, ok := c.Writer.(http.Flusher)
|
||||
if !ok {
|
||||
c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
|
||||
Error: handlers.ErrorDetail{
|
||||
Message: "Streaming not supported",
|
||||
Type: "server_error",
|
||||
},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
modelName := gjson.GetBytes(rawJSON, "model").String()
|
||||
|
||||
// Create a cancellable context for the backend client request
|
||||
// This allows proper cleanup and cancellation of ongoing requests
|
||||
cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
|
||||
|
||||
var cliClient client.Client
|
||||
defer func() {
|
||||
// Ensure the client's mutex is unlocked on function exit.
|
||||
// This prevents deadlocks and ensures proper resource cleanup
|
||||
if cliClient != nil {
|
||||
cliClient.GetRequestMutex().Unlock()
|
||||
}
|
||||
}()
|
||||
|
||||
// Main client rotation loop with quota management
|
||||
// This loop implements a sophisticated load balancing and failover mechanism
|
||||
outLoop:
|
||||
for {
|
||||
var errorResponse *client.ErrorMessage
|
||||
cliClient, errorResponse = h.GetClient(modelName)
|
||||
if errorResponse != nil {
|
||||
|
||||
if errorResponse.StatusCode == 429 {
|
||||
c.Header("Content-Type", "application/json")
|
||||
c.Header("Content-Length", fmt.Sprintf("%d", len(errorResponse.Error.Error())))
|
||||
}
|
||||
c.Status(errorResponse.StatusCode)
|
||||
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
|
||||
log.Debugf("Request claude use API Key: %s", apiKey)
|
||||
} else {
|
||||
log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
|
||||
}
|
||||
|
||||
// Initiate streaming communication with the backend client
|
||||
// This returns two channels: one for response chunks and one for errors
|
||||
respChan, errChan := cliClient.SendRawMessageStream(cliCtx, rawJSON, "")
|
||||
|
||||
hasFirstResponse := false
|
||||
// Main streaming loop - handles multiple concurrent events using Go channels
|
||||
// This select statement manages four different types of events simultaneously
|
||||
for {
|
||||
select {
|
||||
// Case 1: Handle client disconnection
|
||||
// Detects when the HTTP client has disconnected and cleans up resources
|
||||
case <-c.Request.Context().Done():
|
||||
if c.Request.Context().Err().Error() == "context canceled" {
|
||||
log.Debugf("ClaudeClient disconnected: %v", c.Request.Context().Err())
|
||||
cliCancel() // Cancel the backend request to prevent resource leaks
|
||||
return
|
||||
}
|
||||
|
||||
// Case 2: Process incoming response chunks from the backend
|
||||
// This handles the actual streaming data from the AI model
|
||||
case chunk, okStream := <-respChan:
|
||||
if !okStream {
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
h.AddAPIResponseData(c, chunk)
|
||||
h.AddAPIResponseData(c, []byte("\n\n"))
|
||||
|
||||
if !hasFirstResponse {
|
||||
// Set up Server-Sent Events (SSE) headers for streaming response
|
||||
// These headers are essential for maintaining a persistent connection
|
||||
// and enabling real-time streaming of chat completions
|
||||
c.Header("Content-Type", "text/event-stream")
|
||||
c.Header("Cache-Control", "no-cache")
|
||||
c.Header("Connection", "keep-alive")
|
||||
c.Header("Access-Control-Allow-Origin", "*")
|
||||
hasFirstResponse = true
|
||||
}
|
||||
|
||||
_, _ = c.Writer.Write(chunk)
|
||||
_, _ = c.Writer.Write([]byte("\n"))
|
||||
flusher.Flush()
|
||||
|
||||
// Case 3: Handle errors from the backend
|
||||
// This manages various error conditions and implements retry logic
|
||||
case errInfo, okError := <-errChan:
|
||||
if okError {
|
||||
// log.Debugf("Code: %d, Error: %v", errInfo.StatusCode, errInfo.Error)
|
||||
// Special handling for quota exceeded errors
|
||||
// If configured, attempt to switch to a different project/client
|
||||
// if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
|
||||
if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
|
||||
log.Debugf("quota exceeded, switch client")
|
||||
continue outLoop // Restart the client selection process
|
||||
} else {
|
||||
// Forward other errors directly to the client
|
||||
if errInfo.Addon != nil {
|
||||
for key, val := range errInfo.Addon {
|
||||
c.Header(key, val[0])
|
||||
}
|
||||
}
|
||||
|
||||
c.Status(errInfo.StatusCode)
|
||||
|
||||
_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
|
||||
flusher.Flush()
|
||||
cliCancel(errInfo.Error)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Case 4: Send periodic keep-alive signals
|
||||
// Prevents connection timeouts during long-running requests
|
||||
case <-time.After(3000 * time.Millisecond):
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@ import (
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/luispater/CLIProxyAPI/internal/api/handlers"
|
||||
"github.com/luispater/CLIProxyAPI/internal/client"
|
||||
translatorGeminiToClaude "github.com/luispater/CLIProxyAPI/internal/translator/claude/gemini"
|
||||
translatorGeminiToCodex "github.com/luispater/CLIProxyAPI/internal/translator/codex/gemini"
|
||||
"github.com/luispater/CLIProxyAPI/internal/util"
|
||||
log "github.com/sirupsen/logrus"
|
||||
@@ -61,12 +62,16 @@ func (h *GeminiCLIAPIHandlers) CLIHandler(c *gin.Context) {
|
||||
h.handleInternalGenerateContent(c, rawJSON)
|
||||
} else if provider == "gpt" {
|
||||
h.handleCodexInternalGenerateContent(c, rawJSON)
|
||||
} else if provider == "claude" {
|
||||
h.handleClaudeInternalGenerateContent(c, rawJSON)
|
||||
}
|
||||
} else if requestRawURI == "/v1internal:streamGenerateContent" {
|
||||
if provider == "gemini" || provider == "unknow" {
|
||||
h.handleInternalStreamGenerateContent(c, rawJSON)
|
||||
} else if provider == "gpt" {
|
||||
h.handleCodexInternalStreamGenerateContent(c, rawJSON)
|
||||
} else if provider == "claude" {
|
||||
h.handleClaudeInternalStreamGenerateContent(c, rawJSON)
|
||||
}
|
||||
} else {
|
||||
reqBody := bytes.NewBuffer(rawJSON)
|
||||
@@ -172,7 +177,7 @@ outLoop:
|
||||
cliClient, errorResponse = h.GetClient(modelName)
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
return
|
||||
@@ -204,6 +209,7 @@ outLoop:
|
||||
}
|
||||
|
||||
h.AddAPIResponseData(c, chunk)
|
||||
h.AddAPIResponseData(c, []byte("\n\n"))
|
||||
|
||||
hasFirstResponse = true
|
||||
if cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey() != "" {
|
||||
@@ -258,7 +264,7 @@ func (h *GeminiCLIAPIHandlers) handleInternalGenerateContent(c *gin.Context, raw
|
||||
cliClient, errorResponse = h.GetClient(modelName)
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
@@ -276,7 +282,7 @@ func (h *GeminiCLIAPIHandlers) handleInternalGenerateContent(c *gin.Context, raw
|
||||
} else {
|
||||
c.Status(err.StatusCode)
|
||||
_, _ = c.Writer.Write([]byte(err.Error.Error()))
|
||||
log.Debugf("code: %d, error: %s", err.StatusCode, err.Error.Error())
|
||||
// log.Debugf("code: %d, error: %s", err.StatusCode, err.Error.Error())
|
||||
cliCancel(err.Error)
|
||||
}
|
||||
break
|
||||
@@ -337,7 +343,7 @@ outLoop:
|
||||
cliClient, errorResponse = h.GetClient(modelName.String())
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
return
|
||||
@@ -373,6 +379,7 @@ outLoop:
|
||||
// _, _ = logFile.Write(chunk)
|
||||
// _, _ = logFile.Write([]byte("\n"))
|
||||
h.AddAPIResponseData(c, chunk)
|
||||
h.AddAPIResponseData(c, []byte("\n\n"))
|
||||
|
||||
if bytes.HasPrefix(chunk, []byte("data: ")) {
|
||||
jsonData := chunk[6:]
|
||||
@@ -397,7 +404,7 @@ outLoop:
|
||||
if errMessage.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
|
||||
continue outLoop
|
||||
} else {
|
||||
log.Debugf("code: %d, error: %s", errMessage.StatusCode, errMessage.Error.Error())
|
||||
// log.Debugf("code: %d, error: %s", errMessage.StatusCode, errMessage.Error.Error())
|
||||
c.Status(errMessage.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errMessage.Error.Error())
|
||||
flusher.Flush()
|
||||
@@ -414,7 +421,7 @@ outLoop:
|
||||
|
||||
func (h *GeminiCLIAPIHandlers) handleCodexInternalGenerateContent(c *gin.Context, rawJSON []byte) {
|
||||
c.Header("Content-Type", "application/json")
|
||||
orgRawJSON := rawJSON
|
||||
// orgRawJSON := rawJSON
|
||||
modelResult := gjson.GetBytes(rawJSON, "model")
|
||||
rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
|
||||
rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelResult.String())
|
||||
@@ -443,7 +450,7 @@ outLoop:
|
||||
cliClient, errorResponse = h.GetClient(modelName.String())
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
@@ -469,6 +476,7 @@ outLoop:
|
||||
}
|
||||
|
||||
h.AddAPIResponseData(c, chunk)
|
||||
h.AddAPIResponseData(c, []byte("\n\n"))
|
||||
|
||||
if bytes.HasPrefix(chunk, []byte("data: ")) {
|
||||
jsonData := chunk[6:]
|
||||
@@ -490,9 +498,231 @@ outLoop:
|
||||
} else {
|
||||
c.Status(err.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
|
||||
log.Debugf("org: %s", string(orgRawJSON))
|
||||
log.Debugf("raw: %s", string(rawJSON))
|
||||
log.Debugf("newRequestJSON: %s", newRequestJSON)
|
||||
// log.Debugf("org: %s", string(orgRawJSON))
|
||||
// log.Debugf("raw: %s", string(rawJSON))
|
||||
// log.Debugf("newRequestJSON: %s", newRequestJSON)
|
||||
cliCancel(err.Error)
|
||||
}
|
||||
return
|
||||
}
|
||||
// Send a keep-alive signal to the client.
|
||||
case <-time.After(500 * time.Millisecond):
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (h *GeminiCLIAPIHandlers) handleClaudeInternalStreamGenerateContent(c *gin.Context, rawJSON []byte) {
|
||||
c.Header("Content-Type", "text/event-stream")
|
||||
c.Header("Cache-Control", "no-cache")
|
||||
c.Header("Connection", "keep-alive")
|
||||
c.Header("Access-Control-Allow-Origin", "*")
|
||||
|
||||
// Get the http.Flusher interface to manually flush the response.
|
||||
flusher, ok := c.Writer.(http.Flusher)
|
||||
if !ok {
|
||||
c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
|
||||
Error: handlers.ErrorDetail{
|
||||
Message: "Streaming not supported",
|
||||
Type: "server_error",
|
||||
},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
modelResult := gjson.GetBytes(rawJSON, "model")
|
||||
rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
|
||||
rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelResult.String())
|
||||
rawJSON, _ = sjson.SetRawBytes(rawJSON, "system_instruction", []byte(gjson.GetBytes(rawJSON, "systemInstruction").Raw))
|
||||
rawJSON, _ = sjson.DeleteBytes(rawJSON, "systemInstruction")
|
||||
|
||||
// Prepare the request for the backend client.
|
||||
newRequestJSON := translatorGeminiToClaude.ConvertGeminiRequestToAnthropic(rawJSON)
|
||||
newRequestJSON, _ = sjson.Set(newRequestJSON, "stream", true)
|
||||
|
||||
modelName := gjson.GetBytes(rawJSON, "model")
|
||||
|
||||
cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
|
||||
|
||||
var cliClient client.Client
|
||||
defer func() {
|
||||
// Ensure the client's mutex is unlocked on function exit.
|
||||
if cliClient != nil {
|
||||
cliClient.GetRequestMutex().Unlock()
|
||||
}
|
||||
}()
|
||||
|
||||
outLoop:
|
||||
for {
|
||||
var errorResponse *client.ErrorMessage
|
||||
cliClient, errorResponse = h.GetClient(modelName.String())
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
|
||||
if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
|
||||
log.Debugf("Request claude use API Key: %s", apiKey)
|
||||
} else {
|
||||
log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
|
||||
}
|
||||
|
||||
// Send the message and receive response chunks and errors via channels.
|
||||
respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
|
||||
|
||||
params := &translatorGeminiToClaude.ConvertAnthropicResponseToGeminiParams{
|
||||
Model: modelName.String(),
|
||||
CreatedAt: 0,
|
||||
ResponseID: "",
|
||||
}
|
||||
for {
|
||||
select {
|
||||
// Handle client disconnection.
|
||||
case <-c.Request.Context().Done():
|
||||
if c.Request.Context().Err().Error() == "context canceled" {
|
||||
log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
|
||||
cliCancel() // Cancel the backend request.
|
||||
return
|
||||
}
|
||||
// Process incoming response chunks.
|
||||
case chunk, okStream := <-respChan:
|
||||
if !okStream {
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
|
||||
h.AddAPIResponseData(c, chunk)
|
||||
h.AddAPIResponseData(c, []byte("\n\n"))
|
||||
|
||||
if bytes.HasPrefix(chunk, []byte("data: ")) {
|
||||
jsonData := chunk[6:]
|
||||
data := gjson.ParseBytes(jsonData)
|
||||
typeResult := data.Get("type")
|
||||
if typeResult.String() != "" {
|
||||
// log.Debugf(string(jsonData))
|
||||
outputs := translatorGeminiToClaude.ConvertAnthropicResponseToGemini(jsonData, params)
|
||||
if len(outputs) > 0 {
|
||||
for i := 0; i < len(outputs); i++ {
|
||||
outputs[i], _ = sjson.SetRaw("{}", "response", outputs[i])
|
||||
_, _ = c.Writer.Write([]byte("data: "))
|
||||
_, _ = c.Writer.Write([]byte(outputs[i]))
|
||||
_, _ = c.Writer.Write([]byte("\n\n"))
|
||||
}
|
||||
}
|
||||
}
|
||||
// log.Debugf(string(jsonData))
|
||||
}
|
||||
flusher.Flush()
|
||||
// Handle errors from the backend.
|
||||
case err, okError := <-errChan:
|
||||
if okError {
|
||||
if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
|
||||
continue outLoop
|
||||
} else {
|
||||
c.Status(err.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
|
||||
flusher.Flush()
|
||||
cliCancel(err.Error)
|
||||
}
|
||||
return
|
||||
}
|
||||
// Send a keep-alive signal to the client.
|
||||
case <-time.After(500 * time.Millisecond):
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (h *GeminiCLIAPIHandlers) handleClaudeInternalGenerateContent(c *gin.Context, rawJSON []byte) {
|
||||
c.Header("Content-Type", "application/json")
|
||||
|
||||
modelResult := gjson.GetBytes(rawJSON, "model")
|
||||
rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
|
||||
rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelResult.String())
|
||||
rawJSON, _ = sjson.SetRawBytes(rawJSON, "system_instruction", []byte(gjson.GetBytes(rawJSON, "systemInstruction").Raw))
|
||||
rawJSON, _ = sjson.DeleteBytes(rawJSON, "systemInstruction")
|
||||
|
||||
// Prepare the request for the backend client.
|
||||
newRequestJSON := translatorGeminiToClaude.ConvertGeminiRequestToAnthropic(rawJSON)
|
||||
// log.Debugf("Request: %s", newRequestJSON)
|
||||
newRequestJSON, _ = sjson.Set(newRequestJSON, "stream", true)
|
||||
|
||||
modelName := gjson.GetBytes(rawJSON, "model")
|
||||
|
||||
cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
|
||||
|
||||
var cliClient client.Client
|
||||
defer func() {
|
||||
// Ensure the client's mutex is unlocked on function exit.
|
||||
if cliClient != nil {
|
||||
cliClient.GetRequestMutex().Unlock()
|
||||
}
|
||||
}()
|
||||
|
||||
outLoop:
|
||||
for {
|
||||
var errorResponse *client.ErrorMessage
|
||||
cliClient, errorResponse = h.GetClient(modelName.String())
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
|
||||
if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
|
||||
log.Debugf("Request claude use API Key: %s", apiKey)
|
||||
} else {
|
||||
log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
|
||||
}
|
||||
|
||||
// Send the message and receive response chunks and errors via channels.
|
||||
respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
|
||||
|
||||
var allChunks [][]byte
|
||||
for {
|
||||
select {
|
||||
// Handle client disconnection.
|
||||
case <-c.Request.Context().Done():
|
||||
if c.Request.Context().Err().Error() == "context canceled" {
|
||||
log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
|
||||
cliCancel() // Cancel the backend request.
|
||||
return
|
||||
}
|
||||
// Process incoming response chunks.
|
||||
case chunk, okStream := <-respChan:
|
||||
if !okStream {
|
||||
if len(allChunks) > 0 {
|
||||
// Use the last chunk which should contain the complete message
|
||||
finalResponseStr := translatorGeminiToClaude.ConvertAnthropicResponseToGeminiNonStream(allChunks, modelName.String())
|
||||
finalResponse := []byte(finalResponseStr)
|
||||
_, _ = c.Writer.Write(finalResponse)
|
||||
}
|
||||
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
|
||||
// Store chunk for building final response
|
||||
if bytes.HasPrefix(chunk, []byte("data: ")) {
|
||||
jsonData := chunk[6:]
|
||||
allChunks = append(allChunks, jsonData)
|
||||
}
|
||||
|
||||
h.AddAPIResponseData(c, chunk)
|
||||
h.AddAPIResponseData(c, []byte("\n\n"))
|
||||
|
||||
// Handle errors from the backend.
|
||||
case err, okError := <-errChan:
|
||||
if okError {
|
||||
if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
|
||||
continue outLoop
|
||||
} else {
|
||||
c.Status(err.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
|
||||
cliCancel(err.Error)
|
||||
}
|
||||
return
|
||||
|
||||
@@ -16,6 +16,7 @@ import (
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/luispater/CLIProxyAPI/internal/api/handlers"
|
||||
"github.com/luispater/CLIProxyAPI/internal/client"
|
||||
translatorGeminiToClaude "github.com/luispater/CLIProxyAPI/internal/translator/claude/gemini"
|
||||
translatorGeminiToCodex "github.com/luispater/CLIProxyAPI/internal/translator/codex/gemini"
|
||||
translatorGeminiToGeminiCli "github.com/luispater/CLIProxyAPI/internal/translator/gemini-cli/gemini/cli"
|
||||
"github.com/luispater/CLIProxyAPI/internal/util"
|
||||
@@ -233,7 +234,13 @@ func (h *GeminiAPIHandlers) GeminiHandler(c *gin.Context) {
|
||||
case "streamGenerateContent":
|
||||
h.handleCodexStreamGenerateContent(c, rawJSON)
|
||||
}
|
||||
|
||||
} else if provider == "claude" {
|
||||
switch method {
|
||||
case "generateContent":
|
||||
h.handleClaudeGenerateContent(c, rawJSON)
|
||||
case "streamGenerateContent":
|
||||
h.handleClaudeStreamGenerateContent(c, rawJSON)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -278,7 +285,7 @@ outLoop:
|
||||
cliClient, errorResponse = h.GetClient(modelName)
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
return
|
||||
@@ -340,6 +347,7 @@ outLoop:
|
||||
}
|
||||
|
||||
h.AddAPIResponseData(c, chunk)
|
||||
h.AddAPIResponseData(c, []byte("\n\n"))
|
||||
|
||||
if cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey() == "" {
|
||||
if alt == "" {
|
||||
@@ -377,7 +385,7 @@ outLoop:
|
||||
log.Debugf("quota exceeded, switch client")
|
||||
continue outLoop
|
||||
} else {
|
||||
log.Debugf("error code :%d, error: %v", err.StatusCode, err.Error.Error())
|
||||
// log.Debugf("error code :%d, error: %v", err.StatusCode, err.Error.Error())
|
||||
c.Status(err.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
|
||||
flusher.Flush()
|
||||
@@ -413,7 +421,7 @@ func (h *GeminiAPIHandlers) handleGeminiCountTokens(c *gin.Context, rawJSON []by
|
||||
cliClient, errorResponse = h.GetClient(modelName, false)
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
@@ -482,7 +490,7 @@ func (h *GeminiAPIHandlers) handleGeminiGenerateContent(c *gin.Context, rawJSON
|
||||
cliClient, errorResponse = h.GetClient(modelName)
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
@@ -587,7 +595,7 @@ outLoop:
|
||||
cliClient, errorResponse = h.GetClient(modelName.String())
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
return
|
||||
@@ -621,6 +629,7 @@ outLoop:
|
||||
}
|
||||
|
||||
h.AddAPIResponseData(c, chunk)
|
||||
h.AddAPIResponseData(c, []byte("\n\n"))
|
||||
|
||||
if bytes.HasPrefix(chunk, []byte("data: ")) {
|
||||
jsonData := chunk[6:]
|
||||
@@ -684,7 +693,7 @@ outLoop:
|
||||
cliClient, errorResponse = h.GetClient(modelName.String())
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
@@ -710,6 +719,7 @@ outLoop:
|
||||
}
|
||||
|
||||
h.AddAPIResponseData(c, chunk)
|
||||
h.AddAPIResponseData(c, []byte("\n\n"))
|
||||
|
||||
if bytes.HasPrefix(chunk, []byte("data: ")) {
|
||||
jsonData := chunk[6:]
|
||||
@@ -741,3 +751,213 @@ outLoop:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (h *GeminiAPIHandlers) handleClaudeStreamGenerateContent(c *gin.Context, rawJSON []byte) {
|
||||
c.Header("Content-Type", "text/event-stream")
|
||||
c.Header("Cache-Control", "no-cache")
|
||||
c.Header("Connection", "keep-alive")
|
||||
c.Header("Access-Control-Allow-Origin", "*")
|
||||
|
||||
// Get the http.Flusher interface to manually flush the response.
|
||||
flusher, ok := c.Writer.(http.Flusher)
|
||||
if !ok {
|
||||
c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
|
||||
Error: handlers.ErrorDetail{
|
||||
Message: "Streaming not supported",
|
||||
Type: "server_error",
|
||||
},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// Prepare the request for the backend client.
|
||||
newRequestJSON := translatorGeminiToClaude.ConvertGeminiRequestToAnthropic(rawJSON)
|
||||
newRequestJSON, _ = sjson.Set(newRequestJSON, "stream", true)
|
||||
// log.Debugf("Request: %s", newRequestJSON)
|
||||
|
||||
modelName := gjson.GetBytes(rawJSON, "model")
|
||||
|
||||
cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
|
||||
|
||||
var cliClient client.Client
|
||||
defer func() {
|
||||
// Ensure the client's mutex is unlocked on function exit.
|
||||
if cliClient != nil {
|
||||
cliClient.GetRequestMutex().Unlock()
|
||||
}
|
||||
}()
|
||||
|
||||
outLoop:
|
||||
for {
|
||||
var errorResponse *client.ErrorMessage
|
||||
cliClient, errorResponse = h.GetClient(modelName.String())
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
|
||||
if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
|
||||
log.Debugf("Request claude use API Key: %s", apiKey)
|
||||
} else {
|
||||
log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
|
||||
}
|
||||
|
||||
// Send the message and receive response chunks and errors via channels.
|
||||
respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
|
||||
|
||||
params := &translatorGeminiToClaude.ConvertAnthropicResponseToGeminiParams{
|
||||
Model: modelName.String(),
|
||||
CreatedAt: 0,
|
||||
ResponseID: "",
|
||||
}
|
||||
for {
|
||||
select {
|
||||
// Handle client disconnection.
|
||||
case <-c.Request.Context().Done():
|
||||
if c.Request.Context().Err().Error() == "context canceled" {
|
||||
log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
|
||||
cliCancel() // Cancel the backend request.
|
||||
return
|
||||
}
|
||||
// Process incoming response chunks.
|
||||
case chunk, okStream := <-respChan:
|
||||
if !okStream {
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
|
||||
h.AddAPIResponseData(c, chunk)
|
||||
h.AddAPIResponseData(c, []byte("\n\n"))
|
||||
|
||||
if bytes.HasPrefix(chunk, []byte("data: ")) {
|
||||
jsonData := chunk[6:]
|
||||
data := gjson.ParseBytes(jsonData)
|
||||
typeResult := data.Get("type")
|
||||
if typeResult.String() != "" {
|
||||
// log.Debugf(string(jsonData))
|
||||
outputs := translatorGeminiToClaude.ConvertAnthropicResponseToGemini(jsonData, params)
|
||||
if len(outputs) > 0 {
|
||||
for i := 0; i < len(outputs); i++ {
|
||||
_, _ = c.Writer.Write([]byte("data: "))
|
||||
_, _ = c.Writer.Write([]byte(outputs[i]))
|
||||
_, _ = c.Writer.Write([]byte("\n\n"))
|
||||
}
|
||||
}
|
||||
}
|
||||
// log.Debugf(string(jsonData))
|
||||
}
|
||||
flusher.Flush()
|
||||
// Handle errors from the backend.
|
||||
case err, okError := <-errChan:
|
||||
if okError {
|
||||
if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
|
||||
continue outLoop
|
||||
} else {
|
||||
c.Status(err.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
|
||||
flusher.Flush()
|
||||
cliCancel(err.Error)
|
||||
}
|
||||
return
|
||||
}
|
||||
// Send a keep-alive signal to the client.
|
||||
case <-time.After(500 * time.Millisecond):
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (h *GeminiAPIHandlers) handleClaudeGenerateContent(c *gin.Context, rawJSON []byte) {
|
||||
c.Header("Content-Type", "application/json")
|
||||
|
||||
// Prepare the request for the backend client.
|
||||
newRequestJSON := translatorGeminiToClaude.ConvertGeminiRequestToAnthropic(rawJSON)
|
||||
// log.Debugf("Request: %s", newRequestJSON)
|
||||
newRequestJSON, _ = sjson.Set(newRequestJSON, "stream", true)
|
||||
|
||||
modelName := gjson.GetBytes(rawJSON, "model")
|
||||
|
||||
cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
|
||||
|
||||
var cliClient client.Client
|
||||
defer func() {
|
||||
// Ensure the client's mutex is unlocked on function exit.
|
||||
if cliClient != nil {
|
||||
cliClient.GetRequestMutex().Unlock()
|
||||
}
|
||||
}()
|
||||
|
||||
outLoop:
|
||||
for {
|
||||
var errorResponse *client.ErrorMessage
|
||||
cliClient, errorResponse = h.GetClient(modelName.String())
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
|
||||
if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
|
||||
log.Debugf("Request claude use API Key: %s", apiKey)
|
||||
} else {
|
||||
log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
|
||||
}
|
||||
|
||||
// Send the message and receive response chunks and errors via channels.
|
||||
respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
|
||||
|
||||
var allChunks [][]byte
|
||||
for {
|
||||
select {
|
||||
// Handle client disconnection.
|
||||
case <-c.Request.Context().Done():
|
||||
if c.Request.Context().Err().Error() == "context canceled" {
|
||||
log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
|
||||
cliCancel() // Cancel the backend request.
|
||||
return
|
||||
}
|
||||
// Process incoming response chunks.
|
||||
case chunk, okStream := <-respChan:
|
||||
if !okStream {
|
||||
if len(allChunks) > 0 {
|
||||
// Use the last chunk which should contain the complete message
|
||||
finalResponseStr := translatorGeminiToClaude.ConvertAnthropicResponseToGeminiNonStream(allChunks, modelName.String())
|
||||
finalResponse := []byte(finalResponseStr)
|
||||
_, _ = c.Writer.Write(finalResponse)
|
||||
}
|
||||
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
|
||||
// Store chunk for building final response
|
||||
if bytes.HasPrefix(chunk, []byte("data: ")) {
|
||||
jsonData := chunk[6:]
|
||||
allChunks = append(allChunks, jsonData)
|
||||
}
|
||||
|
||||
h.AddAPIResponseData(c, chunk)
|
||||
h.AddAPIResponseData(c, []byte("\n\n"))
|
||||
|
||||
// Handle errors from the backend.
|
||||
case err, okError := <-errChan:
|
||||
if okError {
|
||||
if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
|
||||
continue outLoop
|
||||
} else {
|
||||
c.Status(err.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
|
||||
cliCancel(err.Error)
|
||||
}
|
||||
return
|
||||
}
|
||||
// Send a keep-alive signal to the client.
|
||||
case <-time.After(500 * time.Millisecond):
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -112,6 +112,12 @@ func (h *APIHandlers) GetClient(modelName string, isGenerateContent ...bool) (cl
|
||||
clients = append(clients, cli)
|
||||
}
|
||||
}
|
||||
} else if provider == "claude" {
|
||||
for i := 0; i < len(h.CliClients); i++ {
|
||||
if cli, ok := h.CliClients[i].(*client.ClaudeClient); ok {
|
||||
clients = append(clients, cli)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if _, hasKey := h.LastUsedClientIndex[provider]; !hasKey {
|
||||
@@ -142,6 +148,8 @@ func (h *APIHandlers) GetClient(modelName string, isGenerateContent ...bool) (cl
|
||||
log.Debugf("Gemini Model %s is quota exceeded for account %s, project id: %s", modelName, cliClient.GetEmail(), cliClient.(*client.GeminiClient).GetProjectID())
|
||||
} else if provider == "gpt" {
|
||||
log.Debugf("Codex Model %s is quota exceeded for account %s", modelName, cliClient.GetEmail())
|
||||
} else if provider == "claude" {
|
||||
log.Debugf("Claude Model %s is quota exceeded for account %s", modelName, cliClient.GetEmail())
|
||||
}
|
||||
cliClient = nil
|
||||
continue
|
||||
@@ -151,6 +159,10 @@ func (h *APIHandlers) GetClient(modelName string, isGenerateContent ...bool) (cl
|
||||
}
|
||||
|
||||
if len(reorderedClients) == 0 {
|
||||
if provider == "claude" {
|
||||
// log.Debugf("Claude Model %s is quota exceeded for all accounts", modelName)
|
||||
return nil, &client.ErrorMessage{StatusCode: 429, Error: fmt.Errorf(`{"type":"error","error":{"type":"rate_limit_error","message":"This request would exceed your account's rate limit. Please try again later."}}`)}
|
||||
}
|
||||
return nil, &client.ErrorMessage{StatusCode: 429, Error: fmt.Errorf(`{"error":{"code":429,"message":"All the models of '%s' are quota exceeded","status":"RESOURCE_EXHAUSTED"}}`, modelName)}
|
||||
}
|
||||
|
||||
|
||||
@@ -15,11 +15,13 @@ import (
|
||||
|
||||
"github.com/luispater/CLIProxyAPI/internal/api/handlers"
|
||||
"github.com/luispater/CLIProxyAPI/internal/client"
|
||||
translatorOpenAIToClaude "github.com/luispater/CLIProxyAPI/internal/translator/claude/openai"
|
||||
translatorOpenAIToCodex "github.com/luispater/CLIProxyAPI/internal/translator/codex/openai"
|
||||
translatorOpenAIToGeminiCli "github.com/luispater/CLIProxyAPI/internal/translator/gemini-cli/openai"
|
||||
"github.com/luispater/CLIProxyAPI/internal/util"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
@@ -107,6 +109,23 @@ func (h *OpenAIAPIHandlers) Models(c *gin.Context) {
|
||||
"maxTemperature": 2,
|
||||
"thinking": true,
|
||||
},
|
||||
{
|
||||
"id": "claude-opus-4-1-20250805",
|
||||
"object": "model",
|
||||
"version": "claude-opus-4-1-20250805",
|
||||
"name": "Claude Opus 4.1",
|
||||
"description": "Anthropic's most capable model.",
|
||||
"context_length": 200_000,
|
||||
"max_completion_tokens": 32_000,
|
||||
"supported_parameters": []string{
|
||||
"tools",
|
||||
},
|
||||
"temperature": 1,
|
||||
"topP": 0.95,
|
||||
"topK": 64,
|
||||
"maxTemperature": 2,
|
||||
"thinking": true,
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
@@ -146,6 +165,12 @@ func (h *OpenAIAPIHandlers) ChatCompletions(c *gin.Context) {
|
||||
} else {
|
||||
h.handleCodexNonStreamingResponse(c, rawJSON)
|
||||
}
|
||||
} else if provider == "claude" {
|
||||
if streamResult.Type == gjson.True {
|
||||
h.handleClaudeStreamingResponse(c, rawJSON)
|
||||
} else {
|
||||
h.handleClaudeNonStreamingResponse(c, rawJSON)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -174,7 +199,7 @@ func (h *OpenAIAPIHandlers) handleGeminiNonStreamingResponse(c *gin.Context, raw
|
||||
cliClient, errorResponse = h.GetClient(modelName)
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
@@ -251,7 +276,7 @@ outLoop:
|
||||
cliClient, errorResponse = h.GetClient(modelName)
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
return
|
||||
@@ -288,6 +313,7 @@ outLoop:
|
||||
}
|
||||
|
||||
h.AddAPIResponseData(c, chunk)
|
||||
h.AddAPIResponseData(c, []byte("\n\n"))
|
||||
|
||||
// Convert the chunk to OpenAI format and send it to the client.
|
||||
hasFirstResponse = true
|
||||
@@ -374,6 +400,7 @@ outLoop:
|
||||
}
|
||||
|
||||
h.AddAPIResponseData(c, chunk)
|
||||
h.AddAPIResponseData(c, []byte("\n\n"))
|
||||
|
||||
if bytes.HasPrefix(chunk, []byte("data: ")) {
|
||||
jsonData := chunk[6:]
|
||||
@@ -451,7 +478,7 @@ outLoop:
|
||||
cliClient, errorResponse = h.GetClient(modelName.String())
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
return
|
||||
@@ -481,6 +508,7 @@ outLoop:
|
||||
}
|
||||
|
||||
h.AddAPIResponseData(c, chunk)
|
||||
h.AddAPIResponseData(c, []byte("\n\n"))
|
||||
|
||||
// log.Debugf("Response: %s\n", string(chunk))
|
||||
// Convert the chunk to OpenAI format and send it to the client.
|
||||
@@ -519,3 +547,217 @@ outLoop:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handleClaudeNonStreamingResponse handles non-streaming chat completion responses
|
||||
// for anthropic models. It uses the streaming interface internally but aggregates
|
||||
// all responses before sending back a complete non-streaming response in OpenAI format.
|
||||
//
|
||||
// Parameters:
|
||||
// - c: The Gin context containing the HTTP request and response
|
||||
// - rawJSON: The raw JSON bytes of the OpenAI-compatible request
|
||||
func (h *OpenAIAPIHandlers) handleClaudeNonStreamingResponse(c *gin.Context, rawJSON []byte) {
|
||||
c.Header("Content-Type", "application/json")
|
||||
|
||||
// Force streaming in the request to use the streaming interface
|
||||
newRequestJSON := translatorOpenAIToClaude.ConvertOpenAIRequestToAnthropic(rawJSON)
|
||||
// Ensure stream is set to true for the backend request
|
||||
newRequestJSON, _ = sjson.Set(newRequestJSON, "stream", true)
|
||||
|
||||
modelName := gjson.GetBytes(rawJSON, "model")
|
||||
cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
|
||||
|
||||
var cliClient client.Client
|
||||
defer func() {
|
||||
if cliClient != nil {
|
||||
cliClient.GetRequestMutex().Unlock()
|
||||
}
|
||||
}()
|
||||
|
||||
outLoop:
|
||||
for {
|
||||
var errorResponse *client.ErrorMessage
|
||||
cliClient, errorResponse = h.GetClient(modelName.String())
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
|
||||
if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
|
||||
log.Debugf("Request claude use API Key: %s", apiKey)
|
||||
} else {
|
||||
log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
|
||||
}
|
||||
|
||||
// Use streaming interface but collect all responses
|
||||
respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
|
||||
|
||||
// Collect all streaming chunks to build the final response
|
||||
var allChunks [][]byte
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-c.Request.Context().Done():
|
||||
if c.Request.Context().Err().Error() == "context canceled" {
|
||||
log.Debugf("Client disconnected: %v", c.Request.Context().Err())
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
case chunk, okStream := <-respChan:
|
||||
if !okStream {
|
||||
// All chunks received, now build the final non-streaming response
|
||||
if len(allChunks) > 0 {
|
||||
// Use the last chunk which should contain the complete message
|
||||
finalResponseStr := translatorOpenAIToClaude.ConvertAnthropicStreamingResponseToOpenAINonStream(allChunks)
|
||||
finalResponse := []byte(finalResponseStr)
|
||||
_, _ = c.Writer.Write(finalResponse)
|
||||
}
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
|
||||
// Store chunk for building final response
|
||||
if bytes.HasPrefix(chunk, []byte("data: ")) {
|
||||
jsonData := chunk[6:]
|
||||
allChunks = append(allChunks, jsonData)
|
||||
}
|
||||
|
||||
h.AddAPIResponseData(c, chunk)
|
||||
h.AddAPIResponseData(c, []byte("\n\n"))
|
||||
|
||||
case err, okError := <-errChan:
|
||||
if okError {
|
||||
if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
|
||||
continue outLoop
|
||||
} else {
|
||||
c.Status(err.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
|
||||
cliCancel(err.Error)
|
||||
}
|
||||
return
|
||||
}
|
||||
case <-time.After(30 * time.Second):
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handleClaudeStreamingResponse handles streaming responses for anthropic models.
|
||||
// It establishes a streaming connection with the backend service and forwards
|
||||
// the response chunks to the client in real-time using Server-Sent Events.
|
||||
//
|
||||
// Parameters:
|
||||
// - c: The Gin context containing the HTTP request and response
|
||||
// - rawJSON: The raw JSON bytes of the OpenAI-compatible request
|
||||
func (h *OpenAIAPIHandlers) handleClaudeStreamingResponse(c *gin.Context, rawJSON []byte) {
|
||||
c.Header("Content-Type", "text/event-stream")
|
||||
c.Header("Cache-Control", "no-cache")
|
||||
c.Header("Connection", "keep-alive")
|
||||
c.Header("Access-Control-Allow-Origin", "*")
|
||||
|
||||
// Get the http.Flusher interface to manually flush the response.
|
||||
flusher, ok := c.Writer.(http.Flusher)
|
||||
if !ok {
|
||||
c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
|
||||
Error: handlers.ErrorDetail{
|
||||
Message: "Streaming not supported",
|
||||
Type: "server_error",
|
||||
},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// Prepare the request for the backend client.
|
||||
newRequestJSON := translatorOpenAIToClaude.ConvertOpenAIRequestToAnthropic(rawJSON)
|
||||
modelName := gjson.GetBytes(rawJSON, "model")
|
||||
cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
|
||||
|
||||
var cliClient client.Client
|
||||
defer func() {
|
||||
// Ensure the client's mutex is unlocked on function exit.
|
||||
if cliClient != nil {
|
||||
cliClient.GetRequestMutex().Unlock()
|
||||
}
|
||||
}()
|
||||
|
||||
outLoop:
|
||||
for {
|
||||
var errorResponse *client.ErrorMessage
|
||||
cliClient, errorResponse = h.GetClient(modelName.String())
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
|
||||
if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
|
||||
log.Debugf("Request claude use API Key: %s", apiKey)
|
||||
} else {
|
||||
log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
|
||||
}
|
||||
|
||||
// Send the message and receive response chunks and errors via channels.
|
||||
respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
|
||||
params := &translatorOpenAIToClaude.ConvertAnthropicResponseToOpenAIParams{
|
||||
CreatedAt: 0,
|
||||
ResponseID: "",
|
||||
FinishReason: "",
|
||||
}
|
||||
|
||||
hasFirstResponse := false
|
||||
for {
|
||||
select {
|
||||
// Handle client disconnection.
|
||||
case <-c.Request.Context().Done():
|
||||
if c.Request.Context().Err().Error() == "context canceled" {
|
||||
log.Debugf("GeminiClient disconnected: %v", c.Request.Context().Err())
|
||||
cliCancel() // Cancel the backend request.
|
||||
return
|
||||
}
|
||||
// Process incoming response chunks.
|
||||
case chunk, okStream := <-respChan:
|
||||
if !okStream {
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
|
||||
h.AddAPIResponseData(c, chunk)
|
||||
h.AddAPIResponseData(c, []byte("\n\n"))
|
||||
|
||||
if bytes.HasPrefix(chunk, []byte("data: ")) {
|
||||
jsonData := chunk[6:]
|
||||
// Convert the chunk to OpenAI format and send it to the client.
|
||||
hasFirstResponse = true
|
||||
openAIFormats := translatorOpenAIToClaude.ConvertAnthropicResponseToOpenAI(jsonData, params)
|
||||
for i := 0; i < len(openAIFormats); i++ {
|
||||
_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", openAIFormats[i])
|
||||
flusher.Flush()
|
||||
}
|
||||
}
|
||||
// Handle errors from the backend.
|
||||
case err, okError := <-errChan:
|
||||
if okError {
|
||||
if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
|
||||
continue outLoop
|
||||
} else {
|
||||
c.Status(err.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
|
||||
flusher.Flush()
|
||||
cliCancel(err.Error)
|
||||
}
|
||||
return
|
||||
}
|
||||
// Send a keep-alive signal to the client.
|
||||
case <-time.After(500 * time.Millisecond):
|
||||
if hasFirstResponse {
|
||||
_, _ = c.Writer.Write([]byte(": CLI-PROXY-API PROCESSING\n\n"))
|
||||
flusher.Flush()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user