mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-02-18 12:20:52 +08:00
671 lines
25 KiB
Go
671 lines
25 KiB
Go
// Package claude provides HTTP handlers for Claude API code-related functionality.
|
|
// This package implements Claude-compatible streaming chat completions with sophisticated
|
|
// client rotation and quota management systems to ensure high availability and optimal
|
|
// resource utilization across multiple backend clients. It handles request translation
|
|
// between Claude API format and the underlying Gemini backend, providing seamless
|
|
// API compatibility while maintaining robust error handling and connection management.
|
|
package claude
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"fmt"
|
|
"net/http"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/gin-gonic/gin"
|
|
"github.com/luispater/CLIProxyAPI/internal/api/handlers"
|
|
"github.com/luispater/CLIProxyAPI/internal/client"
|
|
translatorClaudeCodeToCodex "github.com/luispater/CLIProxyAPI/internal/translator/codex/claude/code"
|
|
translatorClaudeCodeToGeminiCli "github.com/luispater/CLIProxyAPI/internal/translator/gemini-cli/claude/code"
|
|
translatorClaudeCodeToQwen "github.com/luispater/CLIProxyAPI/internal/translator/openai/claude"
|
|
"github.com/luispater/CLIProxyAPI/internal/util"
|
|
log "github.com/sirupsen/logrus"
|
|
"github.com/tidwall/gjson"
|
|
"github.com/tidwall/sjson"
|
|
)
|
|
|
|
// ClaudeCodeAPIHandlers contains the handlers for Claude API endpoints.
// It embeds the shared *handlers.APIHandlers, which supplies the backend
// client pool, configuration, and request-tracking helpers used by all
// streaming handlers in this file.
type ClaudeCodeAPIHandlers struct {
	*handlers.APIHandlers
}
|
|
|
|
// NewClaudeCodeAPIHandlers creates a new Claude API handlers instance.
|
|
// It takes an APIHandlers instance as input and returns a ClaudeCodeAPIHandlers.
|
|
func NewClaudeCodeAPIHandlers(apiHandlers *handlers.APIHandlers) *ClaudeCodeAPIHandlers {
|
|
return &ClaudeCodeAPIHandlers{
|
|
APIHandlers: apiHandlers,
|
|
}
|
|
}
|
|
|
|
// ClaudeMessages handles Claude-compatible streaming chat completions.
|
|
// This function implements a sophisticated client rotation and quota management system
|
|
// to ensure high availability and optimal resource utilization across multiple backend clients.
|
|
func (h *ClaudeCodeAPIHandlers) ClaudeMessages(c *gin.Context) {
|
|
// Extract raw JSON data from the incoming request
|
|
rawJSON, err := c.GetRawData()
|
|
// If data retrieval fails, return a 400 Bad Request error.
|
|
if err != nil {
|
|
c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
|
|
Error: handlers.ErrorDetail{
|
|
Message: fmt.Sprintf("Invalid request: %v", err),
|
|
Type: "invalid_request_error",
|
|
},
|
|
})
|
|
return
|
|
}
|
|
|
|
// h.handleGeminiStreamingResponse(c, rawJSON)
|
|
// h.handleCodexStreamingResponse(c, rawJSON)
|
|
modelName := gjson.GetBytes(rawJSON, "model")
|
|
provider := util.GetProviderName(modelName.String())
|
|
|
|
// Check if the client requested a streaming response.
|
|
streamResult := gjson.GetBytes(rawJSON, "stream")
|
|
if !streamResult.Exists() || streamResult.Type == gjson.False {
|
|
return
|
|
}
|
|
|
|
if provider == "gemini" {
|
|
h.handleGeminiStreamingResponse(c, rawJSON)
|
|
} else if provider == "gpt" {
|
|
h.handleCodexStreamingResponse(c, rawJSON)
|
|
} else if provider == "claude" {
|
|
h.handleClaudeStreamingResponse(c, rawJSON)
|
|
} else if provider == "qwen" {
|
|
h.handleQwenStreamingResponse(c, rawJSON)
|
|
} else {
|
|
h.handleGeminiStreamingResponse(c, rawJSON)
|
|
}
|
|
}
|
|
|
|
// handleGeminiStreamingResponse streams Claude-compatible responses backed by Gemini.
|
|
// It sets up SSE, selects a backend client with rotation/quota logic,
|
|
// forwards chunks, and translates them to Claude CLI format.
|
|
func (h *ClaudeCodeAPIHandlers) handleGeminiStreamingResponse(c *gin.Context, rawJSON []byte) {
|
|
// Set up Server-Sent Events (SSE) headers for streaming response
|
|
// These headers are essential for maintaining a persistent connection
|
|
// and enabling real-time streaming of chat completions
|
|
c.Header("Content-Type", "text/event-stream")
|
|
c.Header("Cache-Control", "no-cache")
|
|
c.Header("Connection", "keep-alive")
|
|
c.Header("Access-Control-Allow-Origin", "*")
|
|
|
|
// Get the http.Flusher interface to manually flush the response.
|
|
// This is crucial for streaming as it allows immediate sending of data chunks
|
|
flusher, ok := c.Writer.(http.Flusher)
|
|
if !ok {
|
|
c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
|
|
Error: handlers.ErrorDetail{
|
|
Message: "Streaming not supported",
|
|
Type: "server_error",
|
|
},
|
|
})
|
|
return
|
|
}
|
|
|
|
// Parse and prepare the Claude request, extracting model name, system instructions,
|
|
// conversation contents, and available tools from the raw JSON
|
|
modelName, systemInstruction, contents, tools := translatorClaudeCodeToGeminiCli.ConvertClaudeCodeRequestToCli(rawJSON)
|
|
|
|
// Create a cancellable context for the backend client request
|
|
// This allows proper cleanup and cancellation of ongoing requests
|
|
cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())
|
|
|
|
var cliClient client.Client
|
|
cliClient = client.NewGeminiClient(nil, nil, nil)
|
|
defer func() {
|
|
// Ensure the client's mutex is unlocked on function exit.
|
|
// This prevents deadlocks and ensures proper resource cleanup
|
|
if cliClient != nil {
|
|
cliClient.GetRequestMutex().Unlock()
|
|
}
|
|
}()
|
|
|
|
// Main client rotation loop with quota management
|
|
// This loop implements a sophisticated load balancing and failover mechanism
|
|
outLoop:
|
|
for {
|
|
var errorResponse *client.ErrorMessage
|
|
cliClient, errorResponse = h.GetClient(modelName)
|
|
if errorResponse != nil {
|
|
c.Status(errorResponse.StatusCode)
|
|
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
|
flusher.Flush()
|
|
cliCancel()
|
|
return
|
|
}
|
|
|
|
// Determine the authentication method being used by the selected client
|
|
// This affects how responses are formatted and logged
|
|
isGlAPIKey := false
|
|
if glAPIKey := cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey(); glAPIKey != "" {
|
|
log.Debugf("Request use gemini generative language API Key: %s", glAPIKey)
|
|
isGlAPIKey = true
|
|
} else {
|
|
log.Debugf("Request use gemini account: %s, project id: %s", cliClient.GetEmail(), cliClient.(*client.GeminiClient).GetProjectID())
|
|
}
|
|
// Initiate streaming communication with the backend client
|
|
// This returns two channels: one for response chunks and one for errors
|
|
|
|
respChan, errChan := cliClient.SendMessageStream(cliCtx, rawJSON, modelName, systemInstruction, contents, tools, true)
|
|
|
|
// Track response state for proper Claude format conversion
|
|
hasFirstResponse := false
|
|
responseType := 0
|
|
responseIndex := 0
|
|
|
|
// Main streaming loop - handles multiple concurrent events using Go channels
|
|
// This select statement manages four different types of events simultaneously
|
|
for {
|
|
select {
|
|
// Case 1: Handle client disconnection
|
|
// Detects when the HTTP client has disconnected and cleans up resources
|
|
case <-c.Request.Context().Done():
|
|
if c.Request.Context().Err().Error() == "context canceled" {
|
|
log.Debugf("GeminiClient disconnected: %v", c.Request.Context().Err())
|
|
cliCancel() // Cancel the backend request to prevent resource leaks
|
|
return
|
|
}
|
|
|
|
// Case 2: Process incoming response chunks from the backend
|
|
// This handles the actual streaming data from the AI model
|
|
case chunk, okStream := <-respChan:
|
|
if !okStream {
|
|
// Stream has ended - send the final message_stop event
|
|
// This follows the Claude API specification for stream termination
|
|
_, _ = c.Writer.Write([]byte(`event: message_stop`))
|
|
_, _ = c.Writer.Write([]byte("\n"))
|
|
_, _ = c.Writer.Write([]byte(`data: {"type":"message_stop"}`))
|
|
_, _ = c.Writer.Write([]byte("\n\n\n"))
|
|
|
|
flusher.Flush()
|
|
cliCancel()
|
|
return
|
|
}
|
|
|
|
h.AddAPIResponseData(c, chunk)
|
|
h.AddAPIResponseData(c, []byte("\n\n"))
|
|
// Convert the backend response to Claude-compatible format
|
|
// This translation layer ensures API compatibility
|
|
claudeFormat := translatorClaudeCodeToGeminiCli.ConvertCliResponseToClaudeCode(chunk, isGlAPIKey, hasFirstResponse, &responseType, &responseIndex)
|
|
if claudeFormat != "" {
|
|
_, _ = c.Writer.Write([]byte(claudeFormat))
|
|
flusher.Flush() // Immediately send the chunk to the client
|
|
}
|
|
hasFirstResponse = true
|
|
|
|
// Case 3: Handle errors from the backend
|
|
// This manages various error conditions and implements retry logic
|
|
case errInfo, okError := <-errChan:
|
|
if okError {
|
|
// Special handling for quota exceeded errors
|
|
// If configured, attempt to switch to a different project/client
|
|
if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
|
|
continue outLoop // Restart the client selection process
|
|
} else {
|
|
// Forward other errors directly to the client
|
|
c.Status(errInfo.StatusCode)
|
|
_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
|
|
flusher.Flush()
|
|
cliCancel(errInfo.Error)
|
|
}
|
|
return
|
|
}
|
|
|
|
// Case 4: Send periodic keep-alive signals
|
|
// Prevents connection timeouts during long-running requests
|
|
case <-time.After(500 * time.Millisecond):
|
|
if hasFirstResponse {
|
|
// Send a ping event to maintain the connection
|
|
// This is especially important for slow AI model responses
|
|
// output := "event: ping\n"
|
|
// output = output + `data: {"type": "ping"}`
|
|
// output = output + "\n\n\n"
|
|
// _, _ = c.Writer.Write([]byte(output))
|
|
//
|
|
// flusher.Flush()
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// handleCodexStreamingResponse streams Claude-compatible responses backed by OpenAI.
// It converts the Claude request into Codex/OpenAI responses format, establishes SSE,
// and translates streaming chunks back into Claude CLI events.
func (h *ClaudeCodeAPIHandlers) handleCodexStreamingResponse(c *gin.Context, rawJSON []byte) {
	// Set up Server-Sent Events (SSE) headers for the streaming response.
	c.Header("Content-Type", "text/event-stream")
	c.Header("Cache-Control", "no-cache")
	c.Header("Connection", "keep-alive")
	c.Header("Access-Control-Allow-Origin", "*")

	// The http.Flusher interface is required to push chunks to the client
	// immediately instead of waiting for the response to complete.
	flusher, ok := c.Writer.(http.Flusher)
	if !ok {
		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
			Error: handlers.ErrorDetail{
				Message: "Streaming not supported",
				Type:    "server_error",
			},
		})
		return
	}

	// Convert the Claude request into Codex/OpenAI format, then force the
	// model field back to the originally requested model name.
	newRequestJSON := translatorClaudeCodeToCodex.ConvertClaudeCodeRequestToCodex(rawJSON)
	modelName := gjson.GetBytes(rawJSON, "model").String()

	newRequestJSON, _ = sjson.Set(newRequestJSON, "model", modelName)

	// Create a cancellable context for the backend request so it can be
	// torn down on client disconnect or error.
	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())

	var cliClient client.Client
	defer func() {
		// Release the selected client's request mutex on exit so the client
		// can be reused by other requests.
		if cliClient != nil {
			cliClient.GetRequestMutex().Unlock()
		}
	}()

	// Client rotation loop: on quota exhaustion (HTTP 429) with project
	// switching enabled, re-select a client and retry the whole request.
outLoop:
	for {
		var errorResponse *client.ErrorMessage
		cliClient, errorResponse = h.GetClient(modelName)
		if errorResponse != nil {
			// No usable client available; forward the selection error as-is.
			c.Status(errorResponse.StatusCode)
			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
			flusher.Flush()
			cliCancel()
			return
		}

		log.Debugf("Request use codex account: %s", cliClient.GetEmail())

		// Start the backend stream: one channel for response chunks, one for errors.
		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")

		// Translator state: tracks whether a tool call is being accumulated
		// across chunks so the converter emits correctly paired events.
		hasToolCall := false

		// Streaming loop: multiplexes client disconnect, response chunks,
		// backend errors, and a periodic keep-alive tick.
		for {
			select {
			// Case 1: the HTTP client disconnected — cancel the backend
			// request and stop. (Other context errors fall through and the
			// select simply continues.)
			case <-c.Request.Context().Done():
				if c.Request.Context().Err().Error() == "context canceled" {
					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
					cliCancel() // Cancel the backend request to prevent resource leaks.
					return
				}

			// Case 2: a response chunk arrived from the backend.
			case chunk, okStream := <-respChan:
				if !okStream {
					// Stream ended; flush whatever is buffered and stop.
					flusher.Flush()
					cliCancel()
					return
				}

				// Record the raw backend payload for request logging/auditing.
				h.AddAPIResponseData(c, chunk)
				h.AddAPIResponseData(c, []byte("\n\n"))

				// Only SSE data lines are translated; the "data: " prefix
				// (6 bytes) is stripped before conversion.
				if bytes.HasPrefix(chunk, []byte("data: ")) {
					jsonData := chunk[6:]
					var claudeFormat string
					claudeFormat, hasToolCall = translatorClaudeCodeToCodex.ConvertCodexResponseToClaude(jsonData, hasToolCall)
					// An empty result means the chunk produced no client-visible event.
					if claudeFormat != "" {
						_, _ = c.Writer.Write([]byte(claudeFormat))
						_, _ = c.Writer.Write([]byte("\n"))
					}
					flusher.Flush() // Immediately send the chunk to the client.
				} else {
					// Non-data lines (e.g. event names, comments) are ignored.
				}
			// Case 3: the backend reported an error.
			case errInfo, okError := <-errChan:
				if okError {
					// On quota exhaustion, optionally rotate to another
					// client/project and retry from the top.
					if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
						log.Debugf("quota exceeded, switch client")
						continue outLoop // Restart the client selection process.
					} else {
						// Forward all other errors directly to the client.
						c.Status(errInfo.StatusCode)
						_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
						flusher.Flush()
						cliCancel(errInfo.Error)
					}
					return
				}

			// Case 4: periodic keep-alive tick; ping emission is currently
			// disabled, so this only prevents the select from blocking forever.
			case <-time.After(3000 * time.Millisecond):
			}
		}
	}
}
|
|
|
|
// handleClaudeStreamingResponse streams responses backed by the Claude API itself.
// The request is already in Claude format, so it is forwarded as-is; SSE headers
// are set lazily on the first chunk, and chunks are passed through unmodified.
func (h *ClaudeCodeAPIHandlers) handleClaudeStreamingResponse(c *gin.Context, rawJSON []byte) {

	// The http.Flusher interface is required to push chunks to the client
	// immediately instead of waiting for the response to complete.
	flusher, ok := c.Writer.(http.Flusher)
	if !ok {
		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
			Error: handlers.ErrorDetail{
				Message: "Streaming not supported",
				Type:    "server_error",
			},
		})
		return
	}

	modelName := gjson.GetBytes(rawJSON, "model").String()

	// Create a cancellable context for the backend request so it can be
	// torn down on client disconnect or error.
	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())

	var cliClient client.Client
	defer func() {
		// Release the selected client's request mutex on exit so the client
		// can be reused by other requests.
		if cliClient != nil {
			cliClient.GetRequestMutex().Unlock()
		}
	}()

	// Client rotation loop: on quota exhaustion (HTTP 429) with project
	// switching enabled, re-select a client and retry the whole request.
outLoop:
	for {
		var errorResponse *client.ErrorMessage
		cliClient, errorResponse = h.GetClient(modelName)
		if errorResponse != nil {

			// For 429s, send the error body as JSON with an explicit length
			// so Claude clients can parse the quota error reliably.
			if errorResponse.StatusCode == 429 {
				c.Header("Content-Type", "application/json")
				c.Header("Content-Length", fmt.Sprintf("%d", len(errorResponse.Error.Error())))
			}
			c.Status(errorResponse.StatusCode)

			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
			flusher.Flush()
			cliCancel()

			return
		}

		// Record which credential the selected client authenticates with.
		if apiKey := cliClient.(*client.ClaudeClient).GetAPIKey(); apiKey != "" {
			log.Debugf("Request claude use API Key: %s", apiKey)
		} else {
			log.Debugf("Request claude use account: %s", cliClient.(*client.ClaudeClient).GetEmail())
		}

		// Start the backend stream: one channel for response chunks, one for errors.
		// The original Claude-format request is forwarded without translation.
		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, rawJSON, "")

		hasFirstResponse := false
		// Streaming loop: multiplexes client disconnect, response chunks,
		// backend errors, and a periodic keep-alive tick.
		for {
			select {
			// Case 1: the HTTP client disconnected — cancel the backend
			// request and stop. (Other context errors fall through and the
			// select simply continues.)
			case <-c.Request.Context().Done():
				if c.Request.Context().Err().Error() == "context canceled" {
					log.Debugf("ClaudeClient disconnected: %v", c.Request.Context().Err())
					cliCancel() // Cancel the backend request to prevent resource leaks.
					return
				}

			// Case 2: a response chunk arrived from the backend.
			case chunk, okStream := <-respChan:
				if !okStream {
					// Stream ended; flush whatever is buffered and stop.
					flusher.Flush()
					cliCancel()
					return
				}
				// Record the raw backend payload for request logging/auditing.
				h.AddAPIResponseData(c, chunk)
				h.AddAPIResponseData(c, []byte("\n\n"))

				// SSE headers are set only once a successful chunk arrives,
				// so earlier error paths can still send plain/JSON responses.
				if !hasFirstResponse {
					c.Header("Content-Type", "text/event-stream")
					c.Header("Cache-Control", "no-cache")
					c.Header("Connection", "keep-alive")
					c.Header("Access-Control-Allow-Origin", "*")
					hasFirstResponse = true
				}

				// Pass the Claude chunk through unmodified.
				_, _ = c.Writer.Write(chunk)
				_, _ = c.Writer.Write([]byte("\n"))
				flusher.Flush()

			// Case 3: the backend reported an error.
			case errInfo, okError := <-errChan:
				if okError {
					// On quota exhaustion, optionally rotate to another
					// client/project and retry from the top.
					if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
						log.Debugf("quota exceeded, switch client")
						continue outLoop // Restart the client selection process.
					} else {
						// Forward any extra response headers supplied with the
						// error (e.g. rate-limit metadata) before the status.
						if errInfo.Addon != nil {
							for key, val := range errInfo.Addon {
								c.Header(key, val[0])
							}
						}

						c.Status(errInfo.StatusCode)

						_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
						flusher.Flush()
						cliCancel(errInfo.Error)
					}
					return
				}

			// Case 4: periodic keep-alive tick; prevents the select from
			// blocking indefinitely between events.
			case <-time.After(3000 * time.Millisecond):
			}
		}
	}
}
|
|
|
|
// handleQwenStreamingResponse streams Claude-compatible responses backed by Qwen's
// OpenAI-compatible API. It converts the Claude request into OpenAI chat format,
// establishes SSE, and translates streaming chunks back into Claude Code events.
func (h *ClaudeCodeAPIHandlers) handleQwenStreamingResponse(c *gin.Context, rawJSON []byte) {
	// Set up Server-Sent Events (SSE) headers for the streaming response.
	c.Header("Content-Type", "text/event-stream")
	c.Header("Cache-Control", "no-cache")
	c.Header("Connection", "keep-alive")
	c.Header("Access-Control-Allow-Origin", "*")

	// The http.Flusher interface is required to push chunks to the client
	// immediately instead of waiting for the response to complete.
	flusher, ok := c.Writer.(http.Flusher)
	if !ok {
		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
			Error: handlers.ErrorDetail{
				Message: "Streaming not supported",
				Type:    "server_error",
			},
		})
		return
	}

	// Convert the Claude request into OpenAI format, then force the model
	// field back to the originally requested model name.
	newRequestJSON := translatorClaudeCodeToQwen.ConvertAnthropicRequestToOpenAI(rawJSON)
	modelName := gjson.GetBytes(rawJSON, "model").String()

	newRequestJSON, _ = sjson.Set(newRequestJSON, "model", modelName)

	// Create a cancellable context for the backend request so it can be
	// torn down on client disconnect or error.
	cliCtx, cliCancel := h.GetContextWithCancel(c, context.Background())

	var cliClient client.Client
	defer func() {
		// Release the selected client's request mutex on exit so the client
		// can be reused by other requests.
		if cliClient != nil {
			cliClient.GetRequestMutex().Unlock()
		}
	}()

	// Client rotation loop: on quota exhaustion (HTTP 429) with project
	// switching enabled, re-select a client and retry the whole request.
outLoop:
	for {
		var errorResponse *client.ErrorMessage
		cliClient, errorResponse = h.GetClient(modelName)
		if errorResponse != nil {
			// No usable client available; forward the selection error as-is.
			c.Status(errorResponse.StatusCode)
			_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
			flusher.Flush()
			cliCancel()
			return
		}

		log.Debugf("Request use qwen account: %s", cliClient.GetEmail())

		// Start the backend stream: one channel for response chunks, one for errors.
		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")

		// Translator state carried across chunks: message identity plus
		// accumulators for streamed text content and tool calls.
		params := &translatorClaudeCodeToQwen.ConvertOpenAIResponseToAnthropicParams{
			MessageID:            "",
			Model:                "",
			CreatedAt:            0,
			ContentAccumulator:   strings.Builder{},
			ToolCallsAccumulator: nil,
		}

		// Streaming loop: multiplexes client disconnect, response chunks,
		// backend errors, and a periodic keep-alive tick.
		for {
			select {
			// Case 1: the HTTP client disconnected — cancel the backend
			// request and stop. (Other context errors fall through and the
			// select simply continues.)
			case <-c.Request.Context().Done():
				if c.Request.Context().Err().Error() == "context canceled" {
					// NOTE(review): log message says "CodexClient" — looks
					// copy-pasted from the codex handler; should likely read
					// "QwenClient". Left unchanged (runtime string).
					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
					cliCancel() // Cancel the backend request to prevent resource leaks.
					return
				}

			// Case 2: a response chunk arrived from the backend.
			case chunk, okStream := <-respChan:
				if !okStream {
					// Stream ended; flush whatever is buffered and stop.
					flusher.Flush()
					cliCancel()
					return
				}

				// Record the raw backend payload for request logging/auditing.
				// NOTE(review): siblings append "\n\n" here; this handler
				// appends a single "\n" — confirm which the log format expects.
				h.AddAPIResponseData(c, chunk)
				h.AddAPIResponseData(c, []byte("\n"))

				// Only SSE data lines are translated; the "data: " prefix
				// (6 bytes) is stripped before conversion.
				if bytes.HasPrefix(chunk, []byte("data: ")) {
					jsonData := chunk[6:]
					outputs := translatorClaudeCodeToQwen.ConvertOpenAIResponseToAnthropic(jsonData, params)
					if len(outputs) > 0 {
						for i := 0; i < len(outputs); i++ {
							// NOTE(review): no "\n\n" is written after each
							// event here, unlike the other handlers — this
							// assumes each output already ends with newlines;
							// verify against the translator's output.
							_, _ = c.Writer.Write([]byte("data: "))
							_, _ = c.Writer.Write([]byte(outputs[i]))
						}
					}
					flusher.Flush() // Immediately send the chunk to the client.
				} else {
					// Non-data lines (e.g. event names, comments) are ignored.
				}
			// Case 3: the backend reported an error.
			case errInfo, okError := <-errChan:
				if okError {
					// On quota exhaustion, optionally rotate to another
					// client/project and retry from the top.
					if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
						log.Debugf("quota exceeded, switch client")
						continue outLoop // Restart the client selection process.
					} else {
						// Forward all other errors directly to the client.
						c.Status(errInfo.StatusCode)
						_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
						flusher.Flush()
						cliCancel(errInfo.Error)
					}
					return
				}

			// Case 4: periodic keep-alive tick; prevents the select from
			// blocking indefinitely between events.
			case <-time.After(3000 * time.Millisecond):
			}
		}
	}
}
|