Refactor API handlers to implement retry mechanism with configurable limits and improved error handling

- Introduced a retry counter with a configurable `RequestRetry` limit in all handlers.
- Enhanced error handling with specific HTTP status codes for switching clients.
- Standardized response forwarding for non-retriable errors.
- Improved logging for quota and client switch scenarios.
This commit is contained in:
Luis Pater
2025-08-25 23:27:30 +08:00
parent 8c555c4e69
commit 9102ff031d
11 changed files with 148 additions and 43 deletions

View File

@@ -122,11 +122,11 @@ func (h *ClaudeCodeAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON [
cliClient.GetRequestMutex().Unlock()
}
}()
retryCount := 0
// Main client rotation loop with quota management
// This loop implements a sophisticated load balancing and failover mechanism
outLoop:
for {
for retryCount <= h.Cfg.RequestRetry {
var errorResponse *interfaces.ErrorMessage
cliClient, errorResponse = h.GetClient(modelName)
if errorResponse != nil {
@@ -170,9 +170,17 @@ outLoop:
if okError {
// Special handling for quota exceeded errors
// If configured, attempt to switch to a different project/client
if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
continue outLoop // Restart the client selection process
} else {
switch errInfo.StatusCode {
case 429:
if h.Cfg.QuotaExceeded.SwitchProject {
log.Debugf("quota exceeded, switch client")
continue outLoop // Restart the client selection process
}
case 403, 408, 500, 502, 503, 504:
log.Debugf("http status code %d, switch client", errInfo.StatusCode)
retryCount++
continue outLoop
default:
// Forward other errors directly to the client
c.Status(errInfo.StatusCode)
_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())

View File

@@ -167,8 +167,9 @@ func (h *GeminiCLIAPIHandler) handleInternalStreamGenerateContent(c *gin.Context
}
}()
retryCount := 0
outLoop:
for {
for retryCount <= h.Cfg.RequestRetry {
var errorResponse *interfaces.ErrorMessage
cliClient, errorResponse = h.GetClient(modelName)
if errorResponse != nil {
@@ -205,9 +206,18 @@ outLoop:
// Handle errors from the backend.
case err, okError := <-errChan:
if okError {
if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
switch err.StatusCode {
case 429:
if h.Cfg.QuotaExceeded.SwitchProject {
log.Debugf("quota exceeded, switch client")
continue outLoop // Restart the client selection process
}
case 403, 408, 500, 502, 503, 504:
log.Debugf("http status code %d, switch client", err.StatusCode)
retryCount++
continue outLoop
} else {
default:
// Forward other errors directly to the client
c.Status(err.StatusCode)
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
flusher.Flush()
@@ -238,7 +248,8 @@ func (h *GeminiCLIAPIHandler) handleInternalGenerateContent(c *gin.Context, rawJ
}
}()
for {
retryCount := 0
for retryCount <= h.Cfg.RequestRetry {
var errorResponse *interfaces.ErrorMessage
cliClient, errorResponse = h.GetClient(modelName)
if errorResponse != nil {
@@ -250,12 +261,20 @@ func (h *GeminiCLIAPIHandler) handleInternalGenerateContent(c *gin.Context, rawJ
resp, err := cliClient.SendRawMessage(cliCtx, modelName, rawJSON, "")
if err != nil {
if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
switch err.StatusCode {
case 429:
if h.Cfg.QuotaExceeded.SwitchProject {
log.Debugf("quota exceeded, switch client")
continue // Restart the client selection process
}
case 403, 408, 500, 502, 503, 504:
log.Debugf("http status code %d, switch client", err.StatusCode)
retryCount++
continue
} else {
default:
// Forward other errors directly to the client
c.Status(err.StatusCode)
_, _ = c.Writer.Write([]byte(err.Error.Error()))
// log.Debugf("code: %d, error: %s", err.StatusCode, err.Error.Error())
cliCancel(err.Error)
}
break

View File

@@ -270,8 +270,9 @@ func (h *GeminiAPIHandler) handleStreamGenerateContent(c *gin.Context, modelName
}
}()
retryCount := 0
outLoop:
for {
for retryCount <= h.Cfg.RequestRetry {
var errorResponse *interfaces.ErrorMessage
cliClient, errorResponse = h.GetClient(modelName)
if errorResponse != nil {
@@ -311,11 +312,18 @@ outLoop:
// Handle errors from the backend.
case err, okError := <-errChan:
if okError {
if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
log.Debugf("quota exceeded, switch client")
switch err.StatusCode {
case 429:
if h.Cfg.QuotaExceeded.SwitchProject {
log.Debugf("quota exceeded, switch client")
continue outLoop // Restart the client selection process
}
case 403, 408, 500, 502, 503, 504:
log.Debugf("http status code %d, switch client", err.StatusCode)
retryCount++
continue outLoop
} else {
// log.Debugf("error code :%d, error: %v", err.StatusCode, err.Error.Error())
default:
// Forward other errors directly to the client
c.Status(err.StatusCode)
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
flusher.Flush()
@@ -402,7 +410,8 @@ func (h *GeminiAPIHandler) handleGenerateContent(c *gin.Context, modelName strin
}
}()
for {
retryCount := 0
for retryCount <= h.Cfg.RequestRetry {
var errorResponse *interfaces.ErrorMessage
cliClient, errorResponse = h.GetClient(modelName)
if errorResponse != nil {
@@ -414,9 +423,18 @@ func (h *GeminiAPIHandler) handleGenerateContent(c *gin.Context, modelName strin
resp, err := cliClient.SendRawMessage(cliCtx, modelName, rawJSON, alt)
if err != nil {
if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
switch err.StatusCode {
case 429:
if h.Cfg.QuotaExceeded.SwitchProject {
log.Debugf("quota exceeded, switch client")
continue // Restart the client selection process
}
case 403, 408, 500, 502, 503, 504:
log.Debugf("http status code %d, switch client", err.StatusCode)
retryCount++
continue
} else {
default:
// Forward other errors directly to the client
c.Status(err.StatusCode)
_, _ = c.Writer.Write([]byte(err.Error.Error()))
cliCancel(err.Error)

View File

@@ -183,7 +183,8 @@ func (h *OpenAIAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSON []
}
}()
for {
retryCount := 0
for retryCount <= h.Cfg.RequestRetry {
var errorResponse *interfaces.ErrorMessage
cliClient, errorResponse = h.GetClient(modelName)
if errorResponse != nil {
@@ -195,9 +196,18 @@ func (h *OpenAIAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSON []
resp, err := cliClient.SendRawMessage(cliCtx, modelName, rawJSON, "")
if err != nil {
if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
switch err.StatusCode {
case 429:
if h.Cfg.QuotaExceeded.SwitchProject {
log.Debugf("quota exceeded, switch client")
continue // Restart the client selection process
}
case 403, 408, 500, 502, 503, 504:
log.Debugf("http status code %d, switch client", err.StatusCode)
retryCount++
continue
} else {
default:
// Forward other errors directly to the client
c.Status(err.StatusCode)
_, _ = c.Writer.Write([]byte(err.Error.Error()))
cliCancel(err.Error)
@@ -247,8 +257,9 @@ func (h *OpenAIAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byt
}
}()
retryCount := 0
outLoop:
for {
for retryCount <= h.Cfg.RequestRetry {
var errorResponse *interfaces.ErrorMessage
cliClient, errorResponse = h.GetClient(modelName)
if errorResponse != nil {
@@ -286,9 +297,18 @@ outLoop:
// Handle errors from the backend.
case err, okError := <-errChan:
if okError {
if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
switch err.StatusCode {
case 429:
if h.Cfg.QuotaExceeded.SwitchProject {
log.Debugf("quota exceeded, switch client")
continue outLoop // Restart the client selection process
}
case 403, 408, 500, 502, 503, 504:
log.Debugf("http status code %d, switch client", err.StatusCode)
retryCount++
continue outLoop
} else {
default:
// Forward other errors directly to the client
c.Status(err.StatusCode)
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
flusher.Flush()

View File

@@ -37,6 +37,9 @@ type Server struct {
// cfg holds the current server configuration.
cfg *config.Config
// requestLogger is the request logger instance for dynamic configuration updates.
requestLogger *logging.FileRequestLogger
}
// NewServer creates and initializes a new API server instance.
@@ -69,9 +72,10 @@ func NewServer(cfg *config.Config, cliClients []interfaces.Client) *Server {
// Create server instance
s := &Server{
engine: engine,
handlers: handlers.NewBaseAPIHandlers(cliClients, cfg),
cfg: cfg,
engine: engine,
handlers: handlers.NewBaseAPIHandlers(cliClients, cfg),
cfg: cfg,
requestLogger: requestLogger,
}
// Setup routes
@@ -189,6 +193,12 @@ func corsMiddleware() gin.HandlerFunc {
// - clients: The new slice of AI service clients
// - cfg: The new application configuration
func (s *Server) UpdateClients(clients []interfaces.Client, cfg *config.Config) {
// Update request logger enabled state if it has changed
if s.requestLogger != nil && s.cfg.RequestLog != cfg.RequestLog {
s.requestLogger.SetEnabled(cfg.RequestLog)
log.Debugf("request logging updated from %t to %t", s.cfg.RequestLog, cfg.RequestLog)
}
s.cfg = cfg
s.handlers.UpdateClients(clients, cfg)
log.Infof("server clients and configuration updated: %d clients", len(clients))

View File

@@ -37,6 +37,9 @@ type Config struct {
// RequestLog enables or disables detailed request logging functionality.
RequestLog bool `yaml:"request-log"`
// RequestRetry defines the number of retries when a request fails.
RequestRetry int `yaml:"request-retry"`
ClaudeKey []ClaudeKey `yaml:"claude-api-key"`
}

View File

@@ -115,6 +115,15 @@ func (l *FileRequestLogger) IsEnabled() bool {
return l.enabled
}
// SetEnabled updates the request logging enabled state.
// It overwrites the logger's enabled field with the given value, so request
// logging can be switched on or off after the logger has been created.
//
// NOTE(review): this method performs the write without taking any lock; if
// IsEnabled may run on another goroutine at the same time, confirm that access
// to the field is synchronized elsewhere.
//
// Parameters:
// - enabled: Whether request logging should be enabled
func (l *FileRequestLogger) SetEnabled(enabled bool) {
l.enabled = enabled
}
// LogRequest logs a complete non-streaming request/response cycle to a file.
//
// Parameters:

View File

@@ -169,6 +169,12 @@ func (w *Watcher) reloadConfig() {
if oldConfig.ProxyURL != newConfig.ProxyURL {
log.Debugf(" proxy-url: %s -> %s", oldConfig.ProxyURL, newConfig.ProxyURL)
}
if oldConfig.RequestLog != newConfig.RequestLog {
log.Debugf(" request-log: %t -> %t", oldConfig.RequestLog, newConfig.RequestLog)
}
if oldConfig.RequestRetry != newConfig.RequestRetry {
log.Debugf(" request-retry: %d -> %d", oldConfig.RequestRetry, newConfig.RequestRetry)
}
if len(oldConfig.APIKeys) != len(newConfig.APIKeys) {
log.Debugf(" api-keys count: %d -> %d", len(oldConfig.APIKeys), len(newConfig.APIKeys))
}