Refactor API handlers to implement retry mechanism with configurable limits and improved error handling

- Introduced retry counter with a configurable ` RequestRetry ` limit in all handlers.
- Enhanced error handling with specific HTTP status codes for switching clients.
- Standardized response forwarding for non-retriable errors.
- Improved logging for quota and client switch scenarios.
This commit is contained in:
Luis Pater
2025-08-25 23:27:30 +08:00
parent 8c555c4e69
commit 9102ff031d
11 changed files with 148 additions and 43 deletions

View File

@@ -183,7 +183,8 @@ func (h *OpenAIAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSON []
}
}()
for {
retryCount := 0
for retryCount <= h.Cfg.RequestRetry {
var errorResponse *interfaces.ErrorMessage
cliClient, errorResponse = h.GetClient(modelName)
if errorResponse != nil {
@@ -195,9 +196,18 @@ func (h *OpenAIAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSON []
resp, err := cliClient.SendRawMessage(cliCtx, modelName, rawJSON, "")
if err != nil {
if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
switch err.StatusCode {
case 429:
if h.Cfg.QuotaExceeded.SwitchProject {
log.Debugf("quota exceeded, switch client")
continue // Restart the client selection process
}
case 403, 408, 500, 502, 503, 504:
log.Debugf("http status code %d, switch client", err.StatusCode)
retryCount++
continue
} else {
default:
// Forward other errors directly to the client
c.Status(err.StatusCode)
_, _ = c.Writer.Write([]byte(err.Error.Error()))
cliCancel(err.Error)
@@ -247,8 +257,9 @@ func (h *OpenAIAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byt
}
}()
retryCount := 0
outLoop:
for {
for retryCount <= h.Cfg.RequestRetry {
var errorResponse *interfaces.ErrorMessage
cliClient, errorResponse = h.GetClient(modelName)
if errorResponse != nil {
@@ -286,9 +297,18 @@ outLoop:
// Handle errors from the backend.
case err, okError := <-errChan:
if okError {
if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
switch err.StatusCode {
case 429:
if h.Cfg.QuotaExceeded.SwitchProject {
log.Debugf("quota exceeded, switch client")
continue outLoop // Restart the client selection process
}
case 403, 408, 500, 502, 503, 504:
log.Debugf("http status code %d, switch client", err.StatusCode)
retryCount++
continue outLoop
} else {
default:
// Forward other errors directly to the client
c.Status(err.StatusCode)
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
flusher.Flush()