mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-02-19 04:40:52 +08:00
feat(executor): add intelligent retry logic for 429 rate limits
Add support for Google's RetryInfo.retryDelay when handling 429 rate-limit errors. On a 429, the same model is retried up to the configured retry count (default 3), waiting for the delay reported by Google's API before each retry, and only then are fallback models tried.
- Add parseRetryDelay() to extract Google's retry guidance from the error body
- Implement an inner retry loop in Execute() and ExecuteStream()
- Make the wait context-aware so it stops as soon as the request is cancelled
- Cap delays at a maximum of 60s for safety
This commit is contained in:
@@ -99,7 +99,15 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
|
|||||||
var lastStatus int
|
var lastStatus int
|
||||||
var lastBody []byte
|
var lastBody []byte
|
||||||
|
|
||||||
|
// Get max retry count from config, default to 3 if not set
|
||||||
|
maxRetries := e.cfg.RequestRetry
|
||||||
|
if maxRetries <= 0 {
|
||||||
|
maxRetries = 3
|
||||||
|
}
|
||||||
|
|
||||||
for idx, attemptModel := range models {
|
for idx, attemptModel := range models {
|
||||||
|
// Inner retry loop for 429 errors on the same model
|
||||||
|
for retryCount := 0; retryCount <= maxRetries; retryCount++ {
|
||||||
payload := append([]byte(nil), basePayload...)
|
payload := append([]byte(nil), basePayload...)
|
||||||
if action == "countTokens" {
|
if action == "countTokens" {
|
||||||
payload = deleteJSONField(payload, "project")
|
payload = deleteJSONField(payload, "project")
|
||||||
@@ -171,19 +179,45 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
|
|||||||
lastStatus = httpResp.StatusCode
|
lastStatus = httpResp.StatusCode
|
||||||
lastBody = append([]byte(nil), data...)
|
lastBody = append([]byte(nil), data...)
|
||||||
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
|
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
|
||||||
if httpResp.StatusCode == 429 {
|
|
||||||
if idx+1 < len(models) {
|
|
||||||
log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
|
|
||||||
} else {
|
|
||||||
log.Debug("gemini cli executor: rate limited, no additional fallback model")
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// Handle 429 rate limit errors with retry
|
||||||
|
if httpResp.StatusCode == 429 {
|
||||||
|
if retryCount < maxRetries {
|
||||||
|
// Parse retry delay from Google's response
|
||||||
|
retryDelay := parseRetryDelay(data)
|
||||||
|
log.Infof("gemini cli executor: rate limited (429), retrying model %s in %v (attempt %d/%d)", attemptModel, retryDelay, retryCount+1, maxRetries)
|
||||||
|
|
||||||
|
// Wait for the specified delay
|
||||||
|
select {
|
||||||
|
case <-time.After(retryDelay):
|
||||||
|
// Continue to next retry iteration
|
||||||
|
continue
|
||||||
|
case <-ctx.Done():
|
||||||
|
// Context cancelled, return immediately
|
||||||
|
err = ctx.Err()
|
||||||
|
return resp, err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Exhausted retries for this model, try next model if available
|
||||||
|
if idx+1 < len(models) {
|
||||||
|
log.Infof("gemini cli executor: rate limited, exhausted %d retries for model %s, trying fallback model: %s", maxRetries, attemptModel, models[idx+1])
|
||||||
|
break // Break inner loop to try next model
|
||||||
|
} else {
|
||||||
|
log.Infof("gemini cli executor: rate limited, exhausted %d retries for model %s, no additional fallback model", maxRetries, attemptModel)
|
||||||
|
// No more models to try, will return error below
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Non-429 error, don't retry this model
|
||||||
err = statusErr{code: httpResp.StatusCode, msg: string(data)}
|
err = statusErr{code: httpResp.StatusCode, msg: string(data)}
|
||||||
return resp, err
|
return resp, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Break inner loop if we hit this point (no retry needed or exhausted retries)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if len(lastBody) > 0 {
|
if len(lastBody) > 0 {
|
||||||
appendAPIResponseChunk(ctx, e.cfg, lastBody)
|
appendAPIResponseChunk(ctx, e.cfg, lastBody)
|
||||||
}
|
}
|
||||||
@@ -235,8 +269,20 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
|
|||||||
var lastStatus int
|
var lastStatus int
|
||||||
var lastBody []byte
|
var lastBody []byte
|
||||||
|
|
||||||
|
// Get max retry count from config, default to 3 if not set
|
||||||
|
maxRetries := e.cfg.RequestRetry
|
||||||
|
if maxRetries <= 0 {
|
||||||
|
maxRetries = 3
|
||||||
|
}
|
||||||
|
|
||||||
for idx, attemptModel := range models {
|
for idx, attemptModel := range models {
|
||||||
payload := append([]byte(nil), basePayload...)
|
var httpResp *http.Response
|
||||||
|
var payload []byte
|
||||||
|
var errDo error
|
||||||
|
|
||||||
|
// Inner retry loop for 429 errors on the same model
|
||||||
|
for retryCount := 0; retryCount <= maxRetries; retryCount++ {
|
||||||
|
payload = append([]byte(nil), basePayload...)
|
||||||
payload = setJSONField(payload, "project", projectID)
|
payload = setJSONField(payload, "project", projectID)
|
||||||
payload = setJSONField(payload, "model", attemptModel)
|
payload = setJSONField(payload, "model", attemptModel)
|
||||||
|
|
||||||
@@ -275,7 +321,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
|
|||||||
AuthValue: authValue,
|
AuthValue: authValue,
|
||||||
})
|
})
|
||||||
|
|
||||||
httpResp, errDo := httpClient.Do(reqHTTP)
|
httpResp, errDo = httpClient.Do(reqHTTP)
|
||||||
if errDo != nil {
|
if errDo != nil {
|
||||||
recordAPIResponseError(ctx, e.cfg, errDo)
|
recordAPIResponseError(ctx, e.cfg, errDo)
|
||||||
err = errDo
|
err = errDo
|
||||||
@@ -296,18 +342,49 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
|
|||||||
lastStatus = httpResp.StatusCode
|
lastStatus = httpResp.StatusCode
|
||||||
lastBody = append([]byte(nil), data...)
|
lastBody = append([]byte(nil), data...)
|
||||||
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
|
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
|
||||||
|
|
||||||
|
// Handle 429 rate limit errors with retry
|
||||||
if httpResp.StatusCode == 429 {
|
if httpResp.StatusCode == 429 {
|
||||||
if idx+1 < len(models) {
|
if retryCount < maxRetries {
|
||||||
log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
|
// Parse retry delay from Google's response
|
||||||
} else {
|
retryDelay := parseRetryDelay(data)
|
||||||
log.Debug("gemini cli executor: rate limited, no additional fallback model")
|
log.Infof("gemini cli executor: rate limited (429), retrying stream model %s in %v (attempt %d/%d)", attemptModel, retryDelay, retryCount+1, maxRetries)
|
||||||
}
|
|
||||||
|
// Wait for the specified delay
|
||||||
|
select {
|
||||||
|
case <-time.After(retryDelay):
|
||||||
|
// Continue to next retry iteration
|
||||||
continue
|
continue
|
||||||
|
case <-ctx.Done():
|
||||||
|
// Context cancelled, return immediately
|
||||||
|
err = ctx.Err()
|
||||||
|
return nil, err
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// Exhausted retries for this model, try next model if available
|
||||||
|
if idx+1 < len(models) {
|
||||||
|
log.Infof("gemini cli executor: rate limited, exhausted %d retries for stream model %s, trying fallback model: %s", maxRetries, attemptModel, models[idx+1])
|
||||||
|
break // Break inner loop to try next model
|
||||||
|
} else {
|
||||||
|
log.Infof("gemini cli executor: rate limited, exhausted %d retries for stream model %s, no additional fallback model", maxRetries, attemptModel)
|
||||||
|
// No more models to try, will return error below
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Non-429 error, don't retry this model
|
||||||
err = statusErr{code: httpResp.StatusCode, msg: string(data)}
|
err = statusErr{code: httpResp.StatusCode, msg: string(data)}
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Break inner loop if we hit this point (no retry needed or exhausted retries)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Success - httpResp.StatusCode is 2xx, break out of retry loop
|
||||||
|
// and proceed to streaming logic below
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
out := make(chan cliproxyexecutor.StreamChunk)
|
out := make(chan cliproxyexecutor.StreamChunk)
|
||||||
stream = out
|
stream = out
|
||||||
go func(resp *http.Response, reqBody []byte, attempt string) {
|
go func(resp *http.Response, reqBody []byte, attempt string) {
|
||||||
@@ -769,3 +846,48 @@ func fixGeminiCLIImageAspectRatio(modelName string, rawJSON []byte) []byte {
|
|||||||
}
|
}
|
||||||
return rawJSON
|
return rawJSON
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// parseRetryDelay extracts the retry delay from a Google API 429 error response.
|
||||||
|
// The error response contains a RetryInfo.retryDelay field in the format "0.847655010s".
|
||||||
|
// Returns the duration to wait, or a default duration if parsing fails.
|
||||||
|
func parseRetryDelay(errorBody []byte) time.Duration {
|
||||||
|
const defaultDelay = 1 * time.Second
|
||||||
|
const maxDelay = 60 * time.Second
|
||||||
|
|
||||||
|
// Try to parse the retryDelay from the error response
|
||||||
|
// Format: error.details[].retryDelay where @type == "type.googleapis.com/google.rpc.RetryInfo"
|
||||||
|
details := gjson.GetBytes(errorBody, "error.details")
|
||||||
|
if !details.Exists() || !details.IsArray() {
|
||||||
|
log.Debugf("parseRetryDelay: no error.details found, using default delay %v", defaultDelay)
|
||||||
|
return defaultDelay
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, detail := range details.Array() {
|
||||||
|
typeVal := detail.Get("@type").String()
|
||||||
|
if typeVal == "type.googleapis.com/google.rpc.RetryInfo" {
|
||||||
|
retryDelay := detail.Get("retryDelay").String()
|
||||||
|
if retryDelay != "" {
|
||||||
|
// Parse duration string like "0.847655010s"
|
||||||
|
duration, err := time.ParseDuration(retryDelay)
|
||||||
|
if err != nil {
|
||||||
|
log.Debugf("parseRetryDelay: failed to parse duration %q: %v, using default", retryDelay, err)
|
||||||
|
return defaultDelay
|
||||||
|
}
|
||||||
|
// Cap at maxDelay to prevent excessive waits
|
||||||
|
if duration > maxDelay {
|
||||||
|
log.Debugf("parseRetryDelay: capping delay from %v to %v", duration, maxDelay)
|
||||||
|
return maxDelay
|
||||||
|
}
|
||||||
|
if duration < 0 {
|
||||||
|
log.Debugf("parseRetryDelay: negative delay %v, using default", duration)
|
||||||
|
return defaultDelay
|
||||||
|
}
|
||||||
|
log.Debugf("parseRetryDelay: using delay %v from API response", duration)
|
||||||
|
return duration
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugf("parseRetryDelay: no RetryInfo found, using default delay %v", defaultDelay)
|
||||||
|
return defaultDelay
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user