fix(executor): prevent streaming from 429 response when fallback available

Fix critical bug where ExecuteStream would create a streaming channel
using a 429 error response instead of continuing to the next fallback
model after exhausting retries.

When 429 retries were exhausted and a fallback model was available,
the inner retry loop would break but immediately fall through to the
streaming channel creation, attempting to stream from the failed 429
response instead of trying the next model.

Solution: Add shouldContinueToNextModel flag to explicitly skip the
streaming logic and continue the outer model loop when appropriate.

Identified by: codex-bot review
Ref: https://github.com/router-for-me/CLIProxyAPI/pull/280#pullrequestreview-3484479106
This commit is contained in:
Ben Vargas
2025-11-19 13:05:38 -07:00
parent ede4471b84
commit ed23472d94

View File

@@ -279,6 +279,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
var httpResp *http.Response
var payload []byte
var errDo error
shouldContinueToNextModel := false
// Inner retry loop for 429 errors on the same model
for retryCount := 0; retryCount <= maxRetries; retryCount++ {
@@ -364,6 +365,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
// Exhausted retries for this model, try next model if available
if idx+1 < len(models) {
log.Infof("gemini cli executor: rate limited, exhausted %d retries for stream model %s, trying fallback model: %s", maxRetries, attemptModel, models[idx+1])
shouldContinueToNextModel = true
break // Break inner loop to try next model
} else {
log.Infof("gemini cli executor: rate limited, exhausted %d retries for stream model %s, no additional fallback model", maxRetries, attemptModel)
@@ -385,6 +387,11 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
break
}
// If we need to try the next fallback model, skip streaming logic
if shouldContinueToNextModel {
continue
}
out := make(chan cliproxyexecutor.StreamChunk)
stream = out
go func(resp *http.Response, reqBody []byte, attempt string) {