From ed23472d9427d55a68656a898902797b62a15a7e Mon Sep 17 00:00:00 2001 From: Ben Vargas Date: Wed, 19 Nov 2025 13:05:38 -0700 Subject: [PATCH] fix(executor): prevent streaming from 429 response when fallback available Fix critical bug where ExecuteStream would create a streaming channel using a 429 error response instead of continuing to the next fallback model after exhausting retries. When 429 retries were exhausted and a fallback model was available, the inner retry loop would break but immediately fall through to the streaming channel creation, attempting to stream from the failed 429 response instead of trying the next model. Solution: Add shouldContinueToNextModel flag to explicitly skip the streaming logic and continue the outer model loop when appropriate. Identified by: codex-bot review Ref: https://github.com/router-for-me/CLIProxyAPI/pull/280#pullrequestreview-3484479106 --- internal/runtime/executor/gemini_cli_executor.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index 5e932fbd..294761c8 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -279,6 +279,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut var httpResp *http.Response var payload []byte var errDo error + shouldContinueToNextModel := false // Inner retry loop for 429 errors on the same model for retryCount := 0; retryCount <= maxRetries; retryCount++ { @@ -364,6 +365,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut // Exhausted retries for this model, try next model if available if idx+1 < len(models) { log.Infof("gemini cli executor: rate limited, exhausted %d retries for stream model %s, trying fallback model: %s", maxRetries, attemptModel, models[idx+1]) + shouldContinueToNextModel = true break // Break inner loop to try next model } else { log.Infof("gemini cli executor: rate limited, exhausted %d retries for stream model %s, no additional fallback model", maxRetries, attemptModel) @@ -385,6 +387,11 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut break } + // If we need to try the next fallback model, skip streaming logic + if shouldContinueToNextModel { + continue + } + out := make(chan cliproxyexecutor.StreamChunk) stream = out go func(resp *http.Response, reqBody []byte, attempt string) {