diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index 5e932fbd..294761c8 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -279,6 +279,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut var httpResp *http.Response var payload []byte var errDo error + shouldContinueToNextModel := false // Inner retry loop for 429 errors on the same model for retryCount := 0; retryCount <= maxRetries; retryCount++ { @@ -364,6 +365,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut // Exhausted retries for this model, try next model if available if idx+1 < len(models) { log.Infof("gemini cli executor: rate limited, exhausted %d retries for stream model %s, trying fallback model: %s", maxRetries, attemptModel, models[idx+1]) + shouldContinueToNextModel = true break // Break inner loop to try next model } else { log.Infof("gemini cli executor: rate limited, exhausted %d retries for stream model %s, no additional fallback model", maxRetries, attemptModel) @@ -385,6 +387,11 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut break } + // If we need to try the next fallback model, skip streaming logic + if shouldContinueToNextModel { + continue + } + out := make(chan cliproxyexecutor.StreamChunk) stream = out go func(resp *http.Response, reqBody []byte, attempt string) {