diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go
index 5e932fbd..294761c8 100644
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -279,6 +279,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 		var httpResp *http.Response
 		var payload []byte
 		var errDo error
+		shouldContinueToNextModel := false
 
 		// Inner retry loop for 429 errors on the same model
 		for retryCount := 0; retryCount <= maxRetries; retryCount++ {
@@ -364,6 +365,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 						// Exhausted retries for this model, try next model if available
 						if idx+1 < len(models) {
 							log.Infof("gemini cli executor: rate limited, exhausted %d retries for stream model %s, trying fallback model: %s", maxRetries, attemptModel, models[idx+1])
+							shouldContinueToNextModel = true
 							break // Break inner loop to try next model
 						} else {
 							log.Infof("gemini cli executor: rate limited, exhausted %d retries for stream model %s, no additional fallback model", maxRetries, attemptModel)
@@ -385,6 +387,11 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 			break
 		}
 
+		// If we need to try the next fallback model, skip streaming logic
+		if shouldContinueToNextModel {
+			continue
+		}
+
 		out := make(chan cliproxyexecutor.StreamChunk)
 		stream = out
 		go func(resp *http.Response, reqBody []byte, attempt string) {