From 3d8d02bfc394dff9a1853d6a1656cc4c0333c81c Mon Sep 17 00:00:00 2001
From: Ben Vargas <ben@vargas.com>
Date: Wed, 19 Nov 2025 19:11:35 -0700
Subject: [PATCH] Fix amp v1beta1 routing and gemini retry config

---
 internal/api/modules/amp/routes.go            | 27 ++++++++++++-
 internal/api/modules/amp/routes_test.go       | 40 +++++++++++--------
 .../runtime/executor/gemini_cli_executor.go   | 17 ++++----
 3 files changed, 59 insertions(+), 25 deletions(-)

diff --git a/internal/api/modules/amp/routes.go b/internal/api/modules/amp/routes.go
index e0c11b5a..0f584b5d 100644
--- a/internal/api/modules/amp/routes.go
+++ b/internal/api/modules/amp/routes.go
@@ -10,6 +10,7 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/claude"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/gemini"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/openai"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	log "github.com/sirupsen/logrus"
 )
 
@@ -105,7 +106,31 @@ func (m *AmpModule) registerManagementRoutes(engine *gin.Engine, baseHandler *ha
 	geminiV1Beta1Fallback := NewFallbackHandler(func() *httputil.ReverseProxy {
 		return m.proxy
 	})
-	ampAPI.POST("/provider/google/v1beta1/*path", geminiV1Beta1Fallback.WrapHandler(geminiBridge))
+	geminiV1Beta1Handler := geminiV1Beta1Fallback.WrapHandler(geminiBridge)
+
+	// Route POST model calls through Gemini bridge when a local provider exists, otherwise proxy.
+	// All other methods (e.g., GET model listing) always proxy to upstream to preserve Amp CLI behavior.
+	ampAPI.Any("/provider/google/v1beta1/*path", func(c *gin.Context) {
+		const modelsMarker = "/models/"
+		reqPath := c.Param("path")
+		// Only a POST whose path contains the models marker is a candidate for local handling.
+		if start := strings.Index(reqPath, modelsMarker); c.Request.Method == "POST" && start >= 0 {
+			model := reqPath[start+len(modelsMarker):]
+			if end := strings.Index(model, ":"); end > 0 {
+				model = model[:end] // keep only the text before the first colon
+			}
+			if model != "" {
+				normalized, _ := util.NormalizeGeminiThinkingModel(model)
+				// Serve through the Gemini bridge only if a provider is returned for the model.
+				if len(util.GetProviderName(normalized)) > 0 {
+					geminiV1Beta1Handler(c)
+					return
+				}
+			}
+		}
+		// Everything else (other methods, no model, no provider) goes to the upstream proxy.
+		proxyHandler(c)
+	})
 }
 
 // registerProviderAliases registers /api/provider/{provider}/... routes
diff --git a/internal/api/modules/amp/routes_test.go b/internal/api/modules/amp/routes_test.go
index 953b93bd..38da1ed6 100644
--- a/internal/api/modules/amp/routes_test.go
+++ b/internal/api/modules/amp/routes_test.go
@@ -21,34 +21,40 @@ func TestRegisterManagementRoutes(t *testing.T) {
 	}
 
 	m := &AmpModule{}
-	m.registerManagementRoutes(r, proxyHandler, false) // false = don't restrict to localhost in tests
+	base := &handlers.BaseAPIHandler{}
+	m.registerManagementRoutes(r, base, proxyHandler, false) // false = don't restrict to localhost in tests
 
-	managementPaths := []string{
-		"/api/internal",
-		"/api/internal/some/path",
-		"/api/user",
-		"/api/user/profile",
-		"/api/auth",
-		"/api/auth/login",
-		"/api/meta",
-		"/api/telemetry",
-		"/api/threads",
-		"/api/otel",
-		"/api/provider/google/v1beta1/models",
+	managementPaths := []struct {
+		path   string
+		method string
+	}{
+		{"/api/internal", http.MethodGet},
+		{"/api/internal/some/path", http.MethodGet},
+		{"/api/user", http.MethodGet},
+		{"/api/user/profile", http.MethodGet},
+		{"/api/auth", http.MethodGet},
+		{"/api/auth/login", http.MethodGet},
+		{"/api/meta", http.MethodGet},
+		{"/api/telemetry", http.MethodGet},
+		{"/api/threads", http.MethodGet},
+		{"/api/otel", http.MethodGet},
+		// Google v1beta1: a path without a /models/ segment must reach the proxy for both GET and POST
+		{"/api/provider/google/v1beta1/models", http.MethodGet},
+		{"/api/provider/google/v1beta1/models", http.MethodPost},
 	}
 
 	for _, path := range managementPaths {
-		t.Run(path, func(t *testing.T) {
+		t.Run(path.path, func(t *testing.T) {
 			proxyCalled = false
-			req := httptest.NewRequest(http.MethodGet, path, nil)
+			req := httptest.NewRequest(path.method, path.path, nil)
 			w := httptest.NewRecorder()
 			r.ServeHTTP(w, req)
 
 			if w.Code == http.StatusNotFound {
-				t.Fatalf("route %s not registered", path)
+				t.Fatalf("route %s not registered", path.path)
 			}
 			if !proxyCalled {
-				t.Fatalf("proxy handler not called for %s", path)
+				t.Fatalf("proxy handler not called for %s", path.path)
 			}
 		})
 	}
diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go
index 2f48871b..29b21fd4 100644
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -101,13 +101,13 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 
 	// Get max retry count from config, default to 3 if not set
 	maxRetries := e.cfg.RequestRetry
-	if maxRetries <= 0 {
+	if maxRetries < 0 {
 		maxRetries = 3
 	}
 
 	for idx, attemptModel := range models {
-		// Inner retry loop for 429 errors on the same model
-		for retryCount := 0; retryCount <= maxRetries; retryCount++ {
+		retryCount := 0
+		for {
 			payload := append([]byte(nil), basePayload...)
 			if action == "countTokens" {
 				payload = deleteJSONField(payload, "project")
@@ -185,7 +185,8 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 				if retryCount < maxRetries {
 					// Parse retry delay from Google's response
 					retryDelay := parseRetryDelay(data)
-					log.Infof("gemini cli executor: rate limited (429), retrying model %s in %v (attempt %d/%d)", attemptModel, retryDelay, retryCount+1, maxRetries)
+					log.Infof("gemini cli executor: rate limited (429), retrying model %s in %v (retry %d/%d)", attemptModel, retryDelay, retryCount+1, maxRetries)
+					retryCount++
 
 					// Wait for the specified delay
 					select {
@@ -271,7 +272,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 
 	// Get max retry count from config, default to 3 if not set
 	maxRetries := e.cfg.RequestRetry
-	if maxRetries <= 0 {
+	if maxRetries < 0 {
 		maxRetries = 3
 	}
 
@@ -281,8 +282,9 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 		var errDo error
 		shouldContinueToNextModel := false
 
+		retryCount := 0
 		// Inner retry loop for 429 errors on the same model
-		for retryCount := 0; retryCount <= maxRetries; retryCount++ {
+		for {
 			payload = append([]byte(nil), basePayload...)
 			payload = setJSONField(payload, "project", projectID)
 			payload = setJSONField(payload, "model", attemptModel)
@@ -349,7 +351,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 					if retryCount < maxRetries {
 						// Parse retry delay from Google's response
 						retryDelay := parseRetryDelay(data)
-						log.Infof("gemini cli executor: rate limited (429), retrying stream model %s in %v (attempt %d/%d)", attemptModel, retryDelay, retryCount+1, maxRetries)
+						log.Infof("gemini cli executor: rate limited (429), retrying stream model %s in %v (retry %d/%d)", attemptModel, retryDelay, retryCount+1, maxRetries)
+						retryCount++
 
 						// Wait for the specified delay
 						select {