From 3d8d02bfc394dff9a1853d6a1656cc4c0333c81c Mon Sep 17 00:00:00 2001 From: Ben Vargas Date: Wed, 19 Nov 2025 19:11:35 -0700 Subject: [PATCH] Fix amp v1beta1 routing and gemini retry config --- internal/api/modules/amp/routes.go | 27 ++++++++++++- internal/api/modules/amp/routes_test.go | 40 +++++++++++-------- .../runtime/executor/gemini_cli_executor.go | 17 ++++---- 3 files changed, 59 insertions(+), 25 deletions(-) diff --git a/internal/api/modules/amp/routes.go b/internal/api/modules/amp/routes.go index e0c11b5a..0f584b5d 100644 --- a/internal/api/modules/amp/routes.go +++ b/internal/api/modules/amp/routes.go @@ -10,6 +10,7 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/claude" "github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/gemini" "github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/openai" + "github.com/router-for-me/CLIProxyAPI/v6/internal/util" log "github.com/sirupsen/logrus" ) @@ -105,7 +106,31 @@ func (m *AmpModule) registerManagementRoutes(engine *gin.Engine, baseHandler *ha geminiV1Beta1Fallback := NewFallbackHandler(func() *httputil.ReverseProxy { return m.proxy }) - ampAPI.POST("/provider/google/v1beta1/*path", geminiV1Beta1Fallback.WrapHandler(geminiBridge)) + geminiV1Beta1Handler := geminiV1Beta1Fallback.WrapHandler(geminiBridge) + + // Route POST model calls through Gemini bridge when a local provider exists, otherwise proxy. + // All other methods (e.g., GET model listing) always proxy to upstream to preserve Amp CLI behavior. + ampAPI.Any("/provider/google/v1beta1/*path", func(c *gin.Context) { + if c.Request.Method == "POST" { + // Attempt to extract the model name from the AMP-style path + if path := c.Param("path"); strings.Contains(path, "/models/") { + modelPart := path[strings.Index(path, "/models/")+len("/models/"):] + if colonIdx := strings.Index(modelPart, ":"); colonIdx > 0 { + modelPart = modelPart[:colonIdx] + } + if modelPart != "" { + normalized, _ := util.NormalizeGeminiThinkingModel(modelPart) + // Only handle locally when we have a provider; otherwise fall back to proxy + if providers := util.GetProviderName(normalized); len(providers) > 0 { + geminiV1Beta1Handler(c) + return + } + } + } + } + // Non-POST or no local provider available -> proxy upstream + proxyHandler(c) + }) } // registerProviderAliases registers /api/provider/{provider}/... routes diff --git a/internal/api/modules/amp/routes_test.go b/internal/api/modules/amp/routes_test.go index 953b93bd..38da1ed6 100644 --- a/internal/api/modules/amp/routes_test.go +++ b/internal/api/modules/amp/routes_test.go @@ -21,34 +21,40 @@ func TestRegisterManagementRoutes(t *testing.T) { } m := &AmpModule{} - m.registerManagementRoutes(r, proxyHandler, false) // false = don't restrict to localhost in tests + base := &handlers.BaseAPIHandler{} + m.registerManagementRoutes(r, base, proxyHandler, false) // false = don't restrict to localhost in tests - managementPaths := []string{ - "/api/internal", - "/api/internal/some/path", - "/api/user", - "/api/user/profile", - "/api/auth", - "/api/auth/login", - "/api/meta", - "/api/telemetry", - "/api/threads", - "/api/otel", - "/api/provider/google/v1beta1/models", + managementPaths := []struct { + path string + method string + }{ + {"/api/internal", http.MethodGet}, + {"/api/internal/some/path", http.MethodGet}, + {"/api/user", http.MethodGet}, + {"/api/user/profile", http.MethodGet}, + {"/api/auth", http.MethodGet}, + {"/api/auth/login", http.MethodGet}, + {"/api/meta", http.MethodGet}, + {"/api/telemetry", http.MethodGet}, + {"/api/threads", http.MethodGet}, + {"/api/otel", http.MethodGet}, + // Google v1beta1 bridge should still proxy non-model requests (GET) and allow POST + {"/api/provider/google/v1beta1/models", http.MethodGet}, + {"/api/provider/google/v1beta1/models", http.MethodPost}, } for _, path := range managementPaths { - t.Run(path, func(t *testing.T) { + t.Run(path.path, func(t *testing.T) { proxyCalled = false - req := httptest.NewRequest(http.MethodGet, path, nil) + req := httptest.NewRequest(path.method, path.path, nil) w := httptest.NewRecorder() r.ServeHTTP(w, req) if w.Code == http.StatusNotFound { - t.Fatalf("route %s not registered", path) + t.Fatalf("route %s not registered", path.path) } if !proxyCalled { - t.Fatalf("proxy handler not called for %s", path) + t.Fatalf("proxy handler not called for %s", path.path) } }) } diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index 2f48871b..29b21fd4 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -101,13 +101,13 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth // Get max retry count from config, default to 3 if not set maxRetries := e.cfg.RequestRetry - if maxRetries <= 0 { + if maxRetries < 0 { maxRetries = 3 } for idx, attemptModel := range models { - // Inner retry loop for 429 errors on the same model - for retryCount := 0; retryCount <= maxRetries; retryCount++ { + retryCount := 0 + for { payload := append([]byte(nil), basePayload...) if action == "countTokens" { payload = deleteJSONField(payload, "project") @@ -185,7 +185,8 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth if retryCount < maxRetries { // Parse retry delay from Google's response retryDelay := parseRetryDelay(data) - log.Infof("gemini cli executor: rate limited (429), retrying model %s in %v (attempt %d/%d)", attemptModel, retryDelay, retryCount+1, maxRetries) + log.Infof("gemini cli executor: rate limited (429), retrying model %s in %v (retry %d/%d)", attemptModel, retryDelay, retryCount+1, maxRetries) + retryCount++ // Wait for the specified delay select { @@ -271,7 +272,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut // Get max retry count from config, default to 3 if not set maxRetries := e.cfg.RequestRetry - if maxRetries <= 0 { + if maxRetries < 0 { maxRetries = 3 } @@ -281,8 +282,9 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut var errDo error shouldContinueToNextModel := false + retryCount := 0 // Inner retry loop for 429 errors on the same model - for retryCount := 0; retryCount <= maxRetries; retryCount++ { + for { payload = append([]byte(nil), basePayload...) payload = setJSONField(payload, "project", projectID) payload = setJSONField(payload, "model", attemptModel) @@ -349,7 +351,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut if retryCount < maxRetries { // Parse retry delay from Google's response retryDelay := parseRetryDelay(data) - log.Infof("gemini cli executor: rate limited (429), retrying stream model %s in %v (attempt %d/%d)", attemptModel, retryDelay, retryCount+1, maxRetries) + log.Infof("gemini cli executor: rate limited (429), retrying stream model %s in %v (retry %d/%d)", attemptModel, retryDelay, retryCount+1, maxRetries) + retryCount++ // Wait for the specified delay select {