Fix amp v1beta1 routing and gemini retry config

This commit is contained in:
Ben Vargas
2025-11-19 19:11:35 -07:00
parent 7ae00320dc
commit 3d8d02bfc3
3 changed files with 59 additions and 25 deletions

View File

@@ -10,6 +10,7 @@ import (
"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/claude" "github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/claude"
"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/gemini" "github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/gemini"
"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/openai" "github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/openai"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
) )
@@ -105,7 +106,31 @@ func (m *AmpModule) registerManagementRoutes(engine *gin.Engine, baseHandler *ha
geminiV1Beta1Fallback := NewFallbackHandler(func() *httputil.ReverseProxy { geminiV1Beta1Fallback := NewFallbackHandler(func() *httputil.ReverseProxy {
return m.proxy return m.proxy
}) })
ampAPI.POST("/provider/google/v1beta1/*path", geminiV1Beta1Fallback.WrapHandler(geminiBridge)) geminiV1Beta1Handler := geminiV1Beta1Fallback.WrapHandler(geminiBridge)
// Route POST model calls through Gemini bridge when a local provider exists, otherwise proxy.
// All other methods (e.g., GET model listing) always proxy to upstream to preserve Amp CLI behavior.
ampAPI.Any("/provider/google/v1beta1/*path", func(c *gin.Context) {
if c.Request.Method == "POST" {
// Attempt to extract the model name from the AMP-style path
if path := c.Param("path"); strings.Contains(path, "/models/") {
modelPart := path[strings.Index(path, "/models/")+len("/models/"):]
if colonIdx := strings.Index(modelPart, ":"); colonIdx > 0 {
modelPart = modelPart[:colonIdx]
}
if modelPart != "" {
normalized, _ := util.NormalizeGeminiThinkingModel(modelPart)
// Only handle locally when we have a provider; otherwise fall back to proxy
if providers := util.GetProviderName(normalized); len(providers) > 0 {
geminiV1Beta1Handler(c)
return
}
}
}
}
// Non-POST or no local provider available -> proxy upstream
proxyHandler(c)
})
} }
// registerProviderAliases registers /api/provider/{provider}/... routes // registerProviderAliases registers /api/provider/{provider}/... routes

View File

@@ -21,34 +21,40 @@ func TestRegisterManagementRoutes(t *testing.T) {
} }
m := &AmpModule{} m := &AmpModule{}
m.registerManagementRoutes(r, proxyHandler, false) // false = don't restrict to localhost in tests base := &handlers.BaseAPIHandler{}
m.registerManagementRoutes(r, base, proxyHandler, false) // false = don't restrict to localhost in tests
managementPaths := []string{ managementPaths := []struct {
"/api/internal", path string
"/api/internal/some/path", method string
"/api/user", }{
"/api/user/profile", {"/api/internal", http.MethodGet},
"/api/auth", {"/api/internal/some/path", http.MethodGet},
"/api/auth/login", {"/api/user", http.MethodGet},
"/api/meta", {"/api/user/profile", http.MethodGet},
"/api/telemetry", {"/api/auth", http.MethodGet},
"/api/threads", {"/api/auth/login", http.MethodGet},
"/api/otel", {"/api/meta", http.MethodGet},
"/api/provider/google/v1beta1/models", {"/api/telemetry", http.MethodGet},
{"/api/threads", http.MethodGet},
{"/api/otel", http.MethodGet},
// Google v1beta1 route: GET is always proxied, and a POST whose path has no "/models/<name>" segment also falls through to the proxy
{"/api/provider/google/v1beta1/models", http.MethodGet},
{"/api/provider/google/v1beta1/models", http.MethodPost},
} }
for _, path := range managementPaths { for _, path := range managementPaths {
t.Run(path, func(t *testing.T) { t.Run(path.path, func(t *testing.T) {
proxyCalled = false proxyCalled = false
req := httptest.NewRequest(http.MethodGet, path, nil) req := httptest.NewRequest(path.method, path.path, nil)
w := httptest.NewRecorder() w := httptest.NewRecorder()
r.ServeHTTP(w, req) r.ServeHTTP(w, req)
if w.Code == http.StatusNotFound { if w.Code == http.StatusNotFound {
t.Fatalf("route %s not registered", path) t.Fatalf("route %s not registered", path.path)
} }
if !proxyCalled { if !proxyCalled {
t.Fatalf("proxy handler not called for %s", path) t.Fatalf("proxy handler not called for %s", path.path)
} }
}) })
} }

View File

@@ -101,13 +101,13 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
// Get max retry count from config, default to 3 if not set // Get max retry count from config; 0 is kept and means no retries, only a negative value falls back to 3
maxRetries := e.cfg.RequestRetry maxRetries := e.cfg.RequestRetry
if maxRetries <= 0 { if maxRetries < 0 {
maxRetries = 3 maxRetries = 3
} }
for idx, attemptModel := range models { for idx, attemptModel := range models {
// Inner retry loop for 429 errors on the same model retryCount := 0
for retryCount := 0; retryCount <= maxRetries; retryCount++ { for {
payload := append([]byte(nil), basePayload...) payload := append([]byte(nil), basePayload...)
if action == "countTokens" { if action == "countTokens" {
payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "project")
@@ -185,7 +185,8 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
if retryCount < maxRetries { if retryCount < maxRetries {
// Parse retry delay from Google's response // Parse retry delay from Google's response
retryDelay := parseRetryDelay(data) retryDelay := parseRetryDelay(data)
log.Infof("gemini cli executor: rate limited (429), retrying model %s in %v (attempt %d/%d)", attemptModel, retryDelay, retryCount+1, maxRetries) log.Infof("gemini cli executor: rate limited (429), retrying model %s in %v (retry %d/%d)", attemptModel, retryDelay, retryCount+1, maxRetries)
retryCount++
// Wait for the specified delay // Wait for the specified delay
select { select {
@@ -271,7 +272,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
// Get max retry count from config, default to 3 if not set // Get max retry count from config; 0 is kept and means no retries, only a negative value falls back to 3
maxRetries := e.cfg.RequestRetry maxRetries := e.cfg.RequestRetry
if maxRetries <= 0 { if maxRetries < 0 {
maxRetries = 3 maxRetries = 3
} }
@@ -281,8 +282,9 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
var errDo error var errDo error
shouldContinueToNextModel := false shouldContinueToNextModel := false
retryCount := 0
// Inner retry loop for 429 errors on the same model // Inner retry loop for 429 errors on the same model
for retryCount := 0; retryCount <= maxRetries; retryCount++ { for {
payload = append([]byte(nil), basePayload...) payload = append([]byte(nil), basePayload...)
payload = setJSONField(payload, "project", projectID) payload = setJSONField(payload, "project", projectID)
payload = setJSONField(payload, "model", attemptModel) payload = setJSONField(payload, "model", attemptModel)
@@ -349,7 +351,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
if retryCount < maxRetries { if retryCount < maxRetries {
// Parse retry delay from Google's response // Parse retry delay from Google's response
retryDelay := parseRetryDelay(data) retryDelay := parseRetryDelay(data)
log.Infof("gemini cli executor: rate limited (429), retrying stream model %s in %v (attempt %d/%d)", attemptModel, retryDelay, retryCount+1, maxRetries) log.Infof("gemini cli executor: rate limited (429), retrying stream model %s in %v (retry %d/%d)", attemptModel, retryDelay, retryCount+1, maxRetries)
retryCount++
// Wait for the specified delay // Wait for the specified delay
select { select {