From 3569e5779a625c15d962e14e7c5a6ea7cf8d621d Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Tue, 21 Oct 2025 18:44:28 +0800 Subject: [PATCH] feat: enhance quota management with backoff levels and cooldown logic --- sdk/cliproxy/auth/manager.go | 38 +++++++++++++++++++++++++++++++++--- sdk/cliproxy/auth/types.go | 2 ++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/sdk/cliproxy/auth/manager.go b/sdk/cliproxy/auth/manager.go index 87507624..10832d31 100644 --- a/sdk/cliproxy/auth/manager.go +++ b/sdk/cliproxy/auth/manager.go @@ -40,6 +40,8 @@ const ( refreshCheckInterval = 5 * time.Second refreshPendingBackoff = time.Minute refreshFailureBackoff = 5 * time.Minute + quotaBackoffBase = time.Second + quotaBackoffMax = 30 * time.Minute ) // Result captures execution outcome used to adjust auth state. @@ -532,9 +534,15 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) { suspendReason = "payment_required" shouldSuspendModel = true case 429: - next := now.Add(30 * time.Minute) + cooldown, nextLevel := nextQuotaCooldown(state.Quota.BackoffLevel) + next := now.Add(cooldown) state.NextRetryAfter = next - state.Quota = QuotaState{Exceeded: true, Reason: "quota", NextRecoverAt: next} + state.Quota = QuotaState{ + Exceeded: true, + Reason: "quota", + NextRecoverAt: next, + BackoffLevel: nextLevel, + } suspendReason = "quota" shouldSuspendModel = true setModelQuota = true @@ -608,6 +616,7 @@ func updateAggregatedAvailability(auth *Auth, now time.Time) { earliestRetry := time.Time{} quotaExceeded := false quotaRecover := time.Time{} + maxBackoffLevel := 0 for _, state := range auth.ModelStates { if state == nil { continue @@ -636,6 +645,9 @@ func updateAggregatedAvailability(auth *Auth, now time.Time) { if quotaRecover.IsZero() || (!state.Quota.NextRecoverAt.IsZero() && state.Quota.NextRecoverAt.Before(quotaRecover)) { quotaRecover = state.Quota.NextRecoverAt } + if state.Quota.BackoffLevel > maxBackoffLevel { + maxBackoffLevel = 
state.Quota.BackoffLevel + } } } auth.Unavailable = allUnavailable @@ -648,10 +660,12 @@ func updateAggregatedAvailability(auth *Auth, now time.Time) { auth.Quota.Exceeded = true auth.Quota.Reason = "quota" auth.Quota.NextRecoverAt = quotaRecover + auth.Quota.BackoffLevel = maxBackoffLevel } else { auth.Quota.Exceeded = false auth.Quota.Reason = "" auth.Quota.NextRecoverAt = time.Time{} + auth.Quota.BackoffLevel = 0 } } @@ -685,6 +699,7 @@ func clearAuthStateOnSuccess(auth *Auth, now time.Time) { auth.Quota.Exceeded = false auth.Quota.Reason = "" auth.Quota.NextRecoverAt = time.Time{} + auth.Quota.BackoffLevel = 0 auth.LastError = nil auth.NextRetryAfter = time.Time{} auth.UpdatedAt = now @@ -734,7 +749,9 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, now time.Time) { auth.StatusMessage = "quota exhausted" auth.Quota.Exceeded = true auth.Quota.Reason = "quota" - auth.Quota.NextRecoverAt = now.Add(30 * time.Minute) + cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel) + auth.Quota.NextRecoverAt = now.Add(cooldown) + auth.Quota.BackoffLevel = nextLevel auth.NextRetryAfter = auth.Quota.NextRecoverAt case 408, 500, 502, 503, 504: auth.StatusMessage = "transient upstream error" @@ -746,6 +763,21 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, now time.Time) { } } +// nextQuotaCooldown returns the next cooldown duration and updated backoff level for repeated quota errors. +func nextQuotaCooldown(prevLevel int) (time.Duration, int) { + if prevLevel < 0 { + prevLevel = 0 + } + cooldown := quotaBackoffBase * time.Duration(1<<prevLevel) + if cooldown < quotaBackoffBase { + cooldown = quotaBackoffBase + } + if cooldown >= quotaBackoffMax { + return quotaBackoffMax, prevLevel + } + return cooldown, prevLevel + 1 +} + // List returns all auth entries currently known by the manager. 
func (m *Manager) List() []*Auth { m.mu.RLock() diff --git a/sdk/cliproxy/auth/types.go b/sdk/cliproxy/auth/types.go index 05616cb7..35594bd8 100644 --- a/sdk/cliproxy/auth/types.go +++ b/sdk/cliproxy/auth/types.go @@ -65,6 +65,8 @@ type QuotaState struct { Reason string `json:"reason,omitempty"` // NextRecoverAt is when the credential may become available again. NextRecoverAt time.Time `json:"next_recover_at"` + // BackoffLevel stores the progressive cooldown exponent used for rate limits. + BackoffLevel int `json:"backoff_level,omitempty"` } // ModelState captures the execution state for a specific model under an auth entry.