feat: enhance quota management with backoff levels and cooldown logic

This commit is contained in:
Luis Pater
2025-10-21 18:44:28 +08:00
parent 20985d1a10
commit 3569e5779a
2 changed files with 37 additions and 3 deletions

View File

@@ -40,6 +40,8 @@ const (
refreshCheckInterval = 5 * time.Second
refreshPendingBackoff = time.Minute
refreshFailureBackoff = 5 * time.Minute
quotaBackoffBase = time.Second
quotaBackoffMax = 30 * time.Minute
)
// Result captures execution outcome used to adjust auth state.
@@ -532,9 +534,15 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
suspendReason = "payment_required"
shouldSuspendModel = true
case 429:
next := now.Add(30 * time.Minute)
cooldown, nextLevel := nextQuotaCooldown(state.Quota.BackoffLevel)
next := now.Add(cooldown)
state.NextRetryAfter = next
state.Quota = QuotaState{Exceeded: true, Reason: "quota", NextRecoverAt: next}
state.Quota = QuotaState{
Exceeded: true,
Reason: "quota",
NextRecoverAt: next,
BackoffLevel: nextLevel,
}
suspendReason = "quota"
shouldSuspendModel = true
setModelQuota = true
@@ -608,6 +616,7 @@ func updateAggregatedAvailability(auth *Auth, now time.Time) {
earliestRetry := time.Time{}
quotaExceeded := false
quotaRecover := time.Time{}
maxBackoffLevel := 0
for _, state := range auth.ModelStates {
if state == nil {
continue
@@ -636,6 +645,9 @@ func updateAggregatedAvailability(auth *Auth, now time.Time) {
if quotaRecover.IsZero() || (!state.Quota.NextRecoverAt.IsZero() && state.Quota.NextRecoverAt.Before(quotaRecover)) {
quotaRecover = state.Quota.NextRecoverAt
}
if state.Quota.BackoffLevel > maxBackoffLevel {
maxBackoffLevel = state.Quota.BackoffLevel
}
}
}
auth.Unavailable = allUnavailable
@@ -648,10 +660,12 @@ func updateAggregatedAvailability(auth *Auth, now time.Time) {
auth.Quota.Exceeded = true
auth.Quota.Reason = "quota"
auth.Quota.NextRecoverAt = quotaRecover
auth.Quota.BackoffLevel = maxBackoffLevel
} else {
auth.Quota.Exceeded = false
auth.Quota.Reason = ""
auth.Quota.NextRecoverAt = time.Time{}
auth.Quota.BackoffLevel = 0
}
}
@@ -685,6 +699,7 @@ func clearAuthStateOnSuccess(auth *Auth, now time.Time) {
auth.Quota.Exceeded = false
auth.Quota.Reason = ""
auth.Quota.NextRecoverAt = time.Time{}
auth.Quota.BackoffLevel = 0
auth.LastError = nil
auth.NextRetryAfter = time.Time{}
auth.UpdatedAt = now
@@ -734,7 +749,9 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, now time.Time) {
auth.StatusMessage = "quota exhausted"
auth.Quota.Exceeded = true
auth.Quota.Reason = "quota"
auth.Quota.NextRecoverAt = now.Add(30 * time.Minute)
cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel)
auth.Quota.NextRecoverAt = now.Add(cooldown)
auth.Quota.BackoffLevel = nextLevel
auth.NextRetryAfter = auth.Quota.NextRecoverAt
case 408, 500, 502, 503, 504:
auth.StatusMessage = "transient upstream error"
@@ -746,6 +763,21 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, now time.Time) {
}
}
// nextQuotaCooldown returns the next cooldown duration and updated backoff level for repeated quota errors.
func nextQuotaCooldown(prevLevel int) (time.Duration, int) {
if prevLevel < 0 {
prevLevel = 0
}
cooldown := quotaBackoffBase * time.Duration(1<<prevLevel)
if cooldown < quotaBackoffBase {
cooldown = quotaBackoffBase
}
if cooldown >= quotaBackoffMax {
return quotaBackoffMax, prevLevel
}
return cooldown, prevLevel + 1
}
// List returns all auth entries currently known by the manager.
func (m *Manager) List() []*Auth {
m.mu.RLock()