feat: improve error handling with added status codes and headers

- Updated the Execute methods to extract `StatusCode` and `Headers` from returned errors for richer error handling.
- Introduced structured error responses for cooldown scenarios, providing additional metadata and retry suggestions.
- Refined quota management to differentiate between cooldown, disabled, and other block reasons.
- Improved model filtering logic based on client availability and suspension criteria.
2026-02-03 21:10:51 +08:00 · 2025-10-22 09:01:11 +08:00
parent 9678be7aa4
commit d225558dae
3 changed files with 211 additions and 32 deletions
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -352,14 +352,14 @@ func cloneModelInfo(model *ModelInfo) *ModelInfo {
 	if model == nil {
 		return nil
 	}
-	copy := *model
+	copyModel := *model
 	if len(model.SupportedGenerationMethods) > 0 {
-		copy.SupportedGenerationMethods = append([]string(nil), model.SupportedGenerationMethods...)
+		copyModel.SupportedGenerationMethods = append([]string(nil), model.SupportedGenerationMethods...)
 	}
 	if len(model.SupportedParameters) > 0 {
-		copy.SupportedParameters = append([]string(nil), model.SupportedParameters...)
+		copyModel.SupportedParameters = append([]string(nil), model.SupportedParameters...)
 	}
-	return &copy
+	return &copyModel
 }
 // UnregisterClient removes a client and decrements counts for its models
@@ -532,17 +532,25 @@ func (r *ModelRegistry) GetAvailableModels(handlerType string) []map[string]any
 			}
 		}
-		suspendedClients := 0
+		cooldownSuspended := 0
 		otherSuspended := 0
 		if registration.SuspendedClients != nil {
-			suspendedClients = len(registration.SuspendedClients)
+			for _, reason := range registration.SuspendedClients {
 				if strings.EqualFold(reason, "quota") {
 					cooldownSuspended++
 					continue
 				}
 				otherSuspended++
 			}
 		}
-		effectiveClients := availableClients - expiredClients - suspendedClients
+
 		effectiveClients := availableClients - expiredClients - otherSuspended
 		if effectiveClients < 0 {
 			effectiveClients = 0
 		}
-		// Only include models that have available clients
+		// Include models that have available clients, or those solely cooling down.
-		if effectiveClients > 0 {
+		if effectiveClients > 0 || (availableClients > 0 && (expiredClients > 0 || cooldownSuspended > 0) && otherSuspended == 0) {
 			model := r.convertModelToMap(registration.Info, handlerType)
 			if model != nil {
 				models = append(models, model)
--- a/sdk/api/handlers/handlers.go
+++ b/sdk/api/handlers/handlers.go
@@ -156,7 +156,19 @@ func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType
 	}
 	resp, err := h.AuthManager.Execute(ctx, providers, req, opts)
 	if err != nil {
-		return nil, &interfaces.ErrorMessage{StatusCode: http.StatusInternalServerError, Error: err}
+		status := http.StatusInternalServerError
 		if se, ok := err.(interface{ StatusCode() int }); ok && se != nil {
 			if code := se.StatusCode(); code > 0 {
 				status = code
 			}
 		}
 		var addon http.Header
 		if he, ok := err.(interface{ Headers() http.Header }); ok && he != nil {
 			if hdr := he.Headers(); hdr != nil {
 				addon = hdr.Clone()
 			}
 		}
 		return nil, &interfaces.ErrorMessage{StatusCode: status, Error: err, Addon: addon}
 	}
 	return cloneBytes(resp.Payload), nil
 }
@@ -187,7 +199,19 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
 	}
 	resp, err := h.AuthManager.ExecuteCount(ctx, providers, req, opts)
 	if err != nil {
-		return nil, &interfaces.ErrorMessage{StatusCode: http.StatusInternalServerError, Error: err}
+		status := http.StatusInternalServerError
 		if se, ok := err.(interface{ StatusCode() int }); ok && se != nil {
 			if code := se.StatusCode(); code > 0 {
 				status = code
 			}
 		}
 		var addon http.Header
 		if he, ok := err.(interface{ Headers() http.Header }); ok && he != nil {
 			if hdr := he.Headers(); hdr != nil {
 				addon = hdr.Clone()
 			}
 		}
 		return nil, &interfaces.ErrorMessage{StatusCode: status, Error: err, Addon: addon}
 	}
 	return cloneBytes(resp.Payload), nil
 }
@@ -222,7 +246,19 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
 	chunks, err := h.AuthManager.ExecuteStream(ctx, providers, req, opts)
 	if err != nil {
 		errChan := make(chan *interfaces.ErrorMessage, 1)
-		errChan <- &interfaces.ErrorMessage{StatusCode: http.StatusInternalServerError, Error: err}
+		status := http.StatusInternalServerError
 		if se, ok := err.(interface{ StatusCode() int }); ok && se != nil {
 			if code := se.StatusCode(); code > 0 {
 				status = code
 			}
 		}
 		var addon http.Header
 		if he, ok := err.(interface{ Headers() http.Header }); ok && he != nil {
 			if hdr := he.Headers(); hdr != nil {
 				addon = hdr.Clone()
 			}
 		}
 		errChan <- &interfaces.ErrorMessage{StatusCode: status, Error: err, Addon: addon}
 		close(errChan)
 		return nil, errChan
 	}
@@ -233,7 +269,19 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
 		defer close(errChan)
 		for chunk := range chunks {
 			if chunk.Err != nil {
-				errChan <- &interfaces.ErrorMessage{StatusCode: http.StatusInternalServerError, Error: chunk.Err}
+				status := http.StatusInternalServerError
 				if se, ok := chunk.Err.(interface{ StatusCode() int }); ok && se != nil {
 					if code := se.StatusCode(); code > 0 {
 						status = code
 					}
 				}
 				var addon http.Header
 				if he, ok := chunk.Err.(interface{ Headers() http.Header }); ok && he != nil {
 					if hdr := he.Headers(); hdr != nil {
 						addon = hdr.Clone()
 					}
 				}
 				errChan <- &interfaces.ErrorMessage{StatusCode: status, Error: chunk.Err, Addon: addon}
 				return
 			}
 			if len(chunk.Payload) > 0 {
@@ -287,6 +335,17 @@ func (h *BaseAPIHandler) WriteErrorResponse(c *gin.Context, msg *interfaces.Erro
 	if msg != nil && msg.StatusCode > 0 {
 		status = msg.StatusCode
 	}
 	if msg != nil && msg.Addon != nil {
 		for key, values := range msg.Addon {
 			if len(values) == 0 {
 				continue
 			}
 			c.Writer.Header().Del(key)
 			for _, value := range values {
 				c.Writer.Header().Add(key, value)
 			}
 		}
 	}
 	c.Status(status)
 	if msg != nil && msg.Error != nil {
 		_, _ = c.Writer.Write([]byte(msg.Error.Error()))
--- a/sdk/cliproxy/auth/selector.go
+++ b/sdk/cliproxy/auth/selector.go
@@ -2,7 +2,12 @@ package auth
 import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"math"
 	"net/http"
 	"sort"
 	"strconv"
 	"sync"
 	"time"
@@ -15,6 +20,84 @@ type RoundRobinSelector struct {
 	cursors map[string]int
 }
 // blockReason classifies why isAuthBlockedForModel reports an auth as blocked,
 // so the selector can tell a temporary cooldown apart from other causes.
 type blockReason int
 const (
 	// blockReasonNone is returned when the auth is not blocked.
 	blockReasonNone blockReason = iota
 	// blockReasonCooldown is returned when the auth is unavailable, its retry
 	// time is still in the future, and its quota is marked as exceeded.
 	blockReasonCooldown
 	// blockReasonDisabled is returned when the auth, or its per-model state,
 	// is disabled.
 	blockReasonDisabled
 	// blockReasonOther is returned for a nil auth and for an unavailable
 	// window that is not attributed to an exceeded quota.
 	blockReasonOther
 )
 // modelCooldownError reports that every credential for a model is cooling
 // down. It exposes StatusCode and Headers so that callers which probe errors
 // for those methods can surface an HTTP 429 with a Retry-After hint.
 type modelCooldownError struct {
 	model    string        // requested model name; may be empty
 	resetIn  time.Duration // time until the earliest credential recovers; never negative
 	provider string        // provider the request was routed to; may be empty
 }
 // newModelCooldownError builds a modelCooldownError, clamping a negative
 // resetIn to zero so the reported wait is never negative.
 func newModelCooldownError(model, provider string, resetIn time.Duration) *modelCooldownError {
 	if resetIn < 0 {
 		resetIn = 0
 	}
 	return &modelCooldownError{
 		model:    model,
 		provider: provider,
 		resetIn:  resetIn,
 	}
 }
 // retryAfterSeconds returns the remaining cooldown rounded up to whole
 // seconds and clamped at zero. It is the single source for every wait value
 // the error reports, so the JSON body and the Retry-After header always agree.
 func (e *modelCooldownError) retryAfterSeconds() int {
 	seconds := int(math.Ceil(e.resetIn.Seconds()))
 	if seconds < 0 {
 		return 0
 	}
 	return seconds
 }
 // Error renders the error as a JSON document of the form
 // {"error": {"code": "model_cooldown", "message": ..., "model": ...,
 // "reset_time": ..., "reset_seconds": ...}}, with "provider" added when set.
 func (e *modelCooldownError) Error() string {
 	modelName := e.model
 	if modelName == "" {
 		modelName = "requested model"
 	}
 	message := fmt.Sprintf("All credentials for model %s are cooling down", modelName)
 	if e.provider != "" {
 		message = fmt.Sprintf("%s via provider %s", message, e.provider)
 	}
 	// Derive the human-readable duration from the rounded-up second count so
 	// that reset_time can never advertise a shorter wait than reset_seconds.
 	resetSeconds := e.retryAfterSeconds()
 	displayDuration := time.Duration(resetSeconds) * time.Second
 	errorBody := map[string]any{
 		"code":          "model_cooldown",
 		"message":       message,
 		"model":         e.model,
 		"reset_time":    displayDuration.String(),
 		"reset_seconds": resetSeconds,
 	}
 	if e.provider != "" {
 		errorBody["provider"] = e.provider
 	}
 	payload := map[string]any{"error": errorBody}
 	data, err := json.Marshal(payload)
 	if err != nil {
 		// Quote the message so embedded quotes or backslashes cannot break
 		// the hand-built fallback document.
 		return fmt.Sprintf(`{"error":{"code":"model_cooldown","message":%q}}`, message)
 	}
 	return string(data)
 }
 // StatusCode returns the HTTP status to send for this error (429).
 func (e *modelCooldownError) StatusCode() int {
 	return http.StatusTooManyRequests
 }
 // Headers returns the response headers for this error: a JSON content type
 // and a Retry-After value in whole seconds.
 func (e *modelCooldownError) Headers() http.Header {
 	headers := make(http.Header)
 	headers.Set("Content-Type", "application/json")
 	headers.Set("Retry-After", strconv.Itoa(e.retryAfterSeconds()))
 	return headers
 }
 // Pick selects the next available auth for the provider in a round-robin manner.
 func (s *RoundRobinSelector) Pick(ctx context.Context, provider, model string, opts cliproxyexecutor.Options, auths []*Auth) (*Auth, error) {
 	_ = ctx
@@ -27,14 +110,30 @@ func (s *RoundRobinSelector) Pick(ctx context.Context, provider, model string, o
 	}
 	available := make([]*Auth, 0, len(auths))
 	now := time.Now()
 	cooldownCount := 0
 	var earliest time.Time
 	for i := 0; i < len(auths); i++ {
 		candidate := auths[i]
-		if isAuthBlockedForModel(candidate, model, now) {
+		blocked, reason, next := isAuthBlockedForModel(candidate, model, now)
 		if !blocked {
 			available = append(available, candidate)
 			continue
 		}
-		available = append(available, candidate)
+		if reason == blockReasonCooldown {
 			cooldownCount++
 			if !next.IsZero() && (earliest.IsZero() || next.Before(earliest)) {
 				earliest = next
 			}
 		}
 	}
 	if len(available) == 0 {
 		if cooldownCount == len(auths) && !earliest.IsZero() {
 			resetIn := earliest.Sub(now)
 			if resetIn < 0 {
 				resetIn = 0
 			}
 			return nil, newModelCooldownError(model, provider, resetIn)
 		}
 		return nil, &Error{Code: "auth_unavailable", Message: "no auth available"}
 	}
 	// Make round-robin deterministic even if caller's candidate order is unstable.
@@ -55,41 +154,54 @@ func (s *RoundRobinSelector) Pick(ctx context.Context, provider, model string, o
 	return available[index%len(available)], nil
 }
-func isAuthBlockedForModel(auth *Auth, model string, now time.Time) bool {
+func isAuthBlockedForModel(auth *Auth, model string, now time.Time) (bool, blockReason, time.Time) {
 	if auth == nil {
-		return true
+		return true, blockReasonOther, time.Time{}
 	}
 	if auth.Disabled || auth.Status == StatusDisabled {
-		return true
+		return true, blockReasonDisabled, time.Time{}
 	}
 	// If a specific model is requested, prefer its per-model state over any aggregated
 	// auth-level unavailable flag. This prevents a failure on one model (e.g., 429 quota)
 	// from blocking other models of the same provider that have no errors.
 	if model != "" {
 		if len(auth.ModelStates) > 0 {
 			if state, ok := auth.ModelStates[model]; ok && state != nil {
 				if state.Status == StatusDisabled {
-					return true
+					return true, blockReasonDisabled, time.Time{}
 				}
 				if state.Unavailable {
 					if state.NextRetryAfter.IsZero() {
-						return false
+						return false, blockReasonNone, time.Time{}
 					}
 					if state.NextRetryAfter.After(now) {
-						return true
+						next := state.NextRetryAfter
 						if !state.Quota.NextRecoverAt.IsZero() && state.Quota.NextRecoverAt.After(now) {
 							next = state.Quota.NextRecoverAt
 						}
 						if next.Before(now) {
 							next = now
 						}
 						if state.Quota.Exceeded {
 							return true, blockReasonCooldown, next
 						}
 						return true, blockReasonOther, next
 					}
 				}
-				// Explicit state exists and is not blocking.
+				return false, blockReasonNone, time.Time{}
 				return false
 			}
 		}
-		// No explicit state for this model; do not block based on aggregated
+		return false, blockReasonNone, time.Time{}
 		// auth-level unavailable status. Allow trying this model.
 		return false
 	}
 	// No specific model context: fall back to auth-level unavailable window.
 	if auth.Unavailable && auth.NextRetryAfter.After(now) {
-		return true
+		next := auth.NextRetryAfter
 		if !auth.Quota.NextRecoverAt.IsZero() && auth.Quota.NextRecoverAt.After(now) {
 			next = auth.Quota.NextRecoverAt
 		}
 		if next.Before(now) {
 			next = now
 		}
 		if auth.Quota.Exceeded {
 			return true, blockReasonCooldown, next
 		}
 		return true, blockReasonOther, next
 	}
-	return false
+	return false, blockReasonNone, time.Time{}
 }