Refactor client handling and improve error responses

- Centralized client retrieval logic with `getClient` function for reduced redundancy.
- Simplified client rotation and error handling by removing excessive load balancing logic.
- Updated server address in `auth.go` to use dynamic binding (`:8085`).
This commit is contained in:
Luis Pater
2025-07-15 17:03:18 +08:00
parent 368796349e
commit f49a530c1a
3 changed files with 10 additions and 52 deletions

View File

@@ -78,62 +78,16 @@ func (h *APIHandlers) ClaudeMessages(c *gin.Context) {
// This loop implements a sophisticated load balancing and failover mechanism
outLoop:
for {
// Thread-safe client index rotation to distribute load evenly
// This ensures fair usage across all available clients
mutex.Lock()
startIndex := lastUsedClientIndex
currentIndex := (startIndex + 1) % len(h.cliClients)
lastUsedClientIndex = currentIndex
mutex.Unlock()
// Build a list of available clients, starting from the next client in rotation
// This implements round-robin load balancing while filtering out quota-exceeded clients
reorderedClients := make([]*client.Client, 0)
for i := 0; i < len(h.cliClients); i++ {
cliClient = h.cliClients[(startIndex+1+i)%len(h.cliClients)]
// Skip clients that have exceeded their quota for the requested model
if cliClient.IsModelQuotaExceeded(modelName) {
// Log different messages based on authentication method (API key vs account)
if cliClient.GetGenerativeLanguageAPIKey() == "" {
log.Debugf("Model %s is quota exceeded for account %s, project id: %s", modelName, cliClient.GetEmail(), cliClient.GetProjectID())
} else {
log.Debugf("Model %s is quota exceeded for generative language API Key: %s", modelName, cliClient.GetGenerativeLanguageAPIKey())
}
cliClient = nil
continue
}
reorderedClients = append(reorderedClients, cliClient)
}
// If all clients have exceeded quota, return a 429 Too Many Requests error
if len(reorderedClients) == 0 {
c.Status(429)
_, _ = fmt.Fprint(c.Writer, fmt.Sprintf(`{"error":{"code":429,"message":"All the models of '%s' are quota exceeded","status":"RESOURCE_EXHAUSTED"}}`, modelName))
var errorResponse *client.ErrorMessage
cliClient, errorResponse = h.getClient(modelName)
if errorResponse != nil {
c.Status(errorResponse.StatusCode)
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
flusher.Flush()
cliCancel()
return
}
// Attempt to acquire a lock on an available client using non-blocking TryLock
// This prevents blocking when a client is busy with another request
locked := false
for i := 0; i < len(reorderedClients); i++ {
cliClient = reorderedClients[i]
if cliClient.RequestMutex.TryLock() {
locked = true
break
}
}
// If no client is immediately available, fall back to blocking on the first client
// This ensures the request will eventually be processed
if !locked {
cliClient = h.cliClients[0]
cliClient.RequestMutex.Lock()
}
// Determine the authentication method being used by the selected client
// This affects how responses are formatted and logged
isGlAPIKey := false

View File

@@ -86,6 +86,10 @@ func (h *APIHandlers) Models(c *gin.Context) {
}
func (h *APIHandlers) getClient(modelName string) (*client.Client, *client.ErrorMessage) {
if len(h.cliClients) == 0 {
return nil, &client.ErrorMessage{StatusCode: 500, Error: fmt.Errorf("no clients available")}
}
var cliClient *client.Client
// Lock the mutex to update the last used client index

View File

@@ -169,7 +169,7 @@ func getTokenFromWeb(ctx context.Context, config *oauth2.Config) (*oauth2.Token,
errChan := make(chan error)
// Create a new HTTP server.
server := &http.Server{Addr: "localhost:8085"}
server := &http.Server{Addr: ":8085"}
config.RedirectURL = "http://localhost:8085/oauth2callback"
http.HandleFunc("/oauth2callback", func(w http.ResponseWriter, r *http.Request) {