diff --git a/internal/api/claude-code-handlers.go b/internal/api/claude-code-handlers.go
index 7d14ad8a..ff825a7b 100644
--- a/internal/api/claude-code-handlers.go
+++ b/internal/api/claude-code-handlers.go
@@ -78,62 +78,16 @@ func (h *APIHandlers) ClaudeMessages(c *gin.Context) {
 	// This loop implements a sophisticated load balancing and failover mechanism
 outLoop:
 	for {
-		// Thread-safe client index rotation to distribute load evenly
-		// This ensures fair usage across all available clients
-		mutex.Lock()
-		startIndex := lastUsedClientIndex
-		currentIndex := (startIndex + 1) % len(h.cliClients)
-		lastUsedClientIndex = currentIndex
-		mutex.Unlock()
-
-		// Build a list of available clients, starting from the next client in rotation
-		// This implements round-robin load balancing while filtering out quota-exceeded clients
-		reorderedClients := make([]*client.Client, 0)
-		for i := 0; i < len(h.cliClients); i++ {
-			cliClient = h.cliClients[(startIndex+1+i)%len(h.cliClients)]
-
-			// Skip clients that have exceeded their quota for the requested model
-			if cliClient.IsModelQuotaExceeded(modelName) {
-				// Log different messages based on authentication method (API key vs account)
-				if cliClient.GetGenerativeLanguageAPIKey() == "" {
-					log.Debugf("Model %s is quota exceeded for account %s, project id: %s", modelName, cliClient.GetEmail(), cliClient.GetProjectID())
-				} else {
-					log.Debugf("Model %s is quota exceeded for generative language API Key: %s", modelName, cliClient.GetGenerativeLanguageAPIKey())
-				}
-
-				cliClient = nil
-				continue
-			}
-			reorderedClients = append(reorderedClients, cliClient)
-		}
-
-		// If all clients have exceeded quota, return a 429 Too Many Requests error
-		if len(reorderedClients) == 0 {
-			c.Status(429)
-			_, _ = fmt.Fprint(c.Writer, fmt.Sprintf(`{"error":{"code":429,"message":"All the models of '%s' are quota exceeded","status":"RESOURCE_EXHAUSTED"}}`, modelName))
+		var errorResponse *client.ErrorMessage
+		cliClient, errorResponse = h.getClient(modelName)
+		if errorResponse != nil {
+			c.Status(errorResponse.StatusCode)
+			_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
 			flusher.Flush()
 			cliCancel()
 			return
 		}
 
-		// Attempt to acquire a lock on an available client using non-blocking TryLock
-		// This prevents blocking when a client is busy with another request
-		locked := false
-		for i := 0; i < len(reorderedClients); i++ {
-			cliClient = reorderedClients[i]
-			if cliClient.RequestMutex.TryLock() {
-				locked = true
-				break
-			}
-		}
-
-		// If no client is immediately available, fall back to blocking on the first client
-		// This ensures the request will eventually be processed
-		if !locked {
-			cliClient = h.cliClients[0]
-			cliClient.RequestMutex.Lock()
-		}
-
 		// Determine the authentication method being used by the selected client
 		// This affects how responses are formatted and logged
 		isGlAPIKey := false
diff --git a/internal/api/handlers.go b/internal/api/handlers.go
index 589d787d..0d88a9b7 100644
--- a/internal/api/handlers.go
+++ b/internal/api/handlers.go
@@ -86,6 +86,10 @@ func (h *APIHandlers) Models(c *gin.Context) {
 }
 
 func (h *APIHandlers) getClient(modelName string) (*client.Client, *client.ErrorMessage) {
+	if len(h.cliClients) == 0 {
+		return nil, &client.ErrorMessage{StatusCode: 500, Error: fmt.Errorf("no clients available")}
+	}
+
 	var cliClient *client.Client
 
 	// Lock the mutex to update the last used client index
diff --git a/internal/auth/auth.go b/internal/auth/auth.go
index 208c90d2..0a41c8c3 100644
--- a/internal/auth/auth.go
+++ b/internal/auth/auth.go
@@ -169,7 +169,7 @@ func getTokenFromWeb(ctx context.Context, config *oauth2.Config) (*oauth2.Token,
 	errChan := make(chan error)
 
 	// Create a new HTTP server.
-	server := &http.Server{Addr: "localhost:8085"}
+	server := &http.Server{Addr: ":8085"}
 	config.RedirectURL = "http://localhost:8085/oauth2callback"
 
 	http.HandleFunc("/oauth2callback", func(w http.ResponseWriter, r *http.Request) {