mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-02-03 04:50:52 +08:00
- Implemented `RefreshTokens` method in client interfaces and Gemini clients. - Updated handlers to call `RefreshTokens` on 401 responses and retry requests if token refresh succeeds. - Enhanced error handling and retry logic to accommodate token refresh flow.
413 lines
12 KiB
Go
413 lines
12 KiB
Go
// Package gemini provides HTTP handlers for Gemini API endpoints.
|
|
// This package implements handlers for managing Gemini model operations including
|
|
// model listing, content generation, streaming content generation, and token counting.
|
|
// It serves as a proxy layer between clients and the Gemini backend service,
|
|
// handling request translation, client management, and response processing.
|
|
package gemini
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net/http"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/gin-gonic/gin"
|
|
"github.com/luispater/CLIProxyAPI/internal/api/handlers"
|
|
. "github.com/luispater/CLIProxyAPI/internal/constant"
|
|
"github.com/luispater/CLIProxyAPI/internal/interfaces"
|
|
"github.com/luispater/CLIProxyAPI/internal/registry"
|
|
"github.com/luispater/CLIProxyAPI/internal/util"
|
|
log "github.com/sirupsen/logrus"
|
|
)
|
|
|
|
// GeminiAPIHandler contains the handlers for Gemini API endpoints.
|
|
// It holds a pool of clients to interact with the backend service.
|
|
type GeminiAPIHandler struct {
|
|
*handlers.BaseAPIHandler
|
|
}
|
|
|
|
// NewGeminiAPIHandler creates a new Gemini API handlers instance.
|
|
// It takes an BaseAPIHandler instance as input and returns a GeminiAPIHandler.
|
|
func NewGeminiAPIHandler(apiHandlers *handlers.BaseAPIHandler) *GeminiAPIHandler {
|
|
return &GeminiAPIHandler{
|
|
BaseAPIHandler: apiHandlers,
|
|
}
|
|
}
|
|
|
|
// HandlerType returns the identifier for this handler implementation.
|
|
func (h *GeminiAPIHandler) HandlerType() string {
|
|
return GEMINI
|
|
}
|
|
|
|
// Models returns the Gemini-compatible model metadata supported by this handler.
|
|
func (h *GeminiAPIHandler) Models() []map[string]any {
|
|
// Get dynamic models from the global registry
|
|
modelRegistry := registry.GetGlobalRegistry()
|
|
return modelRegistry.GetAvailableModels("gemini")
|
|
}
|
|
|
|
// GeminiModels handles the Gemini models listing endpoint.
|
|
// It returns a JSON response containing available Gemini models and their specifications.
|
|
func (h *GeminiAPIHandler) GeminiModels(c *gin.Context) {
|
|
c.JSON(http.StatusOK, gin.H{
|
|
"models": h.Models(),
|
|
})
|
|
}
|
|
|
|
// GeminiGetHandler handles GET requests for specific Gemini model information.
|
|
// It returns detailed information about a specific Gemini model based on the action parameter.
|
|
func (h *GeminiAPIHandler) GeminiGetHandler(c *gin.Context) {
|
|
var request struct {
|
|
Action string `uri:"action" binding:"required"`
|
|
}
|
|
if err := c.ShouldBindUri(&request); err != nil {
|
|
c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
|
|
Error: handlers.ErrorDetail{
|
|
Message: fmt.Sprintf("Invalid request: %v", err),
|
|
Type: "invalid_request_error",
|
|
},
|
|
})
|
|
return
|
|
}
|
|
switch request.Action {
|
|
case "gemini-2.5-pro":
|
|
c.JSON(http.StatusOK, gin.H{
|
|
"name": "models/gemini-2.5-pro",
|
|
"version": "2.5",
|
|
"displayName": "Gemini 2.5 Pro",
|
|
"description": "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
|
|
"inputTokenLimit": 1048576,
|
|
"outputTokenLimit": 65536,
|
|
"supportedGenerationMethods": []string{
|
|
"generateContent",
|
|
"countTokens",
|
|
"createCachedContent",
|
|
"batchGenerateContent",
|
|
},
|
|
"temperature": 1,
|
|
"topP": 0.95,
|
|
"topK": 64,
|
|
"maxTemperature": 2,
|
|
"thinking": true,
|
|
},
|
|
)
|
|
case "gemini-2.5-flash":
|
|
c.JSON(http.StatusOK, gin.H{
|
|
"name": "models/gemini-2.5-flash",
|
|
"version": "001",
|
|
"displayName": "Gemini 2.5 Flash",
|
|
"description": "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
|
|
"inputTokenLimit": 1048576,
|
|
"outputTokenLimit": 65536,
|
|
"supportedGenerationMethods": []string{
|
|
"generateContent",
|
|
"countTokens",
|
|
"createCachedContent",
|
|
"batchGenerateContent",
|
|
},
|
|
"temperature": 1,
|
|
"topP": 0.95,
|
|
"topK": 64,
|
|
"maxTemperature": 2,
|
|
"thinking": true,
|
|
})
|
|
case "gpt-5":
|
|
c.JSON(http.StatusOK, gin.H{
|
|
"name": "gpt-5",
|
|
"version": "001",
|
|
"displayName": "GPT 5",
|
|
"description": "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
|
"inputTokenLimit": 400000,
|
|
"outputTokenLimit": 128000,
|
|
"supportedGenerationMethods": []string{
|
|
"generateContent",
|
|
},
|
|
"temperature": 1,
|
|
"topP": 0.95,
|
|
"topK": 64,
|
|
"maxTemperature": 2,
|
|
"thinking": true,
|
|
})
|
|
default:
|
|
c.JSON(http.StatusNotFound, handlers.ErrorResponse{
|
|
Error: handlers.ErrorDetail{
|
|
Message: "Not Found",
|
|
Type: "not_found",
|
|
},
|
|
})
|
|
}
|
|
}
|
|
|
|
// GeminiHandler handles POST requests for Gemini API operations.
|
|
// It routes requests to appropriate handlers based on the action parameter (model:method format).
|
|
func (h *GeminiAPIHandler) GeminiHandler(c *gin.Context) {
|
|
var request struct {
|
|
Action string `uri:"action" binding:"required"`
|
|
}
|
|
if err := c.ShouldBindUri(&request); err != nil {
|
|
c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
|
|
Error: handlers.ErrorDetail{
|
|
Message: fmt.Sprintf("Invalid request: %v", err),
|
|
Type: "invalid_request_error",
|
|
},
|
|
})
|
|
return
|
|
}
|
|
action := strings.Split(request.Action, ":")
|
|
if len(action) != 2 {
|
|
c.JSON(http.StatusNotFound, handlers.ErrorResponse{
|
|
Error: handlers.ErrorDetail{
|
|
Message: fmt.Sprintf("%s not found.", c.Request.URL.Path),
|
|
Type: "invalid_request_error",
|
|
},
|
|
})
|
|
return
|
|
}
|
|
|
|
method := action[1]
|
|
rawJSON, _ := c.GetRawData()
|
|
|
|
switch method {
|
|
case "generateContent":
|
|
h.handleGenerateContent(c, action[0], rawJSON)
|
|
case "streamGenerateContent":
|
|
h.handleStreamGenerateContent(c, action[0], rawJSON)
|
|
case "countTokens":
|
|
h.handleCountTokens(c, action[0], rawJSON)
|
|
}
|
|
}
|
|
|
|
// handleStreamGenerateContent handles streaming content generation requests for Gemini models.
|
|
// This function establishes a Server-Sent Events connection and streams the generated content
|
|
// back to the client in real-time. It supports both SSE format and direct streaming based
|
|
// on the 'alt' query parameter.
|
|
//
|
|
// Parameters:
|
|
// - c: The Gin context for the request
|
|
// - modelName: The name of the Gemini model to use for content generation
|
|
// - rawJSON: The raw JSON request body containing generation parameters
|
|
func (h *GeminiAPIHandler) handleStreamGenerateContent(c *gin.Context, modelName string, rawJSON []byte) {
|
|
alt := h.GetAlt(c)
|
|
|
|
if alt == "" {
|
|
c.Header("Content-Type", "text/event-stream")
|
|
c.Header("Cache-Control", "no-cache")
|
|
c.Header("Connection", "keep-alive")
|
|
c.Header("Access-Control-Allow-Origin", "*")
|
|
}
|
|
|
|
// Get the http.Flusher interface to manually flush the response.
|
|
flusher, ok := c.Writer.(http.Flusher)
|
|
if !ok {
|
|
c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
|
|
Error: handlers.ErrorDetail{
|
|
Message: "Streaming not supported",
|
|
Type: "server_error",
|
|
},
|
|
})
|
|
return
|
|
}
|
|
|
|
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
|
|
|
|
var cliClient interfaces.Client
|
|
defer func() {
|
|
// Ensure the client's mutex is unlocked on function exit.
|
|
if cliClient != nil {
|
|
if mutex := cliClient.GetRequestMutex(); mutex != nil {
|
|
mutex.Unlock()
|
|
}
|
|
}
|
|
}()
|
|
|
|
retryCount := 0
|
|
outLoop:
|
|
for retryCount <= h.Cfg.RequestRetry {
|
|
var errorResponse *interfaces.ErrorMessage
|
|
cliClient, errorResponse = h.GetClient(modelName)
|
|
if errorResponse != nil {
|
|
c.Status(errorResponse.StatusCode)
|
|
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
|
flusher.Flush()
|
|
cliCancel()
|
|
return
|
|
}
|
|
|
|
// Send the message and receive response chunks and errors via channels.
|
|
respChan, errChan := cliClient.SendRawMessageStream(cliCtx, modelName, rawJSON, alt)
|
|
for {
|
|
select {
|
|
// Handle client disconnection.
|
|
case <-c.Request.Context().Done():
|
|
if c.Request.Context().Err().Error() == "context canceled" {
|
|
log.Debugf("gemini client disconnected: %v", c.Request.Context().Err())
|
|
cliCancel() // Cancel the backend request.
|
|
return
|
|
}
|
|
// Process incoming response chunks.
|
|
case chunk, okStream := <-respChan:
|
|
if !okStream {
|
|
cliCancel()
|
|
return
|
|
}
|
|
|
|
if alt == "" {
|
|
_, _ = c.Writer.Write([]byte("data: "))
|
|
_, _ = c.Writer.Write(chunk)
|
|
_, _ = c.Writer.Write([]byte("\n\n"))
|
|
} else {
|
|
_, _ = c.Writer.Write(chunk)
|
|
}
|
|
flusher.Flush()
|
|
// Handle errors from the backend.
|
|
case err, okError := <-errChan:
|
|
if okError {
|
|
switch err.StatusCode {
|
|
case 429:
|
|
if h.Cfg.QuotaExceeded.SwitchProject {
|
|
log.Debugf("quota exceeded, switch client")
|
|
continue outLoop // Restart the client selection process
|
|
}
|
|
case 403, 408, 500, 502, 503, 504:
|
|
log.Debugf("http status code %d, switch client", err.StatusCode)
|
|
retryCount++
|
|
continue outLoop
|
|
default:
|
|
// Forward other errors directly to the client
|
|
c.Status(err.StatusCode)
|
|
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
|
|
flusher.Flush()
|
|
cliCancel(err.Error)
|
|
}
|
|
return
|
|
}
|
|
// Send a keep-alive signal to the client.
|
|
case <-time.After(500 * time.Millisecond):
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// handleCountTokens handles token counting requests for Gemini models.
|
|
// This function counts the number of tokens in the provided content without
|
|
// generating a response. It's useful for quota management and content validation.
|
|
//
|
|
// Parameters:
|
|
// - c: The Gin context for the request
|
|
// - modelName: The name of the Gemini model to use for token counting
|
|
// - rawJSON: The raw JSON request body containing the content to count
|
|
func (h *GeminiAPIHandler) handleCountTokens(c *gin.Context, modelName string, rawJSON []byte) {
|
|
c.Header("Content-Type", "application/json")
|
|
|
|
alt := h.GetAlt(c)
|
|
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
|
|
|
|
var cliClient interfaces.Client
|
|
defer func() {
|
|
if cliClient != nil {
|
|
if mutex := cliClient.GetRequestMutex(); mutex != nil {
|
|
mutex.Unlock()
|
|
}
|
|
}
|
|
}()
|
|
|
|
for {
|
|
var errorResponse *interfaces.ErrorMessage
|
|
cliClient, errorResponse = h.GetClient(modelName, false)
|
|
if errorResponse != nil {
|
|
c.Status(errorResponse.StatusCode)
|
|
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
|
cliCancel()
|
|
return
|
|
}
|
|
|
|
resp, err := cliClient.SendRawTokenCount(cliCtx, modelName, rawJSON, alt)
|
|
if err != nil {
|
|
if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
|
|
continue
|
|
} else {
|
|
c.Status(err.StatusCode)
|
|
_, _ = c.Writer.Write([]byte(err.Error.Error()))
|
|
cliCancel(err.Error)
|
|
}
|
|
break
|
|
} else {
|
|
_, _ = c.Writer.Write(resp)
|
|
cliCancel(resp)
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
// handleGenerateContent handles non-streaming content generation requests for Gemini models.
|
|
// This function processes the request synchronously and returns the complete generated
|
|
// response in a single API call. It supports various generation parameters and
|
|
// response formats.
|
|
//
|
|
// Parameters:
|
|
// - c: The Gin context for the request
|
|
// - modelName: The name of the Gemini model to use for content generation
|
|
// - rawJSON: The raw JSON request body containing generation parameters and content
|
|
func (h *GeminiAPIHandler) handleGenerateContent(c *gin.Context, modelName string, rawJSON []byte) {
|
|
c.Header("Content-Type", "application/json")
|
|
|
|
alt := h.GetAlt(c)
|
|
|
|
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
|
|
|
|
var cliClient interfaces.Client
|
|
defer func() {
|
|
if cliClient != nil {
|
|
if mutex := cliClient.GetRequestMutex(); mutex != nil {
|
|
mutex.Unlock()
|
|
}
|
|
}
|
|
}()
|
|
|
|
retryCount := 0
|
|
for retryCount <= h.Cfg.RequestRetry {
|
|
var errorResponse *interfaces.ErrorMessage
|
|
cliClient, errorResponse = h.GetClient(modelName)
|
|
if errorResponse != nil {
|
|
c.Status(errorResponse.StatusCode)
|
|
_, _ = fmt.Fprint(c.Writer, errorResponse.Error.Error())
|
|
cliCancel()
|
|
return
|
|
}
|
|
|
|
resp, err := cliClient.SendRawMessage(cliCtx, modelName, rawJSON, alt)
|
|
if err != nil {
|
|
switch err.StatusCode {
|
|
case 429:
|
|
if h.Cfg.QuotaExceeded.SwitchProject {
|
|
log.Debugf("quota exceeded, switch client")
|
|
continue // Restart the client selection process
|
|
}
|
|
case 403, 408, 500, 502, 503, 504:
|
|
log.Debugf("http status code %d, switch client", err.StatusCode)
|
|
retryCount++
|
|
continue
|
|
case 401:
|
|
log.Debugf("unauthorized request, try to refresh token, %s", util.HideAPIKey(cliClient.GetEmail()))
|
|
errRefreshTokens := cliClient.RefreshTokens(cliCtx)
|
|
if errRefreshTokens != nil {
|
|
log.Debugf("refresh token failed, switch client, %s", util.HideAPIKey(cliClient.GetEmail()))
|
|
}
|
|
retryCount++
|
|
continue
|
|
default:
|
|
// Forward other errors directly to the client
|
|
c.Status(err.StatusCode)
|
|
_, _ = c.Writer.Write([]byte(err.Error.Error()))
|
|
cliCancel(err.Error)
|
|
}
|
|
break
|
|
} else {
|
|
_, _ = c.Writer.Write(resp)
|
|
cliCancel(resp)
|
|
break
|
|
}
|
|
}
|
|
}
|