feat(registry): unify Gemini models and add AI Studio set

2026-02-03 04:50:52 +08:00 · 2025-10-28 19:00:25 +08:00
parent 5891785125
commit 5dced4c0a6
5 changed files with 106 additions and 164 deletions
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -225,7 +225,7 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk
 	envManagementSecret := envAdminPasswordSet && envAdminPassword != ""

 	// Create server instance
-		providerNames := make([]string, 0, len(cfg.OpenAICompatibility))
+	providerNames := make([]string, 0, len(cfg.OpenAICompatibility))
 	for _, p := range cfg.OpenAICompatibility {
 		providerNames = append(providerNames, p.Name)
 	}
@@ -914,5 +914,3 @@ func AuthMiddleware(manager *sdkaccess.Manager) gin.HandlerFunc {
 		}
 	}
 }
-
-// legacy clientsToSlice removed; handlers no longer consume legacy client slices
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -68,84 +68,8 @@ func GetClaudeModels() []*ModelInfo {
 	}
 }

-// GetGeminiModels returns the standard Gemini model definitions
-func GetGeminiModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                         "gemini-2.5-flash",
-			Object:                     "model",
-			Created:                    time.Now().Unix(),
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash",
-			Version:                    "001",
-			DisplayName:                "Gemini 2.5 Flash",
-			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-		},
-		{
-			ID:                         "gemini-2.5-pro",
-			Object:                     "model",
-			Created:                    time.Now().Unix(),
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-pro",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Pro",
-			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-		},
-		{
-			ID:                         "gemini-2.5-flash-lite",
-			Object:                     "model",
-			Created:                    time.Now().Unix(),
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-lite",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Lite",
-			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Flash Lite",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-		},
-		{
-			ID:                         "gemini-2.5-flash-image-preview",
-			Object:                     "model",
-			Created:                    time.Now().Unix(),
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-image-preview",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Image Preview",
-			Description:                "State-of-the-art image generation and editing model.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           8192,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-		},
-		{
-			ID:                         "gemini-2.5-flash-image",
-			Object:                     "model",
-			Created:                    time.Now().Unix(),
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-image",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Image",
-			Description:                "State-of-the-art image generation and editing model.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           8192,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-		},
-	}
-}
-
-// GetGeminiCLIModels returns the standard Gemini model definitions
-func GetGeminiCLIModels() []*ModelInfo {
+// GeminiModels returns the shared base Gemini model set used by multiple providers.
+func GeminiModels() []*ModelInfo {
 	return []*ModelInfo{
 		{
 			ID:                         "gemini-2.5-flash",
@@ -220,6 +144,63 @@ func GetGeminiCLIModels() []*ModelInfo {
 	}
 }

+// GetGeminiModels returns the standard Gemini model definitions; it delegates to GeminiModels.
+func GetGeminiModels() []*ModelInfo { return GeminiModels() }
+
+// GetGeminiCLIModels returns the Gemini CLI model definitions; it delegates to the same GeminiModels base set.
+func GetGeminiCLIModels() []*ModelInfo { return GeminiModels() }
+
+// GetAIStudioModels returns the AI Studio model definitions: the GeminiModels base set plus the gemini-pro-latest, gemini-flash-latest, and gemini-flash-lite-latest entries.
+func GetAIStudioModels() []*ModelInfo {
+	models := make([]*ModelInfo, 0, 8)
+	models = append(models, GeminiModels()...)
+	models = append(models,
+		&ModelInfo{
+			ID:                         "gemini-pro-latest",
+			Object:                     "model",
+			Created:                    time.Now().Unix(),
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-pro-latest",
+			Version:                    "2.5",
+			DisplayName:                "Gemini Pro Latest",
+			Description:                "Latest release of Gemini Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+		},
+		&ModelInfo{
+			ID:                         "gemini-flash-latest",
+			Object:                     "model",
+			Created:                    time.Now().Unix(),
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-flash-latest",
+			Version:                    "2.5",
+			DisplayName:                "Gemini Flash Latest",
+			Description:                "Latest release of Gemini Flash",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+		},
+		&ModelInfo{
+			ID:                         "gemini-flash-lite-latest",
+			Object:                     "model",
+			Created:                    time.Now().Unix(),
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-flash-lite-latest",
+			Version:                    "2.5",
+			DisplayName:                "Gemini Flash-Lite Latest",
+			Description:                "Latest release of Gemini Flash-Lite",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+		},
+	)
+	return models
+}
+
 // GetOpenAIModels returns the standard OpenAI model definitions
 func GetOpenAIModels() []*ModelInfo {
 	return []*ModelInfo{
@@ -417,7 +398,6 @@ func GetIFlowModels() []*ModelInfo {
 		{ID: "qwen3-vl-plus", DisplayName: "Qwen3-VL-Plus", Description: "Qwen3 multimodal vision-language"},
 		{ID: "qwen3-max-preview", DisplayName: "Qwen3-Max-Preview", Description: "Qwen3 Max preview build"},
 		{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905"},
-		{ID: "glm-4.5", DisplayName: "GLM-4.5", Description: "Zhipu GLM 4.5 general model"},
 		{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model"},
 		{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model"},
 		{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental"},
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -19,27 +19,27 @@ import (
 	"github.com/tidwall/sjson"
 )

-// AistudioExecutor routes AI Studio requests through a websocket-backed transport.
-type AistudioExecutor struct {
+// AIStudioExecutor routes AI Studio requests through a websocket-backed transport.
+type AIStudioExecutor struct {
 	provider string
 	relay    *wsrelay.Manager
 	cfg      *config.Config
 }

-// NewAistudioExecutor constructs a websocket executor for the provider name.
-func NewAistudioExecutor(cfg *config.Config, provider string, relay *wsrelay.Manager) *AistudioExecutor {
-	return &AistudioExecutor{provider: strings.ToLower(provider), relay: relay, cfg: cfg}
+// NewAIStudioExecutor constructs a websocket executor for the provider name.
+func NewAIStudioExecutor(cfg *config.Config, provider string, relay *wsrelay.Manager) *AIStudioExecutor {
+	return &AIStudioExecutor{provider: strings.ToLower(provider), relay: relay, cfg: cfg}
 }

-// Identifier returns the provider key served by this executor.
-func (e *AistudioExecutor) Identifier() string { return e.provider }
+// Identifier returns the logical provider key for routing; it is always "aistudio", independent of the provider name given to NewAIStudioExecutor.
+func (e *AIStudioExecutor) Identifier() string { return "aistudio" }

 // PrepareRequest is a no-op because websocket transport already injects headers.
-func (e *AistudioExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error {
+func (e *AIStudioExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error {
 	return nil
 }

-func (e *AistudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
+func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

@@ -66,7 +66,7 @@ func (e *AistudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
 		Method:    http.MethodPost,
 		Headers:   wsReq.Headers.Clone(),
 		Body:      bytes.Clone(body.payload),
-		Provider:  e.provider,
+		Provider:  e.Identifier(),
 		AuthID:    authID,
 		AuthLabel: authLabel,
 		AuthType:  authType,
@@ -92,7 +92,7 @@ func (e *AistudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
 	return resp, nil
 }

-func (e *AistudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
+func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

@@ -118,7 +118,7 @@ func (e *AistudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 		Method:    http.MethodPost,
 		Headers:   wsReq.Headers.Clone(),
 		Body:      bytes.Clone(body.payload),
-		Provider:  e.provider,
+		Provider:  e.Identifier(),
 		AuthID:    authID,
 		AuthLabel: authLabel,
 		AuthType:  authType,
@@ -151,7 +151,7 @@ func (e *AistudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 			case wsrelay.MessageTypeStreamChunk:
 				if len(event.Payload) > 0 {
 					appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(event.Payload))
-					filtered := filterAistudioUsageMetadata(event.Payload)
+					filtered := filterAIStudioUsageMetadata(event.Payload)
 					if detail, ok := parseGeminiStreamUsage(filtered); ok {
 						reporter.publish(ctx, detail)
 					}
@@ -188,7 +188,7 @@ func (e *AistudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 	return stream, nil
 }

-func (e *AistudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
 	_, body, err := e.translateRequest(req, opts, false)
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
@@ -215,7 +215,7 @@ func (e *AistudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
 		Method:    http.MethodPost,
 		Headers:   wsReq.Headers.Clone(),
 		Body:      bytes.Clone(body.payload),
-		Provider:  e.provider,
+		Provider:  e.Identifier(),
 		AuthID:    authID,
 		AuthLabel: authLabel,
 		AuthType:  authType,
@@ -241,7 +241,7 @@ func (e *AistudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
 	return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
 }

-func (e *AistudioExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
+func (e *AIStudioExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
 	_ = ctx
 	return auth, nil
 }
@@ -252,7 +252,7 @@ type translatedPayload struct {
 	toFormat sdktranslator.Format
 }

-func (e *AistudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool) ([]byte, translatedPayload, error) {
+func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool) ([]byte, translatedPayload, error) {
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
@@ -275,7 +275,7 @@ func (e *AistudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
 	return payload, translatedPayload{payload: payload, action: action, toFormat: to}, nil
 }

-func (e *AistudioExecutor) buildEndpoint(model, action, alt string) string {
+func (e *AIStudioExecutor) buildEndpoint(model, action, alt string) string {
 	base := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, model, action)
 	if action == "streamGenerateContent" {
 		if alt == "" {
@@ -289,9 +289,9 @@ func (e *AistudioExecutor) buildEndpoint(model, action, alt string) string {
 	return base
 }

-// filterAistudioUsageMetadata removes usageMetadata from intermediate SSE events so that
+// filterAIStudioUsageMetadata removes usageMetadata from intermediate SSE events so that
 // only the terminal chunk retains token statistics.
-func filterAistudioUsageMetadata(payload []byte) []byte {
+func filterAIStudioUsageMetadata(payload []byte) []byte {
 	if len(payload) == 0 {
 		return payload
 	}