From 3d7aca22c0466f6058e27f543f8d8ebfdbe47e30 Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Wed, 29 Oct 2025 12:00:35 +0800
Subject: [PATCH] feat(registry): add thinking budget support; populate Gemini models

---
 internal/registry/model_definitions.go | 14 +++++++++----
 internal/registry/model_registry.go    | 28 ++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go
index ebc1a573..2536d27b 100644
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -84,6 +84,7 @@ func GeminiModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		{
 			ID:                         "gemini-2.5-pro",
@@ -98,6 +99,7 @@ func GeminiModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
 			ID:                         "gemini-2.5-flash-lite",
@@ -112,6 +114,7 @@ func GeminiModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		{
 			ID:                         "gemini-2.5-flash-image-preview",
@@ -126,6 +129,7 @@ func GeminiModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           8192,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			// image models don't support thinkingConfig; leave Thinking nil
 		},
 		{
 			ID:                         "gemini-2.5-flash-image",
@@ -140,6 +144,7 @@ func GeminiModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           8192,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			// image models don't support thinkingConfig; leave Thinking nil
 		},
 	}
 }
@@ -152,9 +157,8 @@ func GetGeminiCLIModels() []*ModelInfo { return GeminiModels() }
 
 // GetAIStudioModels returns the Gemini model definitions for AI Studio integrations
 func GetAIStudioModels() []*ModelInfo {
-	models := make([]*ModelInfo, 0, 8)
-	models = append(models, GeminiModels()...)
-	models = append(models,
+	base := GeminiModels()
+	return append(base,
 		&ModelInfo{
 			ID:                         "gemini-pro-latest",
 			Object:                     "model",
@@ -168,6 +172,7 @@ func GetAIStudioModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		&ModelInfo{
 			ID:                         "gemini-flash-latest",
@@ -182,6 +187,7 @@ func GetAIStudioModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		&ModelInfo{
 			ID:                         "gemini-flash-lite-latest",
@@ -196,9 +202,9 @@ func GetAIStudioModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
 	)
-	return models
 }
 
 // GetOpenAIModels returns the standard OpenAI model definitions
diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go
index e1223978..46ada713 100644
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -45,6 +45,23 @@ type ModelInfo struct {
 	MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
 	// SupportedParameters lists supported parameters
 	SupportedParameters []string `json:"supported_parameters,omitempty"`
+
+	// Thinking holds provider-specific reasoning/thinking budget capabilities.
+	// This is optional and currently used for Gemini thinking budget normalization.
+	Thinking *ThinkingSupport `json:"thinking,omitempty"`
+}
+
+// ThinkingSupport describes a model family's supported internal reasoning budget range.
+// Values are interpreted in provider-native token units.
+type ThinkingSupport struct {
+	// Min is the minimum allowed thinking budget (inclusive).
+	Min int `json:"min"`
+	// Max is the maximum allowed thinking budget (inclusive).
+	Max int `json:"max"`
+	// ZeroAllowed indicates whether 0 is a valid value (disables thinking), even when 0 is below Min.
+	ZeroAllowed bool `json:"zero_allowed"`
+	// DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget).
+	DynamicAllowed bool `json:"dynamic_allowed"`
 }
 
 // ModelRegistration tracks a model's availability
@@ -652,6 +669,17 @@ func (r *ModelRegistry) GetModelProviders(modelID string) []string {
 	return result
 }
 
+// GetModelInfo returns the registered ModelInfo for modelID, or nil if the model is unknown.
+// The returned pointer is the registry's own entry (not a copy), so treat it as read-only.
+func (r *ModelRegistry) GetModelInfo(modelID string) *ModelInfo {
+	r.mutex.RLock()
+	defer r.mutex.RUnlock()
+	if reg, ok := r.models[modelID]; ok && reg != nil {
+		return reg.Info
+	}
+	return nil
+}
+
 // convertModelToMap converts ModelInfo to the appropriate format for different handler types
 func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string) map[string]any {
 	if model == nil {