diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 950141f0..96ee18d0 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -98,7 +98,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated) + translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated) translated = normalizeAntigravityThinking(req.Model, translated, isClaude) translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated) @@ -193,7 +193,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth * translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated) + translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated) translated = normalizeAntigravityThinking(req.Model, translated, true) translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated) @@ -529,7 +529,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated) + translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated) translated = normalizeAntigravityThinking(req.Model, translated, isClaude) translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated) @@ -698,7 +698,7 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut for idx, baseURL := range baseURLs { payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model) - payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, payload) + payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, payload) payload = normalizeAntigravityThinking(req.Model, payload, isClaude) payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "model") diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index a3b75839..74ded2a6 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -80,7 +80,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload) - basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload) + basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload) basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) @@ -219,7 +219,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload) - basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload) + basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload) basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index adb224a8..9fffb06d 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -14,15 +14,18 @@ import ( // ApplyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192)) // for standard Gemini format payloads. It normalizes the budget when the model supports thinking. func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte { - budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata) + // Use the alias from metadata if available, as it's registered in the global registry + // with thinking metadata; the upstream model name may not be registered. + lookupModel := util.ResolveOriginalModel(model, metadata) + budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(lookupModel, metadata) if !ok || (budgetOverride == nil && includeOverride == nil) { return payload } - if !util.ModelSupportsThinking(model) { + if !util.ModelSupportsThinking(lookupModel) { return payload } if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(model, *budgetOverride) + norm := util.NormalizeThinkingBudget(lookupModel, *budgetOverride) budgetOverride = &norm } return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride) @@ -31,15 +34,18 @@ func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string // applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192)) // for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking. func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte { - budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata) + // Use the alias from metadata if available, as it's registered in the global registry + // with thinking metadata; the upstream model name may not be registered. + lookupModel := util.ResolveOriginalModel(model, metadata) + budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(lookupModel, metadata) if !ok || (budgetOverride == nil && includeOverride == nil) { return payload } - if !util.ModelSupportsThinking(model) { + if !util.ModelSupportsThinking(lookupModel) { return payload } if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(model, *budgetOverride) + norm := util.NormalizeThinkingBudget(lookupModel, *budgetOverride) budgetOverride = &norm } return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride) diff --git a/internal/util/gemini_thinking.go b/internal/util/gemini_thinking.go index 290d5f92..5ebbf426 100644 --- a/internal/util/gemini_thinking.go +++ b/internal/util/gemini_thinking.go @@ -290,15 +290,21 @@ func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte { // For standard Gemini API format (generationConfig.thinkingConfig path). // This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)). func ApplyGemini3ThinkingLevelFromMetadata(model string, metadata map[string]any, body []byte) []byte { - if !IsGemini3Model(model) { + // Use the alias from metadata if available for model type detection + lookupModel := ResolveOriginalModel(model, metadata) + if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) { return body } effort, ok := ReasoningEffortFromMetadata(metadata) if !ok || effort == "" { return body } - // Validate and apply the thinkingLevel - if level, valid := ValidateGemini3ThinkingLevel(model, effort); valid { + // Validate and apply the thinkingLevel using the model that matches Gemini 3 pattern + checkModel := model + if IsGemini3Model(lookupModel) { + checkModel = lookupModel + } + if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid { return ApplyGeminiThinkingLevel(body, level, nil) } return body @@ -308,15 +314,21 @@ func ApplyGemini3ThinkingLevelFromMetadata(model string, metadata map[string]any // For Gemini CLI API format (request.generationConfig.thinkingConfig path). // This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)). func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string]any, body []byte) []byte { - if !IsGemini3Model(model) { + // Use the alias from metadata if available for model type detection + lookupModel := ResolveOriginalModel(model, metadata) + if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) { return body } effort, ok := ReasoningEffortFromMetadata(metadata) if !ok || effort == "" { return body } - // Validate and apply the thinkingLevel - if level, valid := ValidateGemini3ThinkingLevel(model, effort); valid { + // Validate and apply the thinkingLevel using the model that matches Gemini 3 pattern + checkModel := model + if IsGemini3Model(lookupModel) { + checkModel = lookupModel + } + if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid { return ApplyGeminiCLIThinkingLevel(body, level, nil) } return body @@ -326,15 +338,17 @@ func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string] // For Gemini CLI API format (request.generationConfig.thinkingConfig path). // Returns the modified body if thinkingConfig was added, otherwise returns the original. // For Gemini 3 models, uses thinkingLevel instead of thinkingBudget per Google's documentation. -func ApplyDefaultThinkingIfNeededCLI(model string, body []byte) []byte { - if !ModelHasDefaultThinking(model) { +func ApplyDefaultThinkingIfNeededCLI(model string, metadata map[string]any, body []byte) []byte { + // Use the alias from metadata if available for model property lookup + lookupModel := ResolveOriginalModel(model, metadata) + if !ModelHasDefaultThinking(lookupModel) && !ModelHasDefaultThinking(model) { return body } if gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() { return body } // Gemini 3 models use thinkingLevel instead of thinkingBudget - if IsGemini3Model(model) { + if IsGemini3Model(lookupModel) || IsGemini3Model(model) { // Don't set a default - let the API use its dynamic default ("high") // Only set includeThoughts updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts", true) diff --git a/sdk/cliproxy/auth/model_name_mappings.go b/sdk/cliproxy/auth/model_name_mappings.go index f1b31aa5..03380c09 100644 --- a/sdk/cliproxy/auth/model_name_mappings.go +++ b/sdk/cliproxy/auth/model_name_mappings.go @@ -81,7 +81,9 @@ func (m *Manager) applyOAuthModelMapping(auth *Auth, requestedModel string, meta out[k] = v } } - out[util.ModelMappingOriginalModelMetadataKey] = upstreamModel + // Store the requested alias (e.g., "gp") so downstream can use it to look up + // model metadata from the global registry where it was registered under this alias. + out[util.ModelMappingOriginalModelMetadataKey] = requestedModel return upstreamModel, out }