fix(thinking): use model alias for thinking config resolution in mapped models

2026-02-18 04:10:51 +08:00 · 2025-12-31 16:10:51 +08:00
parent e332419081
commit 89db4e9481
5 changed files with 44 additions and 22 deletions
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -98,7 +98,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 	translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
 	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
-	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
+	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated, isClaude)
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated)
@@ -193,7 +193,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
 	translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
 	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
-	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
+	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated, true)
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated)
@@ -529,7 +529,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 	translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
 	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
-	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
+	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated, isClaude)
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated)
@@ -698,7 +698,7 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
 	for idx, baseURL := range baseURLs {
 		payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 		payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model)
-		payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, payload)
+		payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, payload)
 		payload = normalizeAntigravityThinking(req.Model, payload, isClaude)
 		payload = deleteJSONField(payload, "project")
 		payload = deleteJSONField(payload, "model")
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -80,7 +80,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
 	basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
-	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
+	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload)
 	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
 	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
@@ -219,7 +219,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
 	basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
-	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
+	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload)
 	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
 	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
--- a/internal/runtime/executor/payload_helpers.go
+++ b/internal/runtime/executor/payload_helpers.go
@@ -14,15 +14,18 @@ import (
 // ApplyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192))
 // for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
 func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
-	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
+	// Use the alias from metadata if available, as it's registered in the global registry
 	// with thinking metadata; the upstream model name may not be registered.
 	lookupModel := util.ResolveOriginalModel(model, metadata)
 	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(lookupModel, metadata)
 	if !ok || (budgetOverride == nil && includeOverride == nil) {
 		return payload
 	}
-	if !util.ModelSupportsThinking(model) {
+	if !util.ModelSupportsThinking(lookupModel) {
 		return payload
 	}
 	if budgetOverride != nil {
-		norm := util.NormalizeThinkingBudget(model, *budgetOverride)
+		norm := util.NormalizeThinkingBudget(lookupModel, *budgetOverride)
 		budgetOverride = &norm
 	}
 	return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
@@ -31,15 +34,18 @@ func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string
 // applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192))
 // for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
 func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
-	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
+	// Use the alias from metadata if available, as it's registered in the global registry
 	// with thinking metadata; the upstream model name may not be registered.
 	lookupModel := util.ResolveOriginalModel(model, metadata)
 	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(lookupModel, metadata)
 	if !ok || (budgetOverride == nil && includeOverride == nil) {
 		return payload
 	}
-	if !util.ModelSupportsThinking(model) {
+	if !util.ModelSupportsThinking(lookupModel) {
 		return payload
 	}
 	if budgetOverride != nil {
-		norm := util.NormalizeThinkingBudget(model, *budgetOverride)
+		norm := util.NormalizeThinkingBudget(lookupModel, *budgetOverride)
 		budgetOverride = &norm
 	}
 	return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
--- a/internal/util/gemini_thinking.go
+++ b/internal/util/gemini_thinking.go
@@ -290,15 +290,21 @@ func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte {
 // For standard Gemini API format (generationConfig.thinkingConfig path).
 // This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)).
 func ApplyGemini3ThinkingLevelFromMetadata(model string, metadata map[string]any, body []byte) []byte {
-	if !IsGemini3Model(model) {
+	// Use the alias from metadata if available for model type detection
 	lookupModel := ResolveOriginalModel(model, metadata)
 	if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) {
 		return body
 	}
 	effort, ok := ReasoningEffortFromMetadata(metadata)
 	if !ok || effort == "" {
 		return body
 	}
-	// Validate and apply the thinkingLevel
+	// Validate and apply the thinkingLevel using the model that matches Gemini 3 pattern
-	if level, valid := ValidateGemini3ThinkingLevel(model, effort); valid {
+	checkModel := model
 	if IsGemini3Model(lookupModel) {
 		checkModel = lookupModel
 	}
 	if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid {
 		return ApplyGeminiThinkingLevel(body, level, nil)
 	}
 	return body
@@ -308,15 +314,21 @@ func ApplyGemini3ThinkingLevelFromMetadata(model string, metadata map[string]any
 // For Gemini CLI API format (request.generationConfig.thinkingConfig path).
 // This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)).
 func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string]any, body []byte) []byte {
-	if !IsGemini3Model(model) {
+	// Use the alias from metadata if available for model type detection
 	lookupModel := ResolveOriginalModel(model, metadata)
 	if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) {
 		return body
 	}
 	effort, ok := ReasoningEffortFromMetadata(metadata)
 	if !ok || effort == "" {
 		return body
 	}
-	// Validate and apply the thinkingLevel
+	// Validate and apply the thinkingLevel using the model that matches Gemini 3 pattern
-	if level, valid := ValidateGemini3ThinkingLevel(model, effort); valid {
+	checkModel := model
 	if IsGemini3Model(lookupModel) {
 		checkModel = lookupModel
 	}
 	if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid {
 		return ApplyGeminiCLIThinkingLevel(body, level, nil)
 	}
 	return body
@@ -326,15 +338,17 @@ func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string]
 // For Gemini CLI API format (request.generationConfig.thinkingConfig path).
 // Returns the modified body if thinkingConfig was added, otherwise returns the original.
 // For Gemini 3 models, uses thinkingLevel instead of thinkingBudget per Google's documentation.
-func ApplyDefaultThinkingIfNeededCLI(model string, body []byte) []byte {
+func ApplyDefaultThinkingIfNeededCLI(model string, metadata map[string]any, body []byte) []byte {
-	if !ModelHasDefaultThinking(model) {
+	// Use the alias from metadata if available for model property lookup
 	lookupModel := ResolveOriginalModel(model, metadata)
 	if !ModelHasDefaultThinking(lookupModel) && !ModelHasDefaultThinking(model) {
 		return body
 	}
 	if gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() {
 		return body
 	}
 	// Gemini 3 models use thinkingLevel instead of thinkingBudget
-	if IsGemini3Model(model) {
+	if IsGemini3Model(lookupModel) || IsGemini3Model(model) {
 		// Don't set a default - let the API use its dynamic default ("high")
 		// Only set includeThoughts
 		updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts", true)
--- a/sdk/cliproxy/auth/model_name_mappings.go
+++ b/sdk/cliproxy/auth/model_name_mappings.go
@@ -81,7 +81,9 @@ func (m *Manager) applyOAuthModelMapping(auth *Auth, requestedModel string, meta
 			out[k] = v
 		}
 	}
-	out[util.ModelMappingOriginalModelMetadataKey] = upstreamModel
+	// Store the requested alias (e.g., "gp") so downstream can use it to look up
 	// model metadata from the global registry where it was registered under this alias.
 	out[util.ModelMappingOriginalModelMetadataKey] = requestedModel
 	return upstreamModel, out
 }