fix(thinking): map budgets to effort levels

Ensure thinking settings translate correctly across providers:

- Only apply reasoning_effort to level-based models, and derive it from a numeric budget suffix when one is present.
- Strip effort string fields for budget-based models, and skip Claude/Gemini budget resolution for level-based or unsupported models.
- Default Gemini include_thoughts to true when a nonzero budget override is set and no explicit include flag is provided.
- Add cross-protocol conversion and budget range tests.
2026-02-03 04:50:52 +08:00 · 2025-12-12 21:33:20 +08:00
parent d131435e25
commit 374faa2640
6 changed files with 645 additions and 12 deletions
--- a/internal/runtime/executor/payload_helpers.go
+++ b/internal/runtime/executor/payload_helpers.go
@@ -59,8 +59,20 @@ func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model
 		return payload
 	}
 	if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
-		if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
-			return updated
+		if util.ModelUsesThinkingLevels(model) {
+			if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
+				return updated
+			}
+		}
+	}
+	// Fallback: numeric thinking_budget suffix for level-based (OpenAI-style) models.
+	if util.ModelUsesThinkingLevels(model) {
+		if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
+			if effort, ok := util.OpenAIThinkingBudgetToEffort(model, *budget); ok && effort != "" {
+				if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
+					return updated
+				}
+			}
 		}
 	}
 	return payload
@@ -219,31 +231,37 @@ func matchModelPattern(pattern, model string) bool {
 // normalizeThinkingConfig normalizes thinking-related fields in the payload
 // based on model capabilities. For models without thinking support, it strips
 // reasoning fields. For models with level-based thinking, it validates and
-// normalizes the reasoning effort level.
+// normalizes the reasoning effort level. For models with numeric budget thinking,
+// it strips the effort string fields.
 func normalizeThinkingConfig(payload []byte, model string) []byte {
 	if len(payload) == 0 || model == "" {
 		return payload
 	}

 	if !util.ModelSupportsThinking(model) {
-		return stripThinkingFields(payload)
+		return stripThinkingFields(payload, false)
 	}

 	if util.ModelUsesThinkingLevels(model) {
 		return normalizeReasoningEffortLevel(payload, model)
 	}

-	return payload
+	// Model supports thinking but uses numeric budgets, not levels.
+	// Strip effort string fields since they are not applicable.
+	return stripThinkingFields(payload, true)
 }

 // stripThinkingFields removes thinking-related fields from the payload for
-// models that do not support thinking.
-func stripThinkingFields(payload []byte) []byte {
+// models that do not support thinking. If effortOnly is true, only removes
+// effort string fields (for models using numeric budgets).
+func stripThinkingFields(payload []byte, effortOnly bool) []byte {
 	fieldsToRemove := []string{
-		"reasoning",
 		"reasoning_effort",
 		"reasoning.effort",
 	}
+	if !effortOnly {
+		fieldsToRemove = append([]string{"reasoning"}, fieldsToRemove...)
+	}
 	out := payload
 	for _, field := range fieldsToRemove {
 		if gjson.GetBytes(out, field).Exists() {
--- a/internal/util/claude_thinking.go
+++ b/internal/util/claude_thinking.go
@@ -28,6 +28,9 @@ func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte {
 // It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget.
 // Returns the normalized budget (nil if thinking should not be enabled) and whether it matched.
 func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) {
+	if !ModelSupportsThinking(modelName) {
+		return nil, false
+	}
 	budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata)
 	if !matched {
 		return nil, false
--- a/internal/util/gemini_thinking.go
+++ b/internal/util/gemini_thinking.go
@@ -25,9 +25,15 @@ func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool)
 			updated = rewritten
 		}
 	}
-	if includeThoughts != nil {
+	// Default to including thoughts when a budget override is present but no explicit include flag is provided.
+	incl := includeThoughts
+	if incl == nil && budget != nil && *budget != 0 {
+		defaultInclude := true
+		incl = &defaultInclude
+	}
+	if incl != nil {
 		valuePath := "generationConfig.thinkingConfig.include_thoughts"
-		rewritten, err := sjson.SetBytes(updated, valuePath, *includeThoughts)
+		rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
 		if err == nil {
 			updated = rewritten
 		}
@@ -47,9 +53,15 @@ func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *boo
 			updated = rewritten
 		}
 	}
-	if includeThoughts != nil {
+	// Default to including thoughts when a budget override is present but no explicit include flag is provided.
+	incl := includeThoughts
+	if incl == nil && budget != nil && *budget != 0 {
+		defaultInclude := true
+		incl = &defaultInclude
+	}
+	if incl != nil {
 		valuePath := "request.generationConfig.thinkingConfig.include_thoughts"
-		rewritten, err := sjson.SetBytes(updated, valuePath, *includeThoughts)
+		rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
 		if err == nil {
 			updated = rewritten
 		}
--- a/internal/util/openai_thinking.go
+++ b/internal/util/openai_thinking.go
@@ -0,0 +1,34 @@
+package util
+
+// OpenAIThinkingBudgetToEffort maps a numeric thinking budget (tokens)
+// into an OpenAI-style reasoning effort level for level-based models.
+//
+// Ranges:
+//   - 0            -> "none"
+//   - 1..1024      -> "low"
+//   - 1025..8192   -> "medium"
+//   - 8193..24576  -> "high"
+//   - 24577..      -> highest supported level for the model (defaults to "xhigh")
+//
+// Negative values, including the dynamic -1 sentinel (which callers handle elsewhere), are reported as unsupported.
+func OpenAIThinkingBudgetToEffort(model string, budget int) (string, bool) {
+	switch {
+	case budget < 0:
+		return "", false
+	case budget == 0:
+		return "none", true
+	case budget > 0 && budget <= 1024:
+		return "low", true
+	case budget <= 8192:
+		return "medium", true
+	case budget <= 24576:
+		return "high", true
+	case budget > 24576:
+		if levels := GetModelThinkingLevels(model); len(levels) > 0 {
+			return levels[len(levels)-1], true
+		}
+		return "xhigh", true
+	default:
+		return "", false
+	}
+}
--- a/internal/util/thinking_suffix.go
+++ b/internal/util/thinking_suffix.go
@@ -163,6 +163,11 @@ func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (*
 	if !matched {
 		return nil, nil, false
 	}
+	// Level-based models (OpenAI-style) do not accept numeric thinking budgets in
+	// Claude/Gemini-style protocols, so we don't derive budgets for them here.
+	if ModelUsesThinkingLevels(model) {
+		return nil, nil, false
+	}

 	if budget == nil && effort != nil {
 		if derived, ok := ThinkingEffortToBudget(model, *effort); ok {