fix(thinking): normalize effort mapping

Route OpenAI reasoning effort through ThinkingEffortToBudget for Claude
translators, preserve "minimal" when translating OpenAI Responses, and
treat blank/unknown efforts as no-ops for Gemini thinking configs.

Also map budget -1 to "auto" and expand cross-protocol thinking tests.
Author: hkfires
Date:   2025-12-14 20:11:30 +08:00
Parent: 712ce9f781
Commit: d20b71deb9
6 changed files with 160 additions and 142 deletions


@@ -66,6 +66,37 @@ func registerCoreModels(t *testing.T) func() {
     }
 }
+
+var (
+    thinkingTestModels = []string{
+        "gpt-5",           // level-based thinking model
+        "gemini-2.5-pro",  // numeric-budget thinking model
+        "qwen3-code-plus", // no thinking support
+        "openai-compat",   // allowCompat=true (OpenAI-compatible channel)
+    }
+    thinkingTestFromProtocols = []string{"openai", "claude", "gemini", "openai-response"}
+    thinkingTestToProtocols   = []string{"gemini", "claude", "openai", "codex"}
+    // Numeric budgets and their level equivalents:
+    //   -1          -> auto
+    //   0           -> none
+    //   1..1024     -> low
+    //   1025..8192  -> medium
+    //   8193..24576 -> high
+    //   >24576      -> model's highest level (right-most in Levels)
+    thinkingNumericSamples = []int{-1, 0, 1023, 1025, 8193, 64000}
+    // Levels and their numeric equivalents:
+    //   auto    -> -1
+    //   none    -> 0
+    //   minimal -> 512
+    //   low     -> 1024
+    //   medium  -> 8192
+    //   high    -> 24576
+    //   xhigh   -> 32768
+    //   invalid -> invalid (no mapping)
+    thinkingLevelSamples = []string{"auto", "none", "minimal", "low", "medium", "high", "xhigh", "invalid"}
+)
+
 func buildRawPayload(fromProtocol, modelWithSuffix string) []byte {
     switch fromProtocol {
     case "gemini":
@@ -101,19 +132,30 @@ func applyReasoningEffortMetadataLocal(payload []byte, metadata map[string]any,
     if field == "" {
         return payload
     }
-    if !util.ModelSupportsThinking(model) && !allowCompat {
+    baseModel := util.ResolveOriginalModel(model, metadata)
+    if baseModel == "" {
+        baseModel = model
+    }
+    if !util.ModelSupportsThinking(baseModel) && !allowCompat {
         return payload
     }
     if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
-        if util.ModelUsesThinkingLevels(model) || allowCompat {
+        if util.ModelUsesThinkingLevels(baseModel) || allowCompat {
             if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
                 return updated
             }
         }
     }
-    if util.ModelUsesThinkingLevels(model) || allowCompat {
+    // Fallback: numeric thinking_budget suffix for level-based (OpenAI-style) models.
+    if util.ModelUsesThinkingLevels(baseModel) || allowCompat {
         if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
-            if effort, ok := util.OpenAIThinkingBudgetToEffort(model, *budget); ok && effort != "" {
+            if effort, ok := util.OpenAIThinkingBudgetToEffort(baseModel, *budget); ok && effort != "" {
+                if *budget == 0 && effort == "none" && util.ModelUsesThinkingLevels(baseModel) {
+                    if _, supported := util.NormalizeReasoningEffortLevel(baseModel, effort); !supported {
+                        return stripThinkingFieldsLocal(payload, false)
+                    }
+                }
                 if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
                     return updated
                 }
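
The key change here routes capability checks through the original (pre-rewrite) model. A hedged sketch of the intent — the alias scenario is hypothetical, and only calls visible in this hunk are used:

    // Suppose "gpt-5" was rewritten to a channel-specific alias before
    // reaching this translator. The alias may not be registered as a
    // thinking model, so checking it directly would wrongly skip effort
    // handling. Resolving the original model first keeps
    // ModelSupportsThinking / ModelUsesThinkingLevels accurate.
    baseModel := util.ResolveOriginalModel(model, metadata)
    if baseModel == "" {
        baseModel = model // no rewrite recorded; use the given name
    }

The zero-budget guard then handles the one level that is not universally supported: a budget of 0 maps to "none", and if the base model's Levels don't include "none", the thinking fields are stripped rather than forwarding an effort the upstream would reject.
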
@@ -320,79 +362,46 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) {
     cleanup := registerCoreModels(t)
     defer cleanup()
-    models := []string{
-        "gpt-5",           // supports levels (level-based thinking)
-        "gemini-2.5-pro",  // supports numeric budget
-        "qwen3-code-plus", // no thinking support
-        "openai-compat",   // openai-compatible channel (allowCompat=true)
-    }
-    fromProtocols := []string{"openai", "claude", "gemini", "openai-response"}
-    toProtocols := []string{"gemini", "claude", "openai", "codex"}
     type scenario struct {
         name        string
         modelSuffix string
-        expectFn    func(info *registry.ModelInfo) (present bool, budget int64)
     }
-    buildBudgetFn := func(raw int) func(info *registry.ModelInfo) (bool, int64) {
-        return func(info *registry.ModelInfo) (bool, int64) {
-            if info == nil || info.Thinking == nil {
-                return false, 0
-            }
-            return true, int64(util.NormalizeThinkingBudget(info.ID, raw))
-        }
-    }
+    numericName := func(budget int) string {
+        if budget < 0 {
+            return "numeric-neg1"
+        }
+        return fmt.Sprintf("numeric-%d", budget)
+    }
-    levelBudgetFn := func(level string) func(info *registry.ModelInfo) (bool, int64) {
-        return func(info *registry.ModelInfo) (bool, int64) {
-            if info == nil || info.Thinking == nil {
-                return false, 0
-            }
-            if b, ok := util.ThinkingEffortToBudget(info.ID, level); ok {
-                return true, int64(b)
-            }
-            return false, 0
-        }
-    }
-    for _, model := range models {
+    for _, model := range thinkingTestModels {
         _ = registry.GetGlobalRegistry().GetModelInfo(model)
-        for _, from := range fromProtocols {
+        for _, from := range thinkingTestFromProtocols {
+            // Scenario selection follows protocol semantics:
+            //   - OpenAI-style protocols (openai/openai-response) express thinking as levels.
+            //   - Claude/Gemini-style protocols express thinking as numeric budgets.
             cases := []scenario{
-                {name: "no-suffix", modelSuffix: model, expectFn: func(_ *registry.ModelInfo) (bool, int64) { return false, 0 }},
+                {name: "no-suffix", modelSuffix: model},
             }
             if from == "openai" || from == "openai-response" {
-                // Level-based test cases: auto, none, minimal, low, medium, high, xhigh, foo(invalid)
-                // Maps to numeric: -1, 0, 512, 1024, 8192, 24576, 32768, invalid
-                cases = append(cases,
-                    scenario{name: "level-auto", modelSuffix: fmt.Sprintf("%s(auto)", model), expectFn: levelBudgetFn("auto")},
-                    scenario{name: "level-none", modelSuffix: fmt.Sprintf("%s(none)", model), expectFn: levelBudgetFn("none")},
-                    scenario{name: "level-minimal", modelSuffix: fmt.Sprintf("%s(minimal)", model), expectFn: levelBudgetFn("minimal")},
-                    scenario{name: "level-low", modelSuffix: fmt.Sprintf("%s(low)", model), expectFn: levelBudgetFn("low")},
-                    scenario{name: "level-medium", modelSuffix: fmt.Sprintf("%s(medium)", model), expectFn: levelBudgetFn("medium")},
-                    scenario{name: "level-high", modelSuffix: fmt.Sprintf("%s(high)", model), expectFn: levelBudgetFn("high")},
-                    scenario{name: "level-xhigh", modelSuffix: fmt.Sprintf("%s(xhigh)", model), expectFn: levelBudgetFn("xhigh")},
-                    scenario{name: "level-invalid", modelSuffix: fmt.Sprintf("%s(invalid)", model), expectFn: levelBudgetFn("invalid")},
-                )
+                for _, lvl := range thinkingLevelSamples {
+                    cases = append(cases, scenario{
+                        name:        "level-" + lvl,
+                        modelSuffix: fmt.Sprintf("%s(%s)", model, lvl),
+                    })
+                }
             } else { // claude or gemini
-                // Numeric test cases: -1, 0, 1023, 1025, 8193, 24577
-                // Maps to levels: auto, none, low, medium, high, xhigh
-                cases = append(cases,
-                    scenario{name: "numeric-neg1", modelSuffix: fmt.Sprintf("%s(-1)", model), expectFn: buildBudgetFn(-1)},
-                    scenario{name: "numeric-0", modelSuffix: fmt.Sprintf("%s(0)", model), expectFn: buildBudgetFn(0)},
-                    scenario{name: "numeric-1023", modelSuffix: fmt.Sprintf("%s(1023)", model), expectFn: buildBudgetFn(1023)},
-                    scenario{name: "numeric-1025", modelSuffix: fmt.Sprintf("%s(1025)", model), expectFn: buildBudgetFn(1025)},
-                    scenario{name: "numeric-8193", modelSuffix: fmt.Sprintf("%s(8193)", model), expectFn: buildBudgetFn(8193)},
-                    scenario{name: "numeric-24577", modelSuffix: fmt.Sprintf("%s(24577)", model), expectFn: buildBudgetFn(24577)},
-                )
+                for _, budget := range thinkingNumericSamples {
+                    budget := budget
+                    cases = append(cases, scenario{
+                        name:        numericName(budget),
+                        modelSuffix: fmt.Sprintf("%s(%d)", model, budget),
+                    })
+                }
             }
-            for _, to := range toProtocols {
+            for _, to := range thinkingTestToProtocols {
                 if from == to {
                     continue
                 }
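
For reference, the loops above expand each base model into these scenario names, derived from the shared sample slices (the no-suffix case is common to all protocols):

    openai / openai-response: level-auto, level-none, level-minimal, level-low,
                              level-medium, level-high, level-xhigh, level-invalid
    claude / gemini:          numeric-neg1, numeric-0, numeric-1023, numeric-1025,
                              numeric-8193, numeric-64000

so, for example, "gpt-5" is exercised as gpt-5(high) or as gpt-5(8193), depending on the source protocol.
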
@@ -585,7 +594,7 @@ func buildRawPayloadWithThinking(fromProtocol, model string, thinkingParam any)
         return []byte(base)
     case "claude":
         base := fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model)
-        if budget, ok := thinkingParam.(int); ok && budget > 0 {
+        if budget, ok := thinkingParam.(int); ok {
            base, _ = sjson.Set(base, "thinking.type", "enabled")
            base, _ = sjson.Set(base, "thinking.budget_tokens", budget)
        }
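
With the budget > 0 guard dropped, zero and negative budgets now produce a thinking block as well. For example, thinkingParam = -1 (auto) builds a payload along these lines, using sjson exactly as the helper does (the model is just one of the test models):

    base := `{"model":"gemini-2.5-pro","messages":[{"role":"user","content":"hi"}]}`
    base, _ = sjson.Set(base, "thinking.type", "enabled")
    base, _ = sjson.Set(base, "thinking.budget_tokens", -1)
    // base now contains "thinking":{"type":"enabled","budget_tokens":-1},
    // so the auto (-1) and none (0) conversion paths are finally exercised
    // instead of being silently dropped at payload-construction time.
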
@@ -636,55 +645,50 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
     cleanup := registerCoreModels(t)
     defer cleanup()
-    models := []string{
-        "gpt-5",           // supports levels (level-based thinking)
-        "gemini-2.5-pro",  // supports numeric budget
-        "qwen3-code-plus", // no thinking support
-        "openai-compat",   // openai-compatible channel (allowCompat=true)
-    }
-    fromProtocols := []string{"openai", "claude", "gemini", "openai-response"}
-    toProtocols := []string{"gemini", "claude", "openai", "codex"}
     type scenario struct {
         name          string
         thinkingParam any // int for budget, string for effort level
     }
-    for _, model := range models {
+    numericName := func(budget int) string {
+        if budget < 0 {
+            return "budget-neg1"
+        }
+        return fmt.Sprintf("budget-%d", budget)
+    }
+    for _, model := range thinkingTestModels {
         supportsThinking := util.ModelSupportsThinking(model)
         usesLevels := util.ModelUsesThinkingLevels(model)
         allowCompat := isOpenAICompatModel(model)
-        for _, from := range fromProtocols {
+        for _, from := range thinkingTestFromProtocols {
             var cases []scenario
             switch from {
             case "openai", "openai-response":
-                // Level-based test cases: auto, none, minimal, low, medium, high, xhigh, foo(invalid)
                 cases = []scenario{
                     {name: "no-thinking", thinkingParam: nil},
-                    {name: "effort-auto", thinkingParam: "auto"},
-                    {name: "effort-none", thinkingParam: "none"},
-                    {name: "effort-minimal", thinkingParam: "minimal"},
-                    {name: "effort-low", thinkingParam: "low"},
-                    {name: "effort-medium", thinkingParam: "medium"},
-                    {name: "effort-high", thinkingParam: "high"},
-                    {name: "effort-xhigh", thinkingParam: "xhigh"},
-                    {name: "effort-invalid", thinkingParam: "invalid"},
                 }
+                for _, lvl := range thinkingLevelSamples {
+                    cases = append(cases, scenario{
+                        name:          "effort-" + lvl,
+                        thinkingParam: lvl,
+                    })
+                }
             case "gemini", "claude":
-                // Numeric test cases: -1, 0, 1023, 1025, 8193, 24577
                 cases = []scenario{
                     {name: "no-thinking", thinkingParam: nil},
-                    {name: "budget-neg1", thinkingParam: -1},
-                    {name: "budget-0", thinkingParam: 0},
-                    {name: "budget-1023", thinkingParam: 1023},
-                    {name: "budget-1025", thinkingParam: 1025},
-                    {name: "budget-8193", thinkingParam: 8193},
-                    {name: "budget-24577", thinkingParam: 24577},
                 }
+                for _, budget := range thinkingNumericSamples {
+                    budget := budget
+                    cases = append(cases, scenario{
+                        name:          numericName(budget),
+                        thinkingParam: budget,
+                    })
+                }
             }
-            for _, to := range toProtocols {
+            for _, to := range thinkingTestToProtocols {
                 if from == to {
                     continue
                 }
@@ -727,8 +731,8 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
                 // ThinkingEffortToBudget already returns normalized budget
                 return true, fmt.Sprintf("%d", budget), false
             }
-            // Invalid effort maps to default auto (-1)
-            return true, "-1", false
+            // Invalid effort does not map to a budget
+            return false, "", false
         }
         return false, "", false
     case "claude":
@@ -758,14 +762,8 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
     case "openai":
         if allowCompat {
             if effort, ok := cs.thinkingParam.(string); ok && strings.TrimSpace(effort) != "" {
-                // For allowCompat models, invalid effort values are normalized to "auto"
                 normalized := strings.ToLower(strings.TrimSpace(effort))
-                switch normalized {
-                case "none", "auto", "low", "medium", "high", "xhigh":
-                    return true, normalized, false
-                default:
-                    return true, "auto", false
-                }
+                return true, normalized, false
             }
             if budget, ok := cs.thinkingParam.(int); ok {
                 if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" {
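
The compat expectation becomes a pure passthrough. A quick illustration of the behavioral difference (the input string is made up):

    normalized := strings.ToLower(strings.TrimSpace("  XHigh "))
    // normalized == "xhigh"
    // Before: values outside {none, auto, low, medium, high, xhigh} were
    // rewritten to "auto". After: the trimmed, lower-cased effort is
    // forwarded unchanged, letting the OpenAI-compatible upstream decide.
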
@@ -891,6 +889,7 @@ func TestOpenAIThinkingBudgetToEffortRanges(t *testing.T) {
         want   string
         ok     bool
     }{
+        {name: "dynamic-auto", model: "gpt-5", budget: -1, want: "auto", ok: true},
         {name: "zero-none", model: "gpt-5", budget: 0, want: "none", ok: true},
         {name: "low-min", model: "gpt-5", budget: 1, want: "low", ok: true},
         {name: "low-max", model: "gpt-5", budget: 1024, want: "low", ok: true},
@@ -898,7 +897,7 @@ func TestOpenAIThinkingBudgetToEffortRanges(t *testing.T) {
         {name: "medium-max", model: "gpt-5", budget: 8192, want: "medium", ok: true},
         {name: "high-min", model: "gpt-5", budget: 8193, want: "high", ok: true},
         {name: "high-max", model: "gpt-5", budget: 24576, want: "high", ok: true},
-        {name: "over-max-clamps-to-highest", model: "gpt-5", budget: 24577, want: "high", ok: true},
+        {name: "over-max-clamps-to-highest", model: "gpt-5", budget: 64000, want: "high", ok: true},
         {name: "over-max-xhigh-model", model: "gpt-5.2", budget: 50000, want: "xhigh", ok: true},
         {name: "negative-unsupported", model: "gpt-5", budget: -5, want: "", ok: false},
     }