test(thinking): expand conversion edge case coverage

commit a4a3274a55
parent 716aa71f6e
Author: hkfires
Date:   2025-12-14 18:32:20 +08:00


@@ -28,7 +28,7 @@ func (e statusErr) Error() string { return e.msg }
 // model that should have reasoning effort passed through even if not in registry.
 // This simulates the allowCompat behavior from OpenAICompatExecutor.
 func isOpenAICompatModel(model string) bool {
-    return model == "custom-thinking-model"
+    return model == "openai-compat"
 }
 // registerCoreModels loads representative models across providers into the registry
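
Note: for readers landing on this diff, the allowCompat behavior that isOpenAICompatModel stands in for can be sketched roughly as follows; the names below are illustrative, not the executor's actual API:

// effortPassthrough sketches the gate being simulated: reasoning effort
// normally survives only for models present in the registry, but an
// OpenAI-compatible channel (allowCompat) passes it through untouched.
func effortPassthrough(effort string, inRegistry, allowCompat bool) (string, bool) {
    if inRegistry || allowCompat {
        return effort, true
    }
    return "", false // unknown model on a strict channel: effort is dropped
}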
@@ -47,12 +47,12 @@ func registerCoreModels(t *testing.T) func() {
     // The allowCompat flag should preserve reasoning effort for such models.
     customOpenAIModels := []*registry.ModelInfo{
         {
-            ID:          "custom-thinking-model",
+            ID:          "openai-compat",
             Object:      "model",
             Created:     1700000000,
             OwnedBy:     "custom-provider",
             Type:        "openai",
-            DisplayName: "Custom Thinking Model",
+            DisplayName: "OpenAI Compatible Model",
             Description: "OpenAI-compatible model with forced thinking suffix support",
         },
     }
@@ -321,10 +321,10 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) {
     defer cleanup()
     models := []string{
-        "gpt-5",                 // supports levels (low/medium/high)
-        "gemini-2.5-pro",        // supports numeric budget
-        "qwen3-coder-flash",     // no thinking support
-        "custom-thinking-model", // openai-compatible model with forced thinking suffix
+        "gpt-5",           // supports levels (level-based thinking)
+        "gemini-2.5-pro",  // supports numeric budget
+        "qwen3-code-plus", // no thinking support
+        "openai-compat",   // openai-compatible channel (allowCompat=true)
     }
     fromProtocols := []string{"openai", "claude", "gemini", "openai-response"}
     toProtocols := []string{"gemini", "claude", "openai", "codex"}
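
The four models pin down the capability matrix both suites iterate over. In terms of calls already used in this file, the classification looks roughly like this (a sketch, not a helper that exists in the test):

// modelThinkingKind distinguishes the three capability classes above:
// level-based (gpt-5), numeric-budget (gemini-2.5-pro), and none
// (qwen3-code-plus); openai-compat is the registry-backed compat channel.
func modelThinkingKind(model string) string {
    if util.ModelUsesThinkingLevels(model) {
        return "levels" // accepts low/medium/high style reasoning efforts
    }
    if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil && info.Thinking != nil {
        return "budget" // accepts a numeric budget within [Min, Max]
    }
    return "none" // thinking suffixes should be ignored or rejected
}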
@@ -357,12 +357,7 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) {
     }
     for _, model := range models {
-        info := registry.GetGlobalRegistry().GetModelInfo(model)
-        min, max := 0, 0
-        if info != nil && info.Thinking != nil {
-            min = info.Thinking.Min
-            max = info.Thinking.Max
-        }
+        _ = registry.GetGlobalRegistry().GetModelInfo(model)
         for _, from := range fromProtocols {
             // Scenario selection follows protocol semantics:
@@ -372,29 +367,29 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) {
{name: "no-suffix", modelSuffix: model, expectFn: func(_ *registry.ModelInfo) (bool, int64) { return false, 0 }},
}
if from == "openai" || from == "openai-response" {
// Level-based test cases: auto, none, minimal, low, medium, high, xhigh, foo(invalid)
// Maps to numeric: -1, 0, 512, 1024, 8192, 24576, 32768, invalid
cases = append(cases,
scenario{name: "level-low", modelSuffix: fmt.Sprintf("%s(low)", model), expectFn: levelBudgetFn("low")},
scenario{name: "level-high", modelSuffix: fmt.Sprintf("%s(high)", model), expectFn: levelBudgetFn("high")},
scenario{name: "level-auto", modelSuffix: fmt.Sprintf("%s(auto)", model), expectFn: levelBudgetFn("auto")},
scenario{name: "level-none", modelSuffix: fmt.Sprintf("%s(none)", model), expectFn: levelBudgetFn("none")},
scenario{name: "level-minimal", modelSuffix: fmt.Sprintf("%s(minimal)", model), expectFn: levelBudgetFn("minimal")},
scenario{name: "level-low", modelSuffix: fmt.Sprintf("%s(low)", model), expectFn: levelBudgetFn("low")},
scenario{name: "level-medium", modelSuffix: fmt.Sprintf("%s(medium)", model), expectFn: levelBudgetFn("medium")},
scenario{name: "level-high", modelSuffix: fmt.Sprintf("%s(high)", model), expectFn: levelBudgetFn("high")},
scenario{name: "level-xhigh", modelSuffix: fmt.Sprintf("%s(xhigh)", model), expectFn: levelBudgetFn("xhigh")},
scenario{name: "level-invalid", modelSuffix: fmt.Sprintf("%s(invalid)", model), expectFn: levelBudgetFn("invalid")},
)
} else { // claude or gemini
if util.ModelUsesThinkingLevels(model) {
// Numeric budgets for level-based models are mapped into levels when needed.
cases = append(cases,
scenario{name: "numeric-0", modelSuffix: fmt.Sprintf("%s(0)", model), expectFn: buildBudgetFn(0)},
scenario{name: "numeric-1024", modelSuffix: fmt.Sprintf("%s(1024)", model), expectFn: buildBudgetFn(1024)},
scenario{name: "numeric-1025", modelSuffix: fmt.Sprintf("%s(1025)", model), expectFn: buildBudgetFn(1025)},
scenario{name: "numeric-8192", modelSuffix: fmt.Sprintf("%s(8192)", model), expectFn: buildBudgetFn(8192)},
scenario{name: "numeric-8193", modelSuffix: fmt.Sprintf("%s(8193)", model), expectFn: buildBudgetFn(8193)},
scenario{name: "numeric-24576", modelSuffix: fmt.Sprintf("%s(24576)", model), expectFn: buildBudgetFn(24576)},
scenario{name: "numeric-24577", modelSuffix: fmt.Sprintf("%s(24577)", model), expectFn: buildBudgetFn(24577)},
)
} else {
cases = append(cases,
scenario{name: "numeric-below-min", modelSuffix: fmt.Sprintf("%s(%d)", model, min-10), expectFn: buildBudgetFn(min - 10)},
scenario{name: "numeric-above-max", modelSuffix: fmt.Sprintf("%s(%d)", model, max+10), expectFn: buildBudgetFn(max + 10)},
)
}
// Numeric test cases: -1, 0, 1023, 1025, 8193, 24577
// Maps to levels: auto, none, low, medium, high, xhigh
cases = append(cases,
scenario{name: "numeric-neg1", modelSuffix: fmt.Sprintf("%s(-1)", model), expectFn: buildBudgetFn(-1)},
scenario{name: "numeric-0", modelSuffix: fmt.Sprintf("%s(0)", model), expectFn: buildBudgetFn(0)},
scenario{name: "numeric-1023", modelSuffix: fmt.Sprintf("%s(1023)", model), expectFn: buildBudgetFn(1023)},
scenario{name: "numeric-1025", modelSuffix: fmt.Sprintf("%s(1025)", model), expectFn: buildBudgetFn(1025)},
scenario{name: "numeric-8193", modelSuffix: fmt.Sprintf("%s(8193)", model), expectFn: buildBudgetFn(8193)},
scenario{name: "numeric-24577", modelSuffix: fmt.Sprintf("%s(24577)", model), expectFn: buildBudgetFn(24577)},
)
}
for _, to := range toProtocols {
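
Note: the two comment blocks in this hunk describe one correspondence read in both directions; the real conversions live in util (ThinkingEffortToBudget and its inverse), but the table the scenarios assert can be sketched standalone from the values in the comments:

// levelToBudget mirrors the level-to-numeric comment: auto maps to the
// -1 sentinel, none to 0 (thinking off), and the named levels to fixed
// budgets (minimal=512 ... xhigh=32768).
var levelToBudget = map[string]int64{
    "auto": -1, "none": 0, "minimal": 512, "low": 1024,
    "medium": 8192, "high": 24576, "xhigh": 32768,
}

// budgetToLevel mirrors the numeric-to-level comment: each budget is
// rounded up to the smallest covering level, so 1023 -> low, 1025 ->
// medium, 8193 -> high, 24577 -> xhigh (minimal is never produced in
// this direction, per the test's comments).
func budgetToLevel(budget int64) string {
    switch {
    case budget < 0:
        return "auto"
    case budget == 0:
        return "none"
    case budget <= 1024:
        return "low"
    case budget <= 8192:
        return "medium"
    case budget <= 24576:
        return "high"
    default:
        return "xhigh"
    }
}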
@@ -642,10 +637,10 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
     defer cleanup()
     models := []string{
-        "gpt-5",                 // supports levels (low/medium/high)
-        "gemini-2.5-pro",        // supports numeric budget
-        "qwen3-coder-flash",     // no thinking support
-        "custom-thinking-model", // openai-compatible model with forced thinking suffix
+        "gpt-5",           // supports levels (level-based thinking)
+        "gemini-2.5-pro",  // supports numeric budget
+        "qwen3-code-plus", // no thinking support
+        "openai-compat",   // openai-compatible channel (allowCompat=true)
     }
     fromProtocols := []string{"openai", "claude", "gemini", "openai-response"}
     toProtocols := []string{"gemini", "claude", "openai", "codex"}
@@ -664,27 +659,28 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
             var cases []scenario
             switch from {
             case "openai", "openai-response":
+                // Level-based test cases: auto, none, minimal, low, medium, high, xhigh, invalid
                 cases = []scenario{
                     {name: "no-thinking", thinkingParam: nil},
+                    {name: "effort-auto", thinkingParam: "auto"},
+                    {name: "effort-none", thinkingParam: "none"},
+                    {name: "effort-minimal", thinkingParam: "minimal"},
                     {name: "effort-low", thinkingParam: "low"},
                     {name: "effort-medium", thinkingParam: "medium"},
                     {name: "effort-high", thinkingParam: "high"},
+                    {name: "effort-xhigh", thinkingParam: "xhigh"},
-                    {name: "effort-invalid-foo", thinkingParam: "foo"},
+                    {name: "effort-invalid", thinkingParam: "invalid"},
                 }
-            case "gemini":
+            case "gemini", "claude":
+                // Numeric test cases: -1, 0, 1023, 1025, 8193, 24577
                 cases = []scenario{
                     {name: "no-thinking", thinkingParam: nil},
-                    {name: "budget-1024", thinkingParam: 1024},
-                    {name: "budget-8192", thinkingParam: 8192},
-                    {name: "budget-16384", thinkingParam: 16384},
-                }
-            case "claude":
-                cases = []scenario{
-                    {name: "no-thinking", thinkingParam: nil},
-                    {name: "budget-1024", thinkingParam: 1024},
-                    {name: "budget-8192", thinkingParam: 8192},
-                    {name: "budget-16384", thinkingParam: 16384},
+                    {name: "budget-neg1", thinkingParam: -1},
+                    {name: "budget-0", thinkingParam: 0},
+                    {name: "budget-1023", thinkingParam: 1023},
+                    {name: "budget-1025", thinkingParam: 1025},
+                    {name: "budget-8193", thinkingParam: 8193},
+                    {name: "budget-24577", thinkingParam: 24577},
                 }
             }
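
For reference, both tables hang off a small scenario type; from the fields used in this diff it is shaped roughly like the following (each test declares its own local variant, and the suffix-based test adds modelSuffix and an expectFn, so the exact definition may differ):

// scenario as used by TestRawPayloadThinkingConversions: thinkingParam
// carries nil (no thinking field), a string effort level, or an int
// budget, matching the three payload shapes exercised above.
type scenario struct {
    name          string
    thinkingParam any
}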
@@ -723,12 +719,16 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
         }
         // Convert effort level to budget for non-level models only
         if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
+            // "none" disables thinking - no thinkingBudget in output
+            if strings.ToLower(effort) == "none" {
+                return false, "", false
+            }
             if budget, okB := util.ThinkingEffortToBudget(model, effort); okB {
                 // ThinkingEffortToBudget already returns normalized budget
                 return true, fmt.Sprintf("%d", budget), false
             }
-            // Invalid effort maps to default/fallback
-            return true, fmt.Sprintf("%d", -1), false
+            // Invalid effort maps to default auto (-1)
+            return true, "-1", false
         }
         return false, "", false
     case "claude":
@@ -742,6 +742,11 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
         }
         // Convert effort level to budget for non-level models only
         if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
+            // "none" and "auto" don't produce budget_tokens
+            lower := strings.ToLower(effort)
+            if lower == "none" || lower == "auto" {
+                return false, "", false
+            }
             if budget, okB := util.ThinkingEffortToBudget(model, effort); okB {
                 // ThinkingEffortToBudget already returns normalized budget
                 return true, fmt.Sprintf("%d", budget), false
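
The claude branch differs from the gemini one in a detail worth flagging: claude has no -1 auto sentinel in budget_tokens, so both "none" and "auto" simply omit the field. A one-line predicate capturing the new guard:

// emitsBudgetTokens reports whether a string effort should produce a
// budget_tokens field in the claude-shaped payload, per the guard above.
func emitsBudgetTokens(effort string) bool {
    lower := strings.ToLower(effort)
    return lower != "none" && lower != "auto"
}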
@@ -780,6 +785,10 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
         }
         if budget, ok := cs.thinkingParam.(int); ok {
             if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" {
+                // Check if the mapped effort is valid for this model
+                if _, validLevel := util.NormalizeReasoningEffortLevel(model, mapped); !validLevel {
+                    return true, mapped, true // expect validation error
+                }
                 return true, mapped, false
             }
         }
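
The guard added in these last two hunks covers budgets whose mapped effort is not a level the target model accepts: the conversion still happens, but the request should then fail validation. A sketch of that expectation, with normalize standing in for util.NormalizeReasoningEffortLevel:

// expectedEffortOutcome mirrors the new guard: a mapped effort that fails
// level validation for the model yields wantErr=true instead of a clean
// conversion.
func expectedEffortOutcome(mapped string, normalize func(string) (string, bool)) (effort string, wantErr bool) {
    if _, valid := normalize(mapped); !valid {
        return mapped, true // converted, then rejected by validation
    }
    return mapped, false
}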
@@ -797,6 +806,10 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
         }
         if budget, ok := cs.thinkingParam.(int); ok {
             if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" {
+                // Check if the mapped effort is valid for this model
+                if _, validLevel := util.NormalizeReasoningEffortLevel(model, mapped); !validLevel {
+                    return true, mapped, true // expect validation error
+                }
                 return true, mapped, false
             }
         }