test(thinking): expand conversion edge case coverage

This commit is contained in:
hkfires
2025-12-14 18:32:20 +08:00
parent 716aa71f6e
commit a4a3274a55

View File

@@ -28,7 +28,7 @@ func (e statusErr) Error() string { return e.msg }
// model that should have reasoning effort passed through even if not in registry. // model that should have reasoning effort passed through even if not in registry.
// This simulates the allowCompat behavior from OpenAICompatExecutor. // This simulates the allowCompat behavior from OpenAICompatExecutor.
func isOpenAICompatModel(model string) bool { func isOpenAICompatModel(model string) bool {
return model == "custom-thinking-model" return model == "openai-compat"
} }
// registerCoreModels loads representative models across providers into the registry // registerCoreModels loads representative models across providers into the registry
@@ -47,12 +47,12 @@ func registerCoreModels(t *testing.T) func() {
// The allowCompat flag should preserve reasoning effort for such models. // The allowCompat flag should preserve reasoning effort for such models.
customOpenAIModels := []*registry.ModelInfo{ customOpenAIModels := []*registry.ModelInfo{
{ {
ID: "custom-thinking-model", ID: "openai-compat",
Object: "model", Object: "model",
Created: 1700000000, Created: 1700000000,
OwnedBy: "custom-provider", OwnedBy: "custom-provider",
Type: "openai", Type: "openai",
DisplayName: "Custom Thinking Model", DisplayName: "OpenAI Compatible Model",
Description: "OpenAI-compatible model with forced thinking suffix support", Description: "OpenAI-compatible model with forced thinking suffix support",
}, },
} }
@@ -321,10 +321,10 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) {
defer cleanup() defer cleanup()
models := []string{ models := []string{
"gpt-5", // supports levels (low/medium/high) "gpt-5", // supports levels (level-based thinking)
"gemini-2.5-pro", // supports numeric budget "gemini-2.5-pro", // supports numeric budget
"qwen3-coder-flash", // no thinking support "qwen3-code-plus", // no thinking support
"custom-thinking-model", // openai-compatible model with forced thinking suffix "openai-compat", // openai-compatible channel (allowCompat=true)
} }
fromProtocols := []string{"openai", "claude", "gemini", "openai-response"} fromProtocols := []string{"openai", "claude", "gemini", "openai-response"}
toProtocols := []string{"gemini", "claude", "openai", "codex"} toProtocols := []string{"gemini", "claude", "openai", "codex"}
@@ -357,12 +357,7 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) {
} }
for _, model := range models { for _, model := range models {
info := registry.GetGlobalRegistry().GetModelInfo(model) _ = registry.GetGlobalRegistry().GetModelInfo(model)
min, max := 0, 0
if info != nil && info.Thinking != nil {
min = info.Thinking.Min
max = info.Thinking.Max
}
for _, from := range fromProtocols { for _, from := range fromProtocols {
// Scenario selection follows protocol semantics: // Scenario selection follows protocol semantics:
@@ -372,29 +367,29 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) {
{name: "no-suffix", modelSuffix: model, expectFn: func(_ *registry.ModelInfo) (bool, int64) { return false, 0 }}, {name: "no-suffix", modelSuffix: model, expectFn: func(_ *registry.ModelInfo) (bool, int64) { return false, 0 }},
} }
if from == "openai" || from == "openai-response" { if from == "openai" || from == "openai-response" {
// Level-based test cases: auto, none, minimal, low, medium, high, xhigh, foo(invalid)
// Maps to numeric: -1, 0, 512, 1024, 8192, 24576, 32768, invalid
cases = append(cases, cases = append(cases,
scenario{name: "level-low", modelSuffix: fmt.Sprintf("%s(low)", model), expectFn: levelBudgetFn("low")},
scenario{name: "level-high", modelSuffix: fmt.Sprintf("%s(high)", model), expectFn: levelBudgetFn("high")},
scenario{name: "level-auto", modelSuffix: fmt.Sprintf("%s(auto)", model), expectFn: levelBudgetFn("auto")}, scenario{name: "level-auto", modelSuffix: fmt.Sprintf("%s(auto)", model), expectFn: levelBudgetFn("auto")},
scenario{name: "level-none", modelSuffix: fmt.Sprintf("%s(none)", model), expectFn: levelBudgetFn("none")},
scenario{name: "level-minimal", modelSuffix: fmt.Sprintf("%s(minimal)", model), expectFn: levelBudgetFn("minimal")},
scenario{name: "level-low", modelSuffix: fmt.Sprintf("%s(low)", model), expectFn: levelBudgetFn("low")},
scenario{name: "level-medium", modelSuffix: fmt.Sprintf("%s(medium)", model), expectFn: levelBudgetFn("medium")},
scenario{name: "level-high", modelSuffix: fmt.Sprintf("%s(high)", model), expectFn: levelBudgetFn("high")},
scenario{name: "level-xhigh", modelSuffix: fmt.Sprintf("%s(xhigh)", model), expectFn: levelBudgetFn("xhigh")},
scenario{name: "level-invalid", modelSuffix: fmt.Sprintf("%s(invalid)", model), expectFn: levelBudgetFn("invalid")},
) )
} else { // claude or gemini } else { // claude or gemini
if util.ModelUsesThinkingLevels(model) { // Numeric test cases: -1, 0, 1023, 1025, 8193, 24577
// Numeric budgets for level-based models are mapped into levels when needed. // Maps to levels: auto, none, low, medium, high, xhigh
cases = append(cases, cases = append(cases,
scenario{name: "numeric-0", modelSuffix: fmt.Sprintf("%s(0)", model), expectFn: buildBudgetFn(0)}, scenario{name: "numeric-neg1", modelSuffix: fmt.Sprintf("%s(-1)", model), expectFn: buildBudgetFn(-1)},
scenario{name: "numeric-1024", modelSuffix: fmt.Sprintf("%s(1024)", model), expectFn: buildBudgetFn(1024)}, scenario{name: "numeric-0", modelSuffix: fmt.Sprintf("%s(0)", model), expectFn: buildBudgetFn(0)},
scenario{name: "numeric-1025", modelSuffix: fmt.Sprintf("%s(1025)", model), expectFn: buildBudgetFn(1025)}, scenario{name: "numeric-1023", modelSuffix: fmt.Sprintf("%s(1023)", model), expectFn: buildBudgetFn(1023)},
scenario{name: "numeric-8192", modelSuffix: fmt.Sprintf("%s(8192)", model), expectFn: buildBudgetFn(8192)}, scenario{name: "numeric-1025", modelSuffix: fmt.Sprintf("%s(1025)", model), expectFn: buildBudgetFn(1025)},
scenario{name: "numeric-8193", modelSuffix: fmt.Sprintf("%s(8193)", model), expectFn: buildBudgetFn(8193)}, scenario{name: "numeric-8193", modelSuffix: fmt.Sprintf("%s(8193)", model), expectFn: buildBudgetFn(8193)},
scenario{name: "numeric-24576", modelSuffix: fmt.Sprintf("%s(24576)", model), expectFn: buildBudgetFn(24576)}, scenario{name: "numeric-24577", modelSuffix: fmt.Sprintf("%s(24577)", model), expectFn: buildBudgetFn(24577)},
scenario{name: "numeric-24577", modelSuffix: fmt.Sprintf("%s(24577)", model), expectFn: buildBudgetFn(24577)}, )
)
} else {
cases = append(cases,
scenario{name: "numeric-below-min", modelSuffix: fmt.Sprintf("%s(%d)", model, min-10), expectFn: buildBudgetFn(min - 10)},
scenario{name: "numeric-above-max", modelSuffix: fmt.Sprintf("%s(%d)", model, max+10), expectFn: buildBudgetFn(max + 10)},
)
}
} }
for _, to := range toProtocols { for _, to := range toProtocols {
@@ -642,10 +637,10 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
defer cleanup() defer cleanup()
models := []string{ models := []string{
"gpt-5", // supports levels (low/medium/high) "gpt-5", // supports levels (level-based thinking)
"gemini-2.5-pro", // supports numeric budget "gemini-2.5-pro", // supports numeric budget
"qwen3-coder-flash", // no thinking support "qwen3-code-plus", // no thinking support
"custom-thinking-model", // openai-compatible model with forced thinking suffix "openai-compat", // openai-compatible channel (allowCompat=true)
} }
fromProtocols := []string{"openai", "claude", "gemini", "openai-response"} fromProtocols := []string{"openai", "claude", "gemini", "openai-response"}
toProtocols := []string{"gemini", "claude", "openai", "codex"} toProtocols := []string{"gemini", "claude", "openai", "codex"}
@@ -664,27 +659,28 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
var cases []scenario var cases []scenario
switch from { switch from {
case "openai", "openai-response": case "openai", "openai-response":
// Level-based test cases: auto, none, minimal, low, medium, high, xhigh, foo(invalid)
cases = []scenario{ cases = []scenario{
{name: "no-thinking", thinkingParam: nil}, {name: "no-thinking", thinkingParam: nil},
{name: "effort-auto", thinkingParam: "auto"},
{name: "effort-none", thinkingParam: "none"},
{name: "effort-minimal", thinkingParam: "minimal"},
{name: "effort-low", thinkingParam: "low"}, {name: "effort-low", thinkingParam: "low"},
{name: "effort-medium", thinkingParam: "medium"}, {name: "effort-medium", thinkingParam: "medium"},
{name: "effort-high", thinkingParam: "high"}, {name: "effort-high", thinkingParam: "high"},
{name: "effort-xhigh", thinkingParam: "xhigh"}, {name: "effort-xhigh", thinkingParam: "xhigh"},
{name: "effort-invalid-foo", thinkingParam: "foo"}, {name: "effort-invalid", thinkingParam: "invalid"},
} }
case "gemini": case "gemini", "claude":
// Numeric test cases: -1, 0, 1023, 1025, 8193, 24577
cases = []scenario{ cases = []scenario{
{name: "no-thinking", thinkingParam: nil}, {name: "no-thinking", thinkingParam: nil},
{name: "budget-1024", thinkingParam: 1024}, {name: "budget-neg1", thinkingParam: -1},
{name: "budget-8192", thinkingParam: 8192}, {name: "budget-0", thinkingParam: 0},
{name: "budget-16384", thinkingParam: 16384}, {name: "budget-1023", thinkingParam: 1023},
} {name: "budget-1025", thinkingParam: 1025},
case "claude": {name: "budget-8193", thinkingParam: 8193},
cases = []scenario{ {name: "budget-24577", thinkingParam: 24577},
{name: "no-thinking", thinkingParam: nil},
{name: "budget-1024", thinkingParam: 1024},
{name: "budget-8192", thinkingParam: 8192},
{name: "budget-16384", thinkingParam: 16384},
} }
} }
@@ -723,12 +719,16 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
} }
// Convert effort level to budget for non-level models only // Convert effort level to budget for non-level models only
if effort, ok := cs.thinkingParam.(string); ok && effort != "" { if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
// "none" disables thinking - no thinkingBudget in output
if strings.ToLower(effort) == "none" {
return false, "", false
}
if budget, okB := util.ThinkingEffortToBudget(model, effort); okB { if budget, okB := util.ThinkingEffortToBudget(model, effort); okB {
// ThinkingEffortToBudget already returns normalized budget // ThinkingEffortToBudget already returns normalized budget
return true, fmt.Sprintf("%d", budget), false return true, fmt.Sprintf("%d", budget), false
} }
// Invalid effort maps to default/fallback // Invalid effort maps to default auto (-1)
return true, fmt.Sprintf("%d", -1), false return true, "-1", false
} }
return false, "", false return false, "", false
case "claude": case "claude":
@@ -742,6 +742,11 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
} }
// Convert effort level to budget for non-level models only // Convert effort level to budget for non-level models only
if effort, ok := cs.thinkingParam.(string); ok && effort != "" { if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
// "none" and "auto" don't produce budget_tokens
lower := strings.ToLower(effort)
if lower == "none" || lower == "auto" {
return false, "", false
}
if budget, okB := util.ThinkingEffortToBudget(model, effort); okB { if budget, okB := util.ThinkingEffortToBudget(model, effort); okB {
// ThinkingEffortToBudget already returns normalized budget // ThinkingEffortToBudget already returns normalized budget
return true, fmt.Sprintf("%d", budget), false return true, fmt.Sprintf("%d", budget), false
@@ -780,6 +785,10 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
} }
if budget, ok := cs.thinkingParam.(int); ok { if budget, ok := cs.thinkingParam.(int); ok {
if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" { if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" {
// Check if the mapped effort is valid for this model
if _, validLevel := util.NormalizeReasoningEffortLevel(model, mapped); !validLevel {
return true, mapped, true // expect validation error
}
return true, mapped, false return true, mapped, false
} }
} }
@@ -797,6 +806,10 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
} }
if budget, ok := cs.thinkingParam.(int); ok { if budget, ok := cs.thinkingParam.(int); ok {
if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" { if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" {
// Check if the mapped effort is valid for this model
if _, validLevel := util.NormalizeReasoningEffortLevel(model, mapped); !validLevel {
return true, mapped, true // expect validation error
}
return true, mapped, false return true, mapped, false
} }
} }