diff --git a/internal/registry/model_definitions_static_data.go b/internal/registry/model_definitions_static_data.go
index b1a524fb..cf5f1402 100644
--- a/internal/registry/model_definitions_static_data.go
+++ b/internal/registry/model_definitions_static_data.go
@@ -784,7 +784,7 @@ func GetIFlowModels() []*ModelInfo {
 		{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
 		{ID: "qwen3-max", DisplayName: "Qwen3-Max", Description: "Qwen3 flagship model", Created: 1758672000},
 		{ID: "qwen3-vl-plus", DisplayName: "Qwen3-VL-Plus", Description: "Qwen3 multimodal vision-language", Created: 1758672000},
-		{ID: "qwen3-max-preview", DisplayName: "Qwen3-Max-Preview", Description: "Qwen3 Max preview build", Created: 1757030400},
+		{ID: "qwen3-max-preview", DisplayName: "Qwen3-Max-Preview", Description: "Qwen3 Max preview build", Created: 1757030400, Thinking: iFlowThinkingSupport},
 		{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
 		{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400, Thinking: iFlowThinkingSupport},
 		{ID: "glm-4.7", DisplayName: "GLM-4.7", Description: "Zhipu GLM 4.7 general model", Created: 1766448000, Thinking: iFlowThinkingSupport},
@@ -792,8 +792,8 @@ func GetIFlowModels() []*ModelInfo {
 		{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200},
 		{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Chat", Created: 1764576000},
 		{ID: "deepseek-v3.2-reasoner", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Reasoner", Created: 1764576000},
-		{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
-		{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
+		{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000, Thinking: iFlowThinkingSupport},
+		{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200, Thinking: iFlowThinkingSupport},
 		{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
 		{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
 		{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
diff --git a/internal/thinking/provider/iflow/apply.go b/internal/thinking/provider/iflow/apply.go
index da986d22..35d13f59 100644
--- a/internal/thinking/provider/iflow/apply.go
+++ b/internal/thinking/provider/iflow/apply.go
@@ -1,7 +1,7 @@
-// Package iflow implements thinking configuration for iFlow models (GLM, MiniMax).
+// Package iflow implements thinking configuration for iFlow models.
 //
 // iFlow models use boolean toggle semantics:
-//   - GLM models: chat_template_kwargs.enable_thinking (boolean)
+//   - Models using chat_template_kwargs.enable_thinking (boolean toggle)
 //   - MiniMax models: reasoning_split (boolean)
 //
 // Level values are converted to boolean: none=false, all others=true
@@ -20,6 +20,7 @@ import (
 // Applier implements thinking.ProviderApplier for iFlow models.
 //
 // iFlow-specific behavior:
+//   - enable_thinking toggle models: enable_thinking boolean
 //   - GLM models: enable_thinking boolean + clear_thinking=false
 //   - MiniMax models: reasoning_split boolean
 //   - Level to boolean: none=false, others=true
@@ -61,8 +62,8 @@ func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *
 		return body, nil
 	}
 
-	if isGLMModel(modelInfo.ID) {
-		return applyGLM(body, config), nil
+	if isEnableThinkingModel(modelInfo.ID) {
+		return applyEnableThinking(body, config, isGLMModel(modelInfo.ID)), nil
 	}
 
 	if isMiniMaxModel(modelInfo.ID) {
@@ -97,7 +98,8 @@ func configToBoolean(config thinking.ThinkingConfig) bool {
 	}
 }
 
-// applyGLM applies thinking configuration for GLM models.
+// applyEnableThinking applies thinking configuration for models that use
+// the chat_template_kwargs.enable_thinking format.
 //
 // Output format when enabled:
 //
@@ -107,9 +109,8 @@
 //
 //	{"chat_template_kwargs": {"enable_thinking": false}}
 //
-// Note: clear_thinking is only set when thinking is enabled, to preserve
-// thinking output in the response.
-func applyGLM(body []byte, config thinking.ThinkingConfig) []byte {
+// Note: clear_thinking is only set for GLM models when thinking is enabled.
+func applyEnableThinking(body []byte, config thinking.ThinkingConfig, setClearThinking bool) []byte {
 	enableThinking := configToBoolean(config)
 
 	if len(body) == 0 || !gjson.ValidBytes(body) {
@@ -118,8 +119,11 @@
 	}
 
 	result, _ := sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
 
+	// clear_thinking is a GLM-only knob; strip it for other models.
+	result, _ = sjson.DeleteBytes(result, "chat_template_kwargs.clear_thinking")
+
 	// clear_thinking only needed when thinking is enabled
-	if enableThinking {
+	if enableThinking && setClearThinking {
 		result, _ = sjson.SetBytes(result, "chat_template_kwargs.clear_thinking", false)
 	}
 
@@ -143,8 +147,21 @@ func applyMiniMax(body []byte, config thinking.ThinkingConfig) []byte {
 	return result
 }
 
+// isEnableThinkingModel determines if the model uses the chat_template_kwargs.enable_thinking format.
+func isEnableThinkingModel(modelID string) bool {
+	if isGLMModel(modelID) {
+		return true
+	}
+	id := strings.ToLower(modelID)
+	switch id {
+	case "qwen3-max-preview", "deepseek-v3.2", "deepseek-v3.1":
+		return true
+	default:
+		return false
+	}
+}
+
 // isGLMModel determines if the model is a GLM series model.
-// GLM models use chat_template_kwargs.enable_thinking format.
 func isGLMModel(modelID string) bool {
 	return strings.HasPrefix(strings.ToLower(modelID), "glm")
 }
diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go
index 3ad26ea6..fc20199e 100644
--- a/test/thinking_conversion_test.go
+++ b/test/thinking_conversion_test.go
@@ -2,6 +2,7 @@ package test
 
 import (
 	"fmt"
+	"strings"
 	"testing"
 	"time"
 
@@ -2778,12 +2779,18 @@ func runThinkingTests(t *testing.T, cases []thinkingTestCase) {
 
 		// Verify clear_thinking for iFlow GLM models when enable_thinking=true
 		if tc.to == "iflow" && tc.expectField == "chat_template_kwargs.enable_thinking" && tc.expectValue == "true" {
+			baseModel := thinking.ParseSuffix(tc.model).ModelName
+			isGLM := strings.HasPrefix(strings.ToLower(baseModel), "glm")
 			ctVal := gjson.GetBytes(body, "chat_template_kwargs.clear_thinking")
-			if !ctVal.Exists() {
-				t.Fatalf("expected clear_thinking field not found for GLM model, body=%s", string(body))
-			}
-			if ctVal.Bool() != false {
-				t.Fatalf("clear_thinking: expected false, got %v, body=%s", ctVal.Bool(), string(body))
+			if isGLM {
+				if !ctVal.Exists() {
+					t.Fatalf("expected clear_thinking field not found for GLM model, body=%s", string(body))
+				}
+				if ctVal.Bool() != false {
+					t.Fatalf("clear_thinking: expected false, got %v, body=%s", ctVal.Bool(), string(body))
+				}
+			} else if ctVal.Exists() {
+				t.Fatalf("expected no clear_thinking field for non-GLM enable_thinking model, body=%s", string(body))
 			}
 		}
 	})
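
For reviewers, a minimal standalone sketch of the request-body transformation the new applyEnableThinking path performs. This is not part of the patch: applySketch is a hypothetical stand-in for the real helper (which also validates the body and routes via isEnableThinkingModel), and the sample bodies are illustrative only.

package main

import (
	"fmt"

	"github.com/tidwall/sjson"
)

// applySketch mirrors the patched behavior: set the enable_thinking
// toggle, strip any stale GLM-only clear_thinking knob, then re-add
// clear_thinking=false only for GLM models while thinking is enabled.
func applySketch(body []byte, enableThinking, isGLM bool) []byte {
	result, _ := sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
	result, _ = sjson.DeleteBytes(result, "chat_template_kwargs.clear_thinking")
	if enableThinking && isGLM {
		result, _ = sjson.SetBytes(result, "chat_template_kwargs.clear_thinking", false)
	}
	return result
}

func main() {
	glm := applySketch([]byte(`{"model":"glm-4.6"}`), true, true)
	fmt.Println(string(glm))
	// {"model":"glm-4.6","chat_template_kwargs":{"enable_thinking":true,"clear_thinking":false}}

	ds := applySketch([]byte(`{"model":"deepseek-v3.2"}`), true, false)
	fmt.Println(string(ds))
	// {"model":"deepseek-v3.2","chat_template_kwargs":{"enable_thinking":true}}
}

Run against the two sample bodies, the GLM request carries both toggles while the DeepSeek request carries only enable_thinking, which is exactly the split the updated test now asserts.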