feat(iflow): add model-specific thinking configs for GLM-4.7 and MiniMax-M2.1

- GLM-4.7: Uses extra_body={"thinking": {"type": "enabled"}, "clear_thinking": false}
- MiniMax-M2.1: Uses reasoning_split=true for OpenAI-style reasoning separation
- Added preserveReasoningContentInMessages() to support re-injection of reasoning content in assistant message history for multi-turn conversations
- Added ThinkingSupport to MiniMax-M2.1 model definition
2026-02-03 04:50:52 +08:00 · 2025-12-26 15:41:48 +01:00
parent d222469b44
commit 6403ff4ec4
2 changed files with 91 additions and 11 deletions
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -741,7 +741,7 @@ func GetIFlowModels() []*ModelInfo {
 		{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
 		{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
 		{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000},
-		{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000},
+		{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
 	}
 	models := make([]*ModelInfo, 0, len(entries))
 	for _, entry := range entries {
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -67,6 +67,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 		return resp, errValidate
 	}
 	body = applyIFlowThinkingConfig(body)
 	body = preserveReasoningContentInMessages(body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
 	endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
@@ -159,6 +160,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 		return nil, errValidate
 	}
 	body = applyIFlowThinkingConfig(body)
 	body = preserveReasoningContentInMessages(body)
 	// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
 	toolsResult := gjson.GetBytes(body, "tools")
 	if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
@@ -445,20 +447,98 @@ func ensureToolsArray(body []byte) []byte {
 	return updated
 }
-// applyIFlowThinkingConfig converts normalized reasoning_effort to iFlow chat_template_kwargs.enable_thinking.
+// preserveReasoningContentInMessages ensures reasoning_content from assistant messages in the
-// This should be called after NormalizeThinkingConfig has processed the payload.
+// conversation history is preserved when sending to iFlow models that support thinking.
-// iFlow only supports boolean enable_thinking, so any non-"none" effort enables thinking.
+// This is critical for multi-turn conversations where the model needs to see its previous
-func applyIFlowThinkingConfig(body []byte) []byte {
+// reasoning to maintain coherent thought chains across tool calls and conversation turns.
-	effort := gjson.GetBytes(body, "reasoning_effort")
+//
-	if !effort.Exists() {
+// For GLM-4.7 and MiniMax-M2.1, the full assistant response (including reasoning) must be
 // appended back into message history before the next call.
 //
 // NOTE(review): as written, none of the current lines of this function modify
 // body; it only inspects the payload and emits a debug log when an assistant
 // message carries a non-empty reasoning_content. The lines prefixed with '-'
 // inside this hunk appear to be removed lines of the previous
 // applyIFlowThinkingConfig body shown by the diff, not part of this function.
 //
 // Parameters:
 //   - body: the raw JSON request payload; "model" and "messages" are read from it.
 //
 // Returns the same body that was passed in.
 func preserveReasoningContentInMessages(body []byte) []byte {
 	// Model name is lower-cased so the prefix checks below are case-insensitive.
 	model := strings.ToLower(gjson.GetBytes(body, "model").String())
 	// Only apply to models that support thinking with history preservation
 	// (both the dotted and the dashed spelling of each version are accepted).
 	needsPreservation := strings.HasPrefix(model, "glm-4.7") ||
 		strings.HasPrefix(model, "glm-4-7") ||
 		strings.HasPrefix(model, "minimax-m2.1") ||
 		strings.HasPrefix(model, "minimax-m2-1")
 	if !needsPreservation {
 		return body
 	}
-	val := strings.ToLower(strings.TrimSpace(effort.String()))
+	messages := gjson.GetBytes(body, "messages")
-	enableThinking := val != "none" && val != ""
+	if !messages.Exists() || !messages.IsArray() {
 		return body
 	}
-	body, _ = sjson.DeleteBytes(body, "reasoning_effort")
+	// Check if any assistant message already has reasoning_content preserved
-	body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
+	hasReasoningContent := false
 	// Scan the messages array; the callback returns false to stop at the first
 	// assistant message whose reasoning_content is present and non-empty.
 	messages.ForEach(func(_, msg gjson.Result) bool {
 		role := msg.Get("role").String()
 		if role == "assistant" {
 			rc := msg.Get("reasoning_content")
 			if rc.Exists() && rc.String() != "" {
 				hasReasoningContent = true
 				return false // stop iteration
 			}
 		}
 		return true
 	})
 	// If reasoning content is already present, the messages are properly formatted
 	// No need to modify - the client has correctly preserved reasoning in history
 	if hasReasoningContent {
 		log.Debugf("iflow executor: reasoning_content found in message history for %s", model)
 	}
 	// The payload is handed back as received whether or not reasoning_content was found.
 	return body
 }
 // applyIFlowThinkingConfig rewrites the normalized reasoning_effort field of an
 // iFlow request into the thinking switch used for the requested model.
 // It is intended to run after NormalizeThinkingConfig has processed the payload.
 //
 // Thinking counts as enabled only when reasoning_effort is present and, after
 // trimming and lower-casing, is neither empty nor "none". Whenever
 // reasoning_effort is present it is removed from the payload.
 //
 // Per-model output:
 //   - glm-4.7 / glm-4-7 prefixes: when enabled, sets extra_body.thinking.type
 //     to "enabled" and extra_body.clear_thinking to false; otherwise adds nothing.
 //   - minimax-m2.1 / minimax-m2-1 prefixes: when enabled, sets reasoning_split
 //     to true; otherwise adds nothing.
 //   - every other model: when reasoning_effort was present, sets
 //     chat_template_kwargs.enable_thinking to the computed boolean.
 //
 // Returns the (possibly rewritten) payload.
 func applyIFlowThinkingConfig(body []byte) []byte {
 	// Both fields are read up front, before any mutation of body.
 	modelName := strings.ToLower(gjson.GetBytes(body, "model").String())
 	effortField := gjson.GetBytes(body, "reasoning_effort")
 	hasEffort := effortField.Exists()
 	thinkingOn := false
 	if hasEffort {
 		switch strings.ToLower(strings.TrimSpace(effortField.String())) {
 		case "", "none":
 			// Explicitly disabled or blank: leave thinkingOn false.
 		default:
 			thinkingOn = true
 		}
 		// reasoning_effort is replaced by a model-specific field below, so drop it.
 		body, _ = sjson.DeleteBytes(body, "reasoning_effort")
 	}
 	isGLM47 := strings.HasPrefix(modelName, "glm-4.7") || strings.HasPrefix(modelName, "glm-4-7")
 	isMiniMaxM21 := strings.HasPrefix(modelName, "minimax-m2.1") || strings.HasPrefix(modelName, "minimax-m2-1")
 	switch {
 	case isGLM47:
 		// GLM-4.7: thinking config lives under extra_body, with clear_thinking: false.
 		if thinkingOn {
 			body, _ = sjson.SetBytes(body, "extra_body.thinking.type", "enabled")
 			body, _ = sjson.SetBytes(body, "extra_body.clear_thinking", false)
 		}
 	case isMiniMaxM21:
 		// MiniMax-M2.1: reasoning_split=true asks for separated reasoning output.
 		if thinkingOn {
 			body, _ = sjson.SetBytes(body, "reasoning_split", true)
 		}
 	case hasEffort:
 		// Other iFlow models (including GLM-4.6): boolean chat_template_kwargs.enable_thinking.
 		body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", thinkingOn)
 	}
 	return body
 }