mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-02-03 04:50:52 +08:00
feat(iflow): add model-specific thinking configs for GLM-4.7 and MiniMax-M2.1
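- GLM-4.7: Sets extra_body={"thinking": {"type": "enabled"}, "clear_thinking": false} when reasoning_effort is present and is neither empty nor "none"; otherwise no thinking fields are added
- MiniMax-M2.1: Sets reasoning_split=true under the same condition, for OpenAI-style reasoning separation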
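- Added preserveReasoningContentInMessages() as the hook for keeping reasoning
content in assistant message history for multi-turn conversations with GLM-4.7 and MiniMax-M2.1; it currently only detects client-supplied reasoning_content and logs it, returning the request body unchanged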
- Added ThinkingSupport to MiniMax-M2.1 model definition
This commit is contained in:
@@ -741,7 +741,7 @@ func GetIFlowModels() []*ModelInfo {
|
|||||||
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
|
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
|
||||||
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
|
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
|
||||||
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000},
|
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000},
|
||||||
{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000},
|
{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
|
||||||
}
|
}
|
||||||
models := make([]*ModelInfo, 0, len(entries))
|
models := make([]*ModelInfo, 0, len(entries))
|
||||||
for _, entry := range entries {
|
for _, entry := range entries {
|
||||||
|
|||||||
@@ -67,6 +67,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
|||||||
return resp, errValidate
|
return resp, errValidate
|
||||||
}
|
}
|
||||||
body = applyIFlowThinkingConfig(body)
|
body = applyIFlowThinkingConfig(body)
|
||||||
|
body = preserveReasoningContentInMessages(body)
|
||||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||||
|
|
||||||
endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
|
endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
|
||||||
@@ -159,6 +160,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
|||||||
return nil, errValidate
|
return nil, errValidate
|
||||||
}
|
}
|
||||||
body = applyIFlowThinkingConfig(body)
|
body = applyIFlowThinkingConfig(body)
|
||||||
|
body = preserveReasoningContentInMessages(body)
|
||||||
// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
|
// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
|
||||||
toolsResult := gjson.GetBytes(body, "tools")
|
toolsResult := gjson.GetBytes(body, "tools")
|
||||||
if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
|
if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
|
||||||
@@ -445,20 +447,98 @@ func ensureToolsArray(body []byte) []byte {
|
|||||||
return updated
|
return updated
|
||||||
}
|
}
|
||||||
|
|
||||||
// applyIFlowThinkingConfig converts normalized reasoning_effort to iFlow chat_template_kwargs.enable_thinking.
|
// preserveReasoningContentInMessages ensures reasoning_content from assistant messages in the
|
||||||
// This should be called after NormalizeThinkingConfig has processed the payload.
|
// conversation history is preserved when sending to iFlow models that support thinking.
|
||||||
// iFlow only supports boolean enable_thinking, so any non-"none" effort enables thinking.
|
// This is critical for multi-turn conversations where the model needs to see its previous
|
||||||
func applyIFlowThinkingConfig(body []byte) []byte {
|
// reasoning to maintain coherent thought chains across tool calls and conversation turns.
|
||||||
effort := gjson.GetBytes(body, "reasoning_effort")
|
//
|
||||||
if !effort.Exists() {
|
// For GLM-4.7 and MiniMax-M2.1, the full assistant response (including reasoning) must be
|
||||||
|
// appended back into message history before the next call.
|
||||||
|
func preserveReasoningContentInMessages(body []byte) []byte {
|
||||||
|
model := strings.ToLower(gjson.GetBytes(body, "model").String())
|
||||||
|
|
||||||
|
// Only apply to models that support thinking with history preservation
|
||||||
|
needsPreservation := strings.HasPrefix(model, "glm-4.7") ||
|
||||||
|
strings.HasPrefix(model, "glm-4-7") ||
|
||||||
|
strings.HasPrefix(model, "minimax-m2.1") ||
|
||||||
|
strings.HasPrefix(model, "minimax-m2-1")
|
||||||
|
|
||||||
|
if !needsPreservation {
|
||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
|
||||||
val := strings.ToLower(strings.TrimSpace(effort.String()))
|
messages := gjson.GetBytes(body, "messages")
|
||||||
enableThinking := val != "none" && val != ""
|
if !messages.Exists() || !messages.IsArray() {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
body, _ = sjson.DeleteBytes(body, "reasoning_effort")
|
// Check if any assistant message already has reasoning_content preserved
|
||||||
body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
|
hasReasoningContent := false
|
||||||
|
messages.ForEach(func(_, msg gjson.Result) bool {
|
||||||
|
role := msg.Get("role").String()
|
||||||
|
if role == "assistant" {
|
||||||
|
rc := msg.Get("reasoning_content")
|
||||||
|
if rc.Exists() && rc.String() != "" {
|
||||||
|
hasReasoningContent = true
|
||||||
|
return false // stop iteration
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
|
||||||
|
// If reasoning content is already present, the messages are properly formatted
|
||||||
|
// No need to modify - the client has correctly preserved reasoning in history
|
||||||
|
if hasReasoningContent {
|
||||||
|
log.Debugf("iflow executor: reasoning_content found in message history for %s", model)
|
||||||
|
}
|
||||||
|
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
// applyIFlowThinkingConfig converts normalized reasoning_effort to model-specific thinking configurations.
|
||||||
|
// This should be called after NormalizeThinkingConfig has processed the payload.
|
||||||
|
//
|
||||||
|
// Model-specific handling:
|
||||||
|
// - GLM-4.7: Uses extra_body={"thinking": {"type": "enabled"}, "clear_thinking": false}
|
||||||
|
// - MiniMax-M2.1: Uses reasoning_split=true for OpenAI-style reasoning separation
|
||||||
|
// - Other iFlow models: Uses chat_template_kwargs.enable_thinking (boolean)
|
||||||
|
func applyIFlowThinkingConfig(body []byte) []byte {
|
||||||
|
effort := gjson.GetBytes(body, "reasoning_effort")
|
||||||
|
model := strings.ToLower(gjson.GetBytes(body, "model").String())
|
||||||
|
|
||||||
|
// Check if thinking should be enabled
|
||||||
|
val := ""
|
||||||
|
if effort.Exists() {
|
||||||
|
val = strings.ToLower(strings.TrimSpace(effort.String()))
|
||||||
|
}
|
||||||
|
enableThinking := effort.Exists() && val != "none" && val != ""
|
||||||
|
|
||||||
|
// Remove reasoning_effort as we'll convert to model-specific format
|
||||||
|
if effort.Exists() {
|
||||||
|
body, _ = sjson.DeleteBytes(body, "reasoning_effort")
|
||||||
|
}
|
||||||
|
|
||||||
|
// GLM-4.7: Use extra_body with thinking config and clear_thinking: false
|
||||||
|
if strings.HasPrefix(model, "glm-4.7") || strings.HasPrefix(model, "glm-4-7") {
|
||||||
|
if enableThinking {
|
||||||
|
body, _ = sjson.SetBytes(body, "extra_body.thinking.type", "enabled")
|
||||||
|
body, _ = sjson.SetBytes(body, "extra_body.clear_thinking", false)
|
||||||
|
}
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
// MiniMax-M2.1: Use reasoning_split=true for interleaved thinking
|
||||||
|
if strings.HasPrefix(model, "minimax-m2.1") || strings.HasPrefix(model, "minimax-m2-1") {
|
||||||
|
if enableThinking {
|
||||||
|
body, _ = sjson.SetBytes(body, "reasoning_split", true)
|
||||||
|
}
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
// Other iFlow models (including GLM-4.6): Use chat_template_kwargs.enable_thinking
|
||||||
|
if effort.Exists() {
|
||||||
|
body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
|
||||||
|
}
|
||||||
|
|
||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user