mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-02-03 21:10:51 +08:00
refactor(iflow): simplify thinking config handling for GLM and MiniMax models
This commit is contained in:
@@ -740,7 +740,7 @@ func GetIFlowModels() []*ModelInfo {
|
|||||||
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
|
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
|
||||||
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
|
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
|
||||||
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
|
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
|
||||||
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000},
|
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
|
||||||
{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
|
{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
|
||||||
}
|
}
|
||||||
models := make([]*ModelInfo, 0, len(entries))
|
models := make([]*ModelInfo, 0, len(entries))
|
||||||
|
|||||||
@@ -441,21 +441,18 @@ func ensureToolsArray(body []byte) []byte {
|
|||||||
return updated
|
return updated
|
||||||
}
|
}
|
||||||
|
|
||||||
// preserveReasoningContentInMessages ensures reasoning_content from assistant messages in the
|
// preserveReasoningContentInMessages checks if reasoning_content from assistant messages
|
||||||
// conversation history is preserved when sending to iFlow models that support thinking.
|
// is preserved in conversation history for iFlow models that support thinking.
|
||||||
// This is critical for multi-turn conversations where the model needs to see its previous
|
// This is helpful for multi-turn conversations where the model may benefit from seeing
|
||||||
// reasoning to maintain coherent thought chains across tool calls and conversation turns.
|
// its previous reasoning to maintain coherent thought chains.
|
||||||
//
|
//
|
||||||
// For GLM-4.7 and MiniMax-M2.1, the full assistant response (including reasoning) must be
|
// For GLM-4.6/4.7 and MiniMax M2/M2.1, it is recommended to include the full assistant
|
||||||
// appended back into message history before the next call.
|
// response (including reasoning_content) in message history for better context continuity.
|
||||||
func preserveReasoningContentInMessages(body []byte) []byte {
|
func preserveReasoningContentInMessages(body []byte) []byte {
|
||||||
model := strings.ToLower(gjson.GetBytes(body, "model").String())
|
model := strings.ToLower(gjson.GetBytes(body, "model").String())
|
||||||
|
|
||||||
// Only apply to models that support thinking with history preservation
|
// Only apply to models that support thinking with history preservation
|
||||||
needsPreservation := strings.HasPrefix(model, "glm-4.7") ||
|
needsPreservation := strings.HasPrefix(model, "glm-4") || strings.HasPrefix(model, "minimax-m2")
|
||||||
strings.HasPrefix(model, "glm-4-7") ||
|
|
||||||
strings.HasPrefix(model, "minimax-m2.1") ||
|
|
||||||
strings.HasPrefix(model, "minimax-m2-1")
|
|
||||||
|
|
||||||
if !needsPreservation {
|
if !needsPreservation {
|
||||||
return body
|
return body
|
||||||
@@ -493,45 +490,34 @@ func preserveReasoningContentInMessages(body []byte) []byte {
|
|||||||
// This should be called after NormalizeThinkingConfig has processed the payload.
|
// This should be called after NormalizeThinkingConfig has processed the payload.
|
||||||
//
|
//
|
||||||
// Model-specific handling:
|
// Model-specific handling:
|
||||||
// - GLM-4.7: Uses extra_body={"thinking": {"type": "enabled"}, "clear_thinking": false}
|
// - GLM-4.6/4.7: Uses chat_template_kwargs.enable_thinking (boolean); also sets chat_template_kwargs.clear_thinking=false when thinking is enabled
|
||||||
// - MiniMax-M2.1: Uses reasoning_split=true for OpenAI-style reasoning separation
|
// - MiniMax M2/M2.1: Sets reasoning_split (true when thinking is enabled, false when reasoning_effort is "none" or empty) for OpenAI-style reasoning separation
|
||||||
// - Other iFlow models: Uses chat_template_kwargs.enable_thinking (boolean)
|
|
||||||
func applyIFlowThinkingConfig(body []byte) []byte {
|
func applyIFlowThinkingConfig(body []byte) []byte {
|
||||||
effort := gjson.GetBytes(body, "reasoning_effort")
|
effort := gjson.GetBytes(body, "reasoning_effort")
|
||||||
model := strings.ToLower(gjson.GetBytes(body, "model").String())
|
if !effort.Exists() {
|
||||||
|
return body
|
||||||
// Check if thinking should be enabled
|
|
||||||
val := ""
|
|
||||||
if effort.Exists() {
|
|
||||||
val = strings.ToLower(strings.TrimSpace(effort.String()))
|
|
||||||
}
|
}
|
||||||
enableThinking := effort.Exists() && val != "none" && val != ""
|
|
||||||
|
model := strings.ToLower(gjson.GetBytes(body, "model").String())
|
||||||
|
val := strings.ToLower(strings.TrimSpace(effort.String()))
|
||||||
|
enableThinking := val != "none" && val != ""
|
||||||
|
|
||||||
// Remove reasoning_effort as we'll convert to model-specific format
|
// Remove reasoning_effort as we'll convert to model-specific format
|
||||||
if effort.Exists() {
|
body, _ = sjson.DeleteBytes(body, "reasoning_effort")
|
||||||
body, _ = sjson.DeleteBytes(body, "reasoning_effort")
|
|
||||||
}
|
|
||||||
|
|
||||||
// GLM-4.7: Use extra_body with thinking config and clear_thinking: false
|
// GLM-4.6/4.7 (matched by the "glm-4" model prefix): Use chat_template_kwargs
|
||||||
if strings.HasPrefix(model, "glm-4.7") || strings.HasPrefix(model, "glm-4-7") {
|
if strings.HasPrefix(model, "glm-4") {
|
||||||
if enableThinking {
|
|
||||||
body, _ = sjson.SetBytes(body, "extra_body.thinking.type", "enabled")
|
|
||||||
body, _ = sjson.SetBytes(body, "extra_body.clear_thinking", false)
|
|
||||||
}
|
|
||||||
return body
|
|
||||||
}
|
|
||||||
|
|
||||||
// MiniMax-M2.1: Use reasoning_split=true for interleaved thinking
|
|
||||||
if strings.HasPrefix(model, "minimax-m2.1") || strings.HasPrefix(model, "minimax-m2-1") {
|
|
||||||
if enableThinking {
|
|
||||||
body, _ = sjson.SetBytes(body, "reasoning_split", true)
|
|
||||||
}
|
|
||||||
return body
|
|
||||||
}
|
|
||||||
|
|
||||||
// Other iFlow models (including GLM-4.6): Use chat_template_kwargs.enable_thinking
|
|
||||||
if effort.Exists() {
|
|
||||||
body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
|
body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
|
||||||
|
if enableThinking {
|
||||||
|
body, _ = sjson.SetBytes(body, "chat_template_kwargs.clear_thinking", false)
|
||||||
|
}
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
// MiniMax M2/M2.1: Use reasoning_split
|
||||||
|
if strings.HasPrefix(model, "minimax-m2") {
|
||||||
|
body, _ = sjson.SetBytes(body, "reasoning_split", enableThinking)
|
||||||
|
return body
|
||||||
}
|
}
|
||||||
|
|
||||||
return body
|
return body
|
||||||
|
|||||||
Reference in New Issue
Block a user