From b326ec364150b39207e6c9652643f037a2bb7d26 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Tue, 16 Dec 2025 14:22:05 +0800 Subject: [PATCH 1/5] feat(iflow): add thinking support for iFlow models --- internal/registry/model_definitions.go | 15 +++++++++++---- internal/runtime/executor/iflow_executor.go | 20 ++++++++++++++++++++ internal/runtime/executor/payload_helpers.go | 2 +- 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index 7a4bdf0c..ca894ba6 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -630,6 +630,13 @@ func GetQwenModels() []*ModelInfo { } } +// iFlowThinkingSupport is a shared ThinkingSupport configuration for iFlow models +// that support thinking mode via chat_template_kwargs.enable_thinking (boolean toggle). +// Uses level-based configuration so standard normalization flows apply before conversion. +var iFlowThinkingSupport = &ThinkingSupport{ + Levels: []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"}, +} + // GetIFlowModels returns supported models for iFlow OAuth accounts. func GetIFlowModels() []*ModelInfo { entries := []struct { @@ -645,9 +652,9 @@ func GetIFlowModels() []*ModelInfo { {ID: "qwen3-vl-plus", DisplayName: "Qwen3-VL-Plus", Description: "Qwen3 multimodal vision-language", Created: 1758672000}, {ID: "qwen3-max-preview", DisplayName: "Qwen3-Max-Preview", Description: "Qwen3 Max preview build", Created: 1757030400}, {ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400}, - {ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400}, + {ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400, Thinking: iFlowThinkingSupport}, {ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000}, - {ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}}, + {ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200}, {ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Chat", Created: 1764576000}, {ID: "deepseek-v3.2-reasoner", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Reasoner", Created: 1764576000}, {ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000}, @@ -655,10 +662,10 @@ func GetIFlowModels() []*ModelInfo { {ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200}, {ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200}, {ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400}, - {ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}}, + {ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600}, {ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600}, {ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600}, - {ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}}, + {ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000}, } models := make([]*ModelInfo, 0, len(entries)) for _, entry := range entries { diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index ad0b4d2a..0ed3c111 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -66,6 +66,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil { return resp, errValidate } + body = applyIFlowThinkingConfig(body) body = applyPayloadConfig(e.cfg, req.Model, body) endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint @@ -157,6 +158,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil { return nil, errValidate } + body = applyIFlowThinkingConfig(body) // Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour. toolsResult := gjson.GetBytes(body, "tools") if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 { @@ -442,3 +444,21 @@ func ensureToolsArray(body []byte) []byte { } return updated } + +// applyIFlowThinkingConfig converts normalized reasoning_effort to iFlow chat_template_kwargs.enable_thinking. +// This should be called after NormalizeThinkingConfig has processed the payload. +// iFlow only supports boolean enable_thinking, so any non-"none" effort enables thinking. +func applyIFlowThinkingConfig(body []byte) []byte { + effort := gjson.GetBytes(body, "reasoning_effort") + if !effort.Exists() { + return body + } + + val := strings.ToLower(strings.TrimSpace(effort.String())) + enableThinking := val != "none" && val != "" + + body, _ = sjson.DeleteBytes(body, "reasoning_effort") + body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking) + + return body +} diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index b0eafbb7..ff2d6ab4 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -273,7 +273,7 @@ func StripThinkingFields(payload []byte, effortOnly bool) []byte { "reasoning.effort", } if !effortOnly { - fieldsToRemove = append([]string{"reasoning"}, fieldsToRemove...) + fieldsToRemove = append([]string{"reasoning", "thinking"}, fieldsToRemove...) } out := payload for _, field := range fieldsToRemove { From 28a428ae2f8b8e2b96d749c07496b51a598eaa70 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Tue, 16 Dec 2025 18:07:20 +0800 Subject: [PATCH 2/5] fix(thinking): align budget effort mapping across translators Unify thinking budget-to-effort conversion in a shared helper, handle disabled/default thinking cases in translators, adjust zero-budget mapping, and drop the old OpenAI-specific helper with updated tests. --- internal/runtime/executor/payload_helpers.go | 8 +- .../codex/claude/codex_claude_request.go | 9 ++- .../codex/gemini/codex_gemini_request.go | 2 +- .../openai/claude/openai_claude_request.go | 20 ++++- .../openai/gemini/openai_gemini_request.go | 2 +- internal/util/openai_thinking.go | 37 --------- internal/util/thinking.go | 80 +++++++++++++++++++ internal/util/thinking_suffix.go | 30 ------- test/thinking_conversion_test.go | 20 ++--- 9 files changed, 116 insertions(+), 92 deletions(-) delete mode 100644 internal/util/openai_thinking.go diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index ff2d6ab4..adb224a8 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -72,13 +72,7 @@ func ApplyReasoningEffortMetadata(payload []byte, metadata map[string]any, model // Fallback: numeric thinking_budget suffix for level-based (OpenAI-style) models. if util.ModelUsesThinkingLevels(baseModel) || allowCompat { if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { - if effort, ok := util.OpenAIThinkingBudgetToEffort(baseModel, *budget); ok && effort != "" { - if *budget == 0 && effort == "none" && util.ModelUsesThinkingLevels(baseModel) { - if _, supported := util.NormalizeReasoningEffortLevel(baseModel, effort); !supported { - return StripThinkingFields(payload, false) - } - } - + if effort, ok := util.ThinkingBudgetToEffort(baseModel, *budget); ok && effort != "" { if updated, err := sjson.SetBytes(payload, field, effort); err == nil { return updated } diff --git a/internal/translator/codex/claude/codex_claude_request.go b/internal/translator/codex/claude/codex_claude_request.go index 414efa89..41fd2764 100644 --- a/internal/translator/codex/claude/codex_claude_request.go +++ b/internal/translator/codex/claude/codex_claude_request.go @@ -219,15 +219,20 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool) // Convert thinking.budget_tokens to reasoning.effort for level-based models reasoningEffort := "medium" // default if thinking := rootResult.Get("thinking"); thinking.Exists() && thinking.IsObject() { - if thinking.Get("type").String() == "enabled" { + switch thinking.Get("type").String() { + case "enabled": if util.ModelUsesThinkingLevels(modelName) { if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() { budget := int(budgetTokens.Int()) - if effort, ok := util.OpenAIThinkingBudgetToEffort(modelName, budget); ok && effort != "" { + if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" { reasoningEffort = effort } } } + case "disabled": + if effort, ok := util.ThinkingBudgetToEffort(modelName, 0); ok && effort != "" { + reasoningEffort = effort + } } } template, _ = sjson.Set(template, "reasoning.effort", reasoningEffort) diff --git a/internal/translator/codex/gemini/codex_gemini_request.go b/internal/translator/codex/gemini/codex_gemini_request.go index c2dacd3e..91a38029 100644 --- a/internal/translator/codex/gemini/codex_gemini_request.go +++ b/internal/translator/codex/gemini/codex_gemini_request.go @@ -253,7 +253,7 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool) if util.ModelUsesThinkingLevels(modelName) { if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() { budget := int(thinkingBudget.Int()) - if effort, ok := util.OpenAIThinkingBudgetToEffort(modelName, budget); ok && effort != "" { + if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" { reasoningEffort = effort } } diff --git a/internal/translator/openai/claude/openai_claude_request.go b/internal/translator/openai/claude/openai_claude_request.go index 0ee8c225..e61ec521 100644 --- a/internal/translator/openai/claude/openai_claude_request.go +++ b/internal/translator/openai/claude/openai_claude_request.go @@ -63,10 +63,22 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream // Thinking: Convert Claude thinking.budget_tokens to OpenAI reasoning_effort if thinking := root.Get("thinking"); thinking.Exists() && thinking.IsObject() { - if thinkingType := thinking.Get("type"); thinkingType.Exists() && thinkingType.String() == "enabled" { - if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() { - budget := int(budgetTokens.Int()) - if effort, ok := util.OpenAIThinkingBudgetToEffort(modelName, budget); ok && effort != "" { + if thinkingType := thinking.Get("type"); thinkingType.Exists() { + switch thinkingType.String() { + case "enabled": + if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() { + budget := int(budgetTokens.Int()) + if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" { + out, _ = sjson.Set(out, "reasoning_effort", effort) + } + } else { + // No budget_tokens specified, default to "auto" for enabled thinking + if effort, ok := util.ThinkingBudgetToEffort(modelName, -1); ok && effort != "" { + out, _ = sjson.Set(out, "reasoning_effort", effort) + } + } + case "disabled": + if effort, ok := util.ThinkingBudgetToEffort(modelName, 0); ok && effort != "" { out, _ = sjson.Set(out, "reasoning_effort", effort) } } diff --git a/internal/translator/openai/gemini/openai_gemini_request.go b/internal/translator/openai/gemini/openai_gemini_request.go index cca6ebf7..032ca60d 100644 --- a/internal/translator/openai/gemini/openai_gemini_request.go +++ b/internal/translator/openai/gemini/openai_gemini_request.go @@ -83,7 +83,7 @@ func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() { if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() { budget := int(thinkingBudget.Int()) - if effort, ok := util.OpenAIThinkingBudgetToEffort(modelName, budget); ok && effort != "" { + if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" { out, _ = sjson.Set(out, "reasoning_effort", effort) } } diff --git a/internal/util/openai_thinking.go b/internal/util/openai_thinking.go deleted file mode 100644 index 5ce7e6bf..00000000 --- a/internal/util/openai_thinking.go +++ /dev/null @@ -1,37 +0,0 @@ -package util - -// OpenAIThinkingBudgetToEffort maps a numeric thinking budget (tokens) -// into an OpenAI-style reasoning effort level for level-based models. -// -// Ranges: -// - 0 -> "none" -// - -1 -> "auto" -// - 1..1024 -> "low" -// - 1025..8192 -> "medium" -// - 8193..24576 -> "high" -// - 24577.. -> highest supported level for the model (defaults to "xhigh") -// -// Negative values other than -1 are treated as unsupported. -func OpenAIThinkingBudgetToEffort(model string, budget int) (string, bool) { - switch { - case budget == -1: - return "auto", true - case budget < -1: - return "", false - case budget == 0: - return "none", true - case budget > 0 && budget <= 1024: - return "low", true - case budget <= 8192: - return "medium", true - case budget <= 24576: - return "high", true - case budget > 24576: - if levels := GetModelThinkingLevels(model); len(levels) > 0 { - return levels[len(levels)-1], true - } - return "xhigh", true - default: - return "", false - } -} diff --git a/internal/util/thinking.go b/internal/util/thinking.go index 793134fc..77ec16ba 100644 --- a/internal/util/thinking.go +++ b/internal/util/thinking.go @@ -118,3 +118,83 @@ func IsOpenAICompatibilityModel(model string) bool { } return strings.EqualFold(strings.TrimSpace(info.Type), "openai-compatibility") } + +// ThinkingEffortToBudget maps a reasoning effort level to a numeric thinking budget (tokens), +// clamping the result to the model's supported range. +// +// Mappings (values are normalized to model's supported range): +// - "none" -> 0 +// - "auto" -> -1 +// - "minimal" -> 512 +// - "low" -> 1024 +// - "medium" -> 8192 +// - "high" -> 24576 +// - "xhigh" -> 32768 +// +// Returns false when the effort level is empty or unsupported. +func ThinkingEffortToBudget(model, effort string) (int, bool) { + if effort == "" { + return 0, false + } + normalized, ok := NormalizeReasoningEffortLevel(model, effort) + if !ok { + normalized = strings.ToLower(strings.TrimSpace(effort)) + } + switch normalized { + case "none": + return 0, true + case "auto": + return NormalizeThinkingBudget(model, -1), true + case "minimal": + return NormalizeThinkingBudget(model, 512), true + case "low": + return NormalizeThinkingBudget(model, 1024), true + case "medium": + return NormalizeThinkingBudget(model, 8192), true + case "high": + return NormalizeThinkingBudget(model, 24576), true + case "xhigh": + return NormalizeThinkingBudget(model, 32768), true + default: + return 0, false + } +} + +// ThinkingBudgetToEffort maps a numeric thinking budget (tokens) +// to a reasoning effort level for level-based models. +// +// Mappings: +// - 0 -> "none" (or lowest supported level if model doesn't support "none") +// - -1 -> "auto" +// - 1..1024 -> "low" +// - 1025..8192 -> "medium" +// - 8193..24576 -> "high" +// - 24577.. -> highest supported level for the model (defaults to "xhigh") +// +// Returns false when the budget is unsupported (negative values other than -1). +func ThinkingBudgetToEffort(model string, budget int) (string, bool) { + switch { + case budget == -1: + return "auto", true + case budget < -1: + return "", false + case budget == 0: + if levels := GetModelThinkingLevels(model); len(levels) > 0 { + return levels[0], true + } + return "none", true + case budget > 0 && budget <= 1024: + return "low", true + case budget <= 8192: + return "medium", true + case budget <= 24576: + return "high", true + case budget > 24576: + if levels := GetModelThinkingLevels(model); len(levels) > 0 { + return levels[len(levels)-1], true + } + return "xhigh", true + default: + return "", false + } +} diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go index b877e109..ff3b24a6 100644 --- a/internal/util/thinking_suffix.go +++ b/internal/util/thinking_suffix.go @@ -201,36 +201,6 @@ func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) { return "", true } -// ThinkingEffortToBudget maps reasoning effort levels to approximate budgets, -// clamping the result to the model's supported range. -func ThinkingEffortToBudget(model, effort string) (int, bool) { - if effort == "" { - return 0, false - } - normalized, ok := NormalizeReasoningEffortLevel(model, effort) - if !ok { - normalized = strings.ToLower(strings.TrimSpace(effort)) - } - switch normalized { - case "none": - return 0, true - case "auto": - return NormalizeThinkingBudget(model, -1), true - case "minimal": - return NormalizeThinkingBudget(model, 512), true - case "low": - return NormalizeThinkingBudget(model, 1024), true - case "medium": - return NormalizeThinkingBudget(model, 8192), true - case "high": - return NormalizeThinkingBudget(model, 24576), true - case "xhigh": - return NormalizeThinkingBudget(model, 32768), true - default: - return 0, false - } -} - // ResolveOriginalModel returns the original model name stored in metadata (if present), // otherwise falls back to the provided model. func ResolveOriginalModel(model string, metadata map[string]any) string { diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go index 6d156954..d93ff648 100644 --- a/test/thinking_conversion_test.go +++ b/test/thinking_conversion_test.go @@ -295,7 +295,7 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) { } // Check numeric budget fallback for allowCompat if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { - if mapped, okMap := util.OpenAIThinkingBudgetToEffort(normalizedModel, *budget); okMap && mapped != "" { + if mapped, okMap := util.ThinkingBudgetToEffort(normalizedModel, *budget); okMap && mapped != "" { return true, mapped, false } } @@ -308,7 +308,7 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) { effort, ok := util.ReasoningEffortFromMetadata(metadata) if !ok || strings.TrimSpace(effort) == "" { if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { - if mapped, okMap := util.OpenAIThinkingBudgetToEffort(normalizedModel, *budget); okMap { + if mapped, okMap := util.ThinkingBudgetToEffort(normalizedModel, *budget); okMap { effort = mapped ok = true } @@ -336,7 +336,7 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) { return false, "", true } if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { - if mapped, okMap := util.OpenAIThinkingBudgetToEffort(normalizedModel, *budget); okMap && mapped != "" { + if mapped, okMap := util.ThinkingBudgetToEffort(normalizedModel, *budget); okMap && mapped != "" { mapped = strings.ToLower(strings.TrimSpace(mapped)) if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, mapped); okLevel { return true, normalized, false @@ -609,7 +609,7 @@ func TestRawPayloadThinkingConversions(t *testing.T) { return true, normalized, false } if budget, ok := cs.thinkingParam.(int); ok { - if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" { + if mapped, okM := util.ThinkingBudgetToEffort(model, budget); okM && mapped != "" { return true, mapped, false } } @@ -625,7 +625,7 @@ func TestRawPayloadThinkingConversions(t *testing.T) { return false, "", true // invalid level } if budget, ok := cs.thinkingParam.(int); ok { - if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" { + if mapped, okM := util.ThinkingBudgetToEffort(model, budget); okM && mapped != "" { // Check if the mapped effort is valid for this model if _, validLevel := util.NormalizeReasoningEffortLevel(model, mapped); !validLevel { return true, mapped, true // expect validation error @@ -646,7 +646,7 @@ func TestRawPayloadThinkingConversions(t *testing.T) { return false, "", true } if budget, ok := cs.thinkingParam.(int); ok { - if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" { + if mapped, okM := util.ThinkingBudgetToEffort(model, budget); okM && mapped != "" { // Check if the mapped effort is valid for this model if _, validLevel := util.NormalizeReasoningEffortLevel(model, mapped); !validLevel { return true, mapped, true // expect validation error @@ -721,7 +721,7 @@ func TestRawPayloadThinkingConversions(t *testing.T) { } } -func TestOpenAIThinkingBudgetToEffortRanges(t *testing.T) { +func TestThinkingBudgetToEffortRanges(t *testing.T) { cleanup := registerCoreModels(t) defer cleanup() @@ -733,7 +733,7 @@ func TestOpenAIThinkingBudgetToEffortRanges(t *testing.T) { ok bool }{ {name: "dynamic-auto", model: "gpt-5", budget: -1, want: "auto", ok: true}, - {name: "zero-none", model: "gpt-5", budget: 0, want: "none", ok: true}, + {name: "zero-none", model: "gpt-5", budget: 0, want: "minimal", ok: true}, {name: "low-min", model: "gpt-5", budget: 1, want: "low", ok: true}, {name: "low-max", model: "gpt-5", budget: 1024, want: "low", ok: true}, {name: "medium-min", model: "gpt-5", budget: 1025, want: "medium", ok: true}, @@ -741,14 +741,14 @@ func TestOpenAIThinkingBudgetToEffortRanges(t *testing.T) { {name: "high-min", model: "gpt-5", budget: 8193, want: "high", ok: true}, {name: "high-max", model: "gpt-5", budget: 24576, want: "high", ok: true}, {name: "over-max-clamps-to-highest", model: "gpt-5", budget: 64000, want: "high", ok: true}, - {name: "over-max-xhigh-model", model: "gpt-5.2", budget: 50000, want: "xhigh", ok: true}, + {name: "over-max-xhigh-model", model: "gpt-5.2", budget: 64000, want: "xhigh", ok: true}, {name: "negative-unsupported", model: "gpt-5", budget: -5, want: "", ok: false}, } for _, cs := range cases { cs := cs t.Run(cs.name, func(t *testing.T) { - got, ok := util.OpenAIThinkingBudgetToEffort(cs.model, cs.budget) + got, ok := util.ThinkingBudgetToEffort(cs.model, cs.budget) if ok != cs.ok { t.Fatalf("ok mismatch for model=%s budget=%d: expect %v got %v", cs.model, cs.budget, cs.ok, ok) } From 9df96a4bb406ace21ddc800418051e3130bdcdec Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Tue, 16 Dec 2025 18:29:34 +0800 Subject: [PATCH 3/5] test(thinking): add effort to budget coverage --- test/thinking_conversion_test.go | 40 +++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go index d93ff648..74a1bd8a 100644 --- a/test/thinking_conversion_test.go +++ b/test/thinking_conversion_test.go @@ -721,7 +721,7 @@ func TestRawPayloadThinkingConversions(t *testing.T) { } } -func TestThinkingBudgetToEffortRanges(t *testing.T) { +func TestThinkingBudgetToEffort(t *testing.T) { cleanup := registerCoreModels(t) defer cleanup() @@ -758,3 +758,41 @@ func TestThinkingBudgetToEffortRanges(t *testing.T) { }) } } + +func TestThinkingEffortToBudget(t *testing.T) { + cleanup := registerCoreModels(t) + defer cleanup() + + cases := []struct { + name string + model string + effort string + want int + ok bool + }{ + {name: "none", model: "gemini-2.5-pro", effort: "none", want: 0, ok: true}, + {name: "auto", model: "gemini-2.5-pro", effort: "auto", want: -1, ok: true}, + {name: "minimal", model: "gemini-2.5-pro", effort: "minimal", want: 512, ok: true}, + {name: "low", model: "gemini-2.5-pro", effort: "low", want: 1024, ok: true}, + {name: "medium", model: "gemini-2.5-pro", effort: "medium", want: 8192, ok: true}, + {name: "high", model: "gemini-2.5-pro", effort: "high", want: 24576, ok: true}, + {name: "xhigh", model: "gemini-2.5-pro", effort: "xhigh", want: 32768, ok: true}, + {name: "empty-unsupported", model: "gemini-2.5-pro", effort: "", want: 0, ok: false}, + {name: "invalid-unsupported", model: "gemini-2.5-pro", effort: "ultra", want: 0, ok: false}, + {name: "case-insensitive", model: "gemini-2.5-pro", effort: "LOW", want: 1024, ok: true}, + {name: "case-insensitive-medium", model: "gemini-2.5-pro", effort: "MEDIUM", want: 8192, ok: true}, + } + + for _, cs := range cases { + cs := cs + t.Run(cs.name, func(t *testing.T) { + got, ok := util.ThinkingEffortToBudget(cs.model, cs.effort) + if ok != cs.ok { + t.Fatalf("ok mismatch for model=%s effort=%s: expect %v got %v", cs.model, cs.effort, cs.ok, ok) + } + if got != cs.want { + t.Fatalf("value mismatch for model=%s effort=%s: expect %d got %d", cs.model, cs.effort, cs.want, got) + } + }) + } +} From 52b63063886059d3a23fae21e5750a329efe1447 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Wed, 17 Dec 2025 01:07:26 +0800 Subject: [PATCH 4/5] feat(config): add support for model prefixes and prefix normalization Refactor model management to include an optional `prefix` field for model credentials, enabling better namespace handling. Update affected configuration files, APIs, and handlers to support prefix normalization and routing. Remove unused OpenAI compatibility provider logic to simplify processing. --- config.example.yaml | 8 ++++ internal/api/server.go | 12 +---- internal/config/config.go | 28 +++++++++++ internal/config/vertex_compat.go | 4 ++ internal/watcher/watcher.go | 46 +++++++++++++++---- sdk/api/handlers/handlers.go | 67 ++++++--------------------- sdk/cliproxy/auth/manager.go | 79 +++++++++++++++++++++++++++----- sdk/cliproxy/auth/types.go | 2 + sdk/cliproxy/service.go | 46 ++++++++++++++++++- sdk/config/config.go | 5 ++ 10 files changed, 210 insertions(+), 87 deletions(-) diff --git a/config.example.yaml b/config.example.yaml index e93d71b6..563dd06c 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -48,6 +48,9 @@ usage-statistics-enabled: false # Proxy URL. Supports socks5/http/https protocols. Example: socks5://user:pass@192.168.1.1:1080/ proxy-url: "" +# When true, unprefixed model requests only use credentials without a prefix (except when prefix == model name). +force-model-prefix: false + # Number of times to retry a request. Retries will occur if the HTTP response code is 403, 408, 500, 502, 503, or 504. request-retry: 3 @@ -65,6 +68,7 @@ ws-auth: false # Gemini API keys # gemini-api-key: # - api-key: "AIzaSy...01" +# prefix: "test" # optional: require calls like "test/gemini-3-pro-preview" to target this credential # base-url: "https://generativelanguage.googleapis.com" # headers: # X-Custom-Header: "custom-value" @@ -79,6 +83,7 @@ ws-auth: false # Codex API keys # codex-api-key: # - api-key: "sk-atSM..." +# prefix: "test" # optional: require calls like "test/gpt-5-codex" to target this credential # base-url: "https://www.example.com" # use the custom codex API endpoint # headers: # X-Custom-Header: "custom-value" @@ -93,6 +98,7 @@ ws-auth: false # claude-api-key: # - api-key: "sk-atSM..." # use the official claude API key, no need to set the base url # - api-key: "sk-atSM..." +# prefix: "test" # optional: require calls like "test/claude-sonnet-latest" to target this credential # base-url: "https://www.example.com" # use the custom claude API endpoint # headers: # X-Custom-Header: "custom-value" @@ -109,6 +115,7 @@ ws-auth: false # OpenAI compatibility providers # openai-compatibility: # - name: "openrouter" # The name of the provider; it will be used in the user agent and other places. +# prefix: "test" # optional: require calls like "test/kimi-k2" to target this provider's credentials # base-url: "https://openrouter.ai/api/v1" # The base URL of the provider. # headers: # X-Custom-Header: "custom-value" @@ -123,6 +130,7 @@ ws-auth: false # Vertex API keys (Vertex-compatible endpoints, use API key + base URL) # vertex-api-key: # - api-key: "vk-123..." # x-goog-api-key header +# prefix: "test" # optional: require calls like "test/vertex-pro" to target this credential # base-url: "https://example.com/api" # e.g. https://zenmux.ai/api # proxy-url: "socks5://proxy.example.com:1080" # optional per-key proxy override # headers: diff --git a/internal/api/server.go b/internal/api/server.go index af28e6ad..5ffffa1d 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -230,13 +230,9 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk envManagementSecret := envAdminPasswordSet && envAdminPassword != "" // Create server instance - providerNames := make([]string, 0, len(cfg.OpenAICompatibility)) - for _, p := range cfg.OpenAICompatibility { - providerNames = append(providerNames, p.Name) - } s := &Server{ engine: engine, - handlers: handlers.NewBaseAPIHandlers(&cfg.SDKConfig, authManager, providerNames), + handlers: handlers.NewBaseAPIHandlers(&cfg.SDKConfig, authManager), cfg: cfg, accessManager: accessManager, requestLogger: requestLogger, @@ -919,12 +915,6 @@ func (s *Server) UpdateClients(cfg *config.Config) { // Save YAML snapshot for next comparison s.oldConfigYaml, _ = yaml.Marshal(cfg) - providerNames := make([]string, 0, len(cfg.OpenAICompatibility)) - for _, p := range cfg.OpenAICompatibility { - providerNames = append(providerNames, p.Name) - } - s.handlers.OpenAICompatProviders = providerNames - s.handlers.UpdateClients(&cfg.SDKConfig) if !cfg.RemoteManagement.DisableControlPanel { diff --git a/internal/config/config.go b/internal/config/config.go index 2310d7c2..63ac1cb0 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -187,6 +187,9 @@ type ClaudeKey struct { // APIKey is the authentication key for accessing Claude API services. APIKey string `yaml:"api-key" json:"api-key"` + // Prefix optionally namespaces models for this credential (e.g., "teamA/claude-sonnet-4"). + Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"` + // BaseURL is the base URL for the Claude API endpoint. // If empty, the default Claude API URL will be used. BaseURL string `yaml:"base-url" json:"base-url"` @@ -219,6 +222,9 @@ type CodexKey struct { // APIKey is the authentication key for accessing Codex API services. APIKey string `yaml:"api-key" json:"api-key"` + // Prefix optionally namespaces models for this credential (e.g., "teamA/gpt-5-codex"). + Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"` + // BaseURL is the base URL for the Codex API endpoint. // If empty, the default Codex API URL will be used. BaseURL string `yaml:"base-url" json:"base-url"` @@ -239,6 +245,9 @@ type GeminiKey struct { // APIKey is the authentication key for accessing Gemini API services. APIKey string `yaml:"api-key" json:"api-key"` + // Prefix optionally namespaces models for this credential (e.g., "teamA/gemini-3-pro-preview"). + Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"` + // BaseURL optionally overrides the Gemini API endpoint. BaseURL string `yaml:"base-url,omitempty" json:"base-url,omitempty"` @@ -258,6 +267,9 @@ type OpenAICompatibility struct { // Name is the identifier for this OpenAI compatibility configuration. Name string `yaml:"name" json:"name"` + // Prefix optionally namespaces model aliases for this provider (e.g., "teamA/kimi-k2"). + Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"` + // BaseURL is the base URL for the external OpenAI-compatible API endpoint. BaseURL string `yaml:"base-url" json:"base-url"` @@ -422,6 +434,7 @@ func (cfg *Config) SanitizeOpenAICompatibility() { for i := range cfg.OpenAICompatibility { e := cfg.OpenAICompatibility[i] e.Name = strings.TrimSpace(e.Name) + e.Prefix = normalizeModelPrefix(e.Prefix) e.BaseURL = strings.TrimSpace(e.BaseURL) e.Headers = NormalizeHeaders(e.Headers) if e.BaseURL == "" { @@ -442,6 +455,7 @@ func (cfg *Config) SanitizeCodexKeys() { out := make([]CodexKey, 0, len(cfg.CodexKey)) for i := range cfg.CodexKey { e := cfg.CodexKey[i] + e.Prefix = normalizeModelPrefix(e.Prefix) e.BaseURL = strings.TrimSpace(e.BaseURL) e.Headers = NormalizeHeaders(e.Headers) e.ExcludedModels = NormalizeExcludedModels(e.ExcludedModels) @@ -460,6 +474,7 @@ func (cfg *Config) SanitizeClaudeKeys() { } for i := range cfg.ClaudeKey { entry := &cfg.ClaudeKey[i] + entry.Prefix = normalizeModelPrefix(entry.Prefix) entry.Headers = NormalizeHeaders(entry.Headers) entry.ExcludedModels = NormalizeExcludedModels(entry.ExcludedModels) } @@ -479,6 +494,7 @@ func (cfg *Config) SanitizeGeminiKeys() { if entry.APIKey == "" { continue } + entry.Prefix = normalizeModelPrefix(entry.Prefix) entry.BaseURL = strings.TrimSpace(entry.BaseURL) entry.ProxyURL = strings.TrimSpace(entry.ProxyURL) entry.Headers = NormalizeHeaders(entry.Headers) @@ -492,6 +508,18 @@ func (cfg *Config) SanitizeGeminiKeys() { cfg.GeminiKey = out } +func normalizeModelPrefix(prefix string) string { + trimmed := strings.TrimSpace(prefix) + trimmed = strings.Trim(trimmed, "/") + if trimmed == "" { + return "" + } + if strings.Contains(trimmed, "/") { + return "" + } + return trimmed +} + func syncInlineAccessProvider(cfg *Config) { if cfg == nil { return diff --git a/internal/config/vertex_compat.go b/internal/config/vertex_compat.go index 1257dd62..a14f75bc 100644 --- a/internal/config/vertex_compat.go +++ b/internal/config/vertex_compat.go @@ -13,6 +13,9 @@ type VertexCompatKey struct { // Maps to the x-goog-api-key header. APIKey string `yaml:"api-key" json:"api-key"` + // Prefix optionally namespaces model aliases for this credential (e.g., "teamA/vertex-pro"). + Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"` + // BaseURL is the base URL for the Vertex-compatible API endpoint. // The executor will append "/v1/publishers/google/models/{model}:action" to this. // Example: "https://zenmux.ai/api" becomes "https://zenmux.ai/api/v1/publishers/google/models/..." @@ -53,6 +56,7 @@ func (cfg *Config) SanitizeVertexCompatKeys() { if entry.APIKey == "" { continue } + entry.Prefix = normalizeModelPrefix(entry.Prefix) entry.BaseURL = strings.TrimSpace(entry.BaseURL) if entry.BaseURL == "" { // BaseURL is required for Vertex API key entries diff --git a/internal/watcher/watcher.go b/internal/watcher/watcher.go index 43a3a3dc..68ff5394 100644 --- a/internal/watcher/watcher.go +++ b/internal/watcher/watcher.go @@ -183,7 +183,7 @@ func (w *Watcher) Start(ctx context.Context) error { go w.processEvents(ctx) // Perform an initial full reload based on current config and auth dir - w.reloadClients(true, nil) + w.reloadClients(true, nil, false) return nil } @@ -276,7 +276,7 @@ func (w *Watcher) DispatchRuntimeAuthUpdate(update AuthUpdate) bool { return true } -func (w *Watcher) refreshAuthState() { +func (w *Watcher) refreshAuthState(force bool) { auths := w.SnapshotCoreAuths() w.clientsMutex.Lock() if len(w.runtimeAuths) > 0 { @@ -286,12 +286,12 @@ func (w *Watcher) refreshAuthState() { } } } - updates := w.prepareAuthUpdatesLocked(auths) + updates := w.prepareAuthUpdatesLocked(auths, force) w.clientsMutex.Unlock() w.dispatchAuthUpdates(updates) } -func (w *Watcher) prepareAuthUpdatesLocked(auths []*coreauth.Auth) []AuthUpdate { +func (w *Watcher) prepareAuthUpdatesLocked(auths []*coreauth.Auth, force bool) []AuthUpdate { newState := make(map[string]*coreauth.Auth, len(auths)) for _, auth := range auths { if auth == nil || auth.ID == "" { @@ -318,7 +318,7 @@ func (w *Watcher) prepareAuthUpdatesLocked(auths []*coreauth.Auth) []AuthUpdate for id, auth := range newState { if existing, ok := w.currentAuths[id]; !ok { updates = append(updates, AuthUpdate{Action: AuthUpdateActionAdd, ID: id, Auth: auth.Clone()}) - } else if !authEqual(existing, auth) { + } else if force || !authEqual(existing, auth) { updates = append(updates, AuthUpdate{Action: AuthUpdateActionModify, ID: id, Auth: auth.Clone()}) } } @@ -949,15 +949,16 @@ func (w *Watcher) reloadConfig() bool { } authDirChanged := oldConfig == nil || oldConfig.AuthDir != newConfig.AuthDir + forceAuthRefresh := oldConfig != nil && oldConfig.ForceModelPrefix != newConfig.ForceModelPrefix log.Infof("config successfully reloaded, triggering client reload") // Reload clients with new config - w.reloadClients(authDirChanged, affectedOAuthProviders) + w.reloadClients(authDirChanged, affectedOAuthProviders, forceAuthRefresh) return true } // reloadClients performs a full scan and reload of all clients. -func (w *Watcher) reloadClients(rescanAuth bool, affectedOAuthProviders []string) { +func (w *Watcher) reloadClients(rescanAuth bool, affectedOAuthProviders []string, forceAuthRefresh bool) { log.Debugf("starting full client load process") w.clientsMutex.RLock() @@ -1048,7 +1049,7 @@ func (w *Watcher) reloadClients(rescanAuth bool, affectedOAuthProviders []string w.reloadCallback(cfg) } - w.refreshAuthState() + w.refreshAuthState(forceAuthRefresh) log.Infof("full client load complete - %d clients (%d auth files + %d Gemini API keys + %d Vertex API keys + %d Claude API keys + %d Codex keys + %d OpenAI-compat)", totalNewClients, @@ -1099,7 +1100,7 @@ func (w *Watcher) addOrUpdateClient(path string) { w.clientsMutex.Unlock() // Unlock before the callback - w.refreshAuthState() + w.refreshAuthState(false) if w.reloadCallback != nil { log.Debugf("triggering server update callback after add/update") @@ -1118,7 +1119,7 @@ func (w *Watcher) removeClient(path string) { w.clientsMutex.Unlock() // Release the lock before the callback - w.refreshAuthState() + w.refreshAuthState(false) if w.reloadCallback != nil { log.Debugf("triggering server update callback after removal") @@ -1147,6 +1148,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth { if key == "" { continue } + prefix := strings.TrimSpace(entry.Prefix) base := strings.TrimSpace(entry.BaseURL) proxyURL := strings.TrimSpace(entry.ProxyURL) id, token := idGen.next("gemini:apikey", key, base) @@ -1162,6 +1164,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth { ID: id, Provider: "gemini", Label: "gemini-apikey", + Prefix: prefix, Status: coreauth.StatusActive, ProxyURL: proxyURL, Attributes: attrs, @@ -1179,6 +1182,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth { if key == "" { continue } + prefix := strings.TrimSpace(ck.Prefix) base := strings.TrimSpace(ck.BaseURL) id, token := idGen.next("claude:apikey", key, base) attrs := map[string]string{ @@ -1197,6 +1201,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth { ID: id, Provider: "claude", Label: "claude-apikey", + Prefix: prefix, Status: coreauth.StatusActive, ProxyURL: proxyURL, Attributes: attrs, @@ -1213,6 +1218,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth { if key == "" { continue } + prefix := strings.TrimSpace(ck.Prefix) id, token := idGen.next("codex:apikey", key, ck.BaseURL) attrs := map[string]string{ "source": fmt.Sprintf("config:codex[%s]", token), @@ -1227,6 +1233,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth { ID: id, Provider: "codex", Label: "codex-apikey", + Prefix: prefix, Status: coreauth.StatusActive, ProxyURL: proxyURL, Attributes: attrs, @@ -1238,6 +1245,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth { } for i := range cfg.OpenAICompatibility { compat := &cfg.OpenAICompatibility[i] + prefix := strings.TrimSpace(compat.Prefix) providerName := strings.ToLower(strings.TrimSpace(compat.Name)) if providerName == "" { providerName = "openai-compatibility" @@ -1269,6 +1277,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth { ID: id, Provider: providerName, Label: compat.Name, + Prefix: prefix, Status: coreauth.StatusActive, ProxyURL: proxyURL, Attributes: attrs, @@ -1295,6 +1304,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth { ID: id, Provider: providerName, Label: compat.Name, + Prefix: prefix, Status: coreauth.StatusActive, Attributes: attrs, CreatedAt: now, @@ -1312,6 +1322,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth { base := strings.TrimSpace(compat.BaseURL) key := strings.TrimSpace(compat.APIKey) + prefix := strings.TrimSpace(compat.Prefix) proxyURL := strings.TrimSpace(compat.ProxyURL) idKind := fmt.Sprintf("vertex:apikey:%s", base) id, token := idGen.next(idKind, key, base, proxyURL) @@ -1331,6 +1342,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth { ID: id, Provider: providerName, Label: "vertex-apikey", + Prefix: prefix, Status: coreauth.StatusActive, ProxyURL: proxyURL, Attributes: attrs, @@ -1383,10 +1395,20 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth { proxyURL = p } + prefix := "" + if rawPrefix, ok := metadata["prefix"].(string); ok { + trimmed := strings.TrimSpace(rawPrefix) + trimmed = strings.Trim(trimmed, "/") + if trimmed != "" && !strings.Contains(trimmed, "/") { + prefix = trimmed + } + } + a := &coreauth.Auth{ ID: id, Provider: provider, Label: label, + Prefix: prefix, Status: coreauth.StatusActive, Attributes: map[string]string{ "source": full, @@ -1473,6 +1495,7 @@ func synthesizeGeminiVirtualAuths(primary *coreauth.Auth, metadata map[string]an Attributes: attrs, Metadata: metadataCopy, ProxyURL: primary.ProxyURL, + Prefix: primary.Prefix, CreatedAt: now, UpdatedAt: now, Runtime: geminicli.NewVirtualCredential(projectID, shared), @@ -1742,6 +1765,9 @@ func buildConfigChangeDetails(oldCfg, newCfg *config.Config) []string { if oldCfg.WebsocketAuth != newCfg.WebsocketAuth { changes = append(changes, fmt.Sprintf("ws-auth: %t -> %t", oldCfg.WebsocketAuth, newCfg.WebsocketAuth)) } + if oldCfg.ForceModelPrefix != newCfg.ForceModelPrefix { + changes = append(changes, fmt.Sprintf("force-model-prefix: %t -> %t", oldCfg.ForceModelPrefix, newCfg.ForceModelPrefix)) + } // Quota-exceeded behavior if oldCfg.QuotaExceeded.SwitchProject != newCfg.QuotaExceeded.SwitchProject { diff --git a/sdk/api/handlers/handlers.go b/sdk/api/handlers/handlers.go index a17e54aa..e5b4fc93 100644 --- a/sdk/api/handlers/handlers.go +++ b/sdk/api/handlers/handlers.go @@ -49,9 +49,6 @@ type BaseAPIHandler struct { // Cfg holds the current application configuration. Cfg *config.SDKConfig - - // OpenAICompatProviders is a list of provider names for OpenAI compatibility. - OpenAICompatProviders []string } // NewBaseAPIHandlers creates a new API handlers instance. @@ -63,11 +60,10 @@ type BaseAPIHandler struct { // // Returns: // - *BaseAPIHandler: A new API handlers instance -func NewBaseAPIHandlers(cfg *config.SDKConfig, authManager *coreauth.Manager, openAICompatProviders []string) *BaseAPIHandler { +func NewBaseAPIHandlers(cfg *config.SDKConfig, authManager *coreauth.Manager) *BaseAPIHandler { return &BaseAPIHandler{ - Cfg: cfg, - AuthManager: authManager, - OpenAICompatProviders: openAICompatProviders, + Cfg: cfg, + AuthManager: authManager, } } @@ -342,30 +338,19 @@ func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string // Resolve "auto" model to an actual available model first resolvedModelName := util.ResolveAutoModel(modelName) - providerName, extractedModelName, isDynamic := h.parseDynamicModel(resolvedModelName) - - targetModelName := resolvedModelName - if isDynamic { - targetModelName = extractedModelName - } - // Normalize the model name to handle dynamic thinking suffixes before determining the provider. - normalizedModel, metadata = normalizeModelMetadata(targetModelName) + normalizedModel, metadata = normalizeModelMetadata(resolvedModelName) - if isDynamic { - providers = []string{providerName} - } else { - // For non-dynamic models, use the normalizedModel to get the provider name. - providers = util.GetProviderName(normalizedModel) - if len(providers) == 0 && metadata != nil { - if originalRaw, ok := metadata[util.ThinkingOriginalModelMetadataKey]; ok { - if originalModel, okStr := originalRaw.(string); okStr { - originalModel = strings.TrimSpace(originalModel) - if originalModel != "" && !strings.EqualFold(originalModel, normalizedModel) { - if altProviders := util.GetProviderName(originalModel); len(altProviders) > 0 { - providers = altProviders - normalizedModel = originalModel - } + // Use the normalizedModel to get the provider name. + providers = util.GetProviderName(normalizedModel) + if len(providers) == 0 && metadata != nil { + if originalRaw, ok := metadata[util.ThinkingOriginalModelMetadataKey]; ok { + if originalModel, okStr := originalRaw.(string); okStr { + originalModel = strings.TrimSpace(originalModel) + if originalModel != "" && !strings.EqualFold(originalModel, normalizedModel) { + if altProviders := util.GetProviderName(originalModel); len(altProviders) > 0 { + providers = altProviders + normalizedModel = originalModel } } } @@ -383,30 +368,6 @@ func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string return providers, normalizedModel, metadata, nil } -func (h *BaseAPIHandler) parseDynamicModel(modelName string) (providerName, model string, isDynamic bool) { - var providerPart, modelPart string - for _, sep := range []string{"://"} { - if parts := strings.SplitN(modelName, sep, 2); len(parts) == 2 { - providerPart = parts[0] - modelPart = parts[1] - break - } - } - - if providerPart == "" { - return "", modelName, false - } - - // Check if the provider is a configured openai-compatibility provider - for _, pName := range h.OpenAICompatProviders { - if pName == providerPart { - return providerPart, modelPart, true - } - } - - return "", modelName, false -} - func cloneBytes(src []byte) []byte { if len(src) == 0 { return nil diff --git a/sdk/cliproxy/auth/manager.go b/sdk/cliproxy/auth/manager.go index 9f247bb9..c345cd15 100644 --- a/sdk/cliproxy/auth/manager.go +++ b/sdk/cliproxy/auth/manager.go @@ -363,10 +363,11 @@ func (m *Manager) executeWithProvider(ctx context.Context, provider string, req if provider == "" { return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "provider identifier is empty"} } + routeModel := req.Model tried := make(map[string]struct{}) var lastErr error for { - auth, executor, errPick := m.pickNext(ctx, provider, req.Model, opts, tried) + auth, executor, errPick := m.pickNext(ctx, provider, routeModel, opts, tried) if errPick != nil { if lastErr != nil { return cliproxyexecutor.Response{}, lastErr @@ -396,8 +397,10 @@ func (m *Manager) executeWithProvider(ctx context.Context, provider string, req execCtx = context.WithValue(execCtx, roundTripperContextKey{}, rt) execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt) } - resp, errExec := executor.Execute(execCtx, auth, req, opts) - result := Result{AuthID: auth.ID, Provider: provider, Model: req.Model, Success: errExec == nil} + execReq := req + execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth) + resp, errExec := executor.Execute(execCtx, auth, execReq, opts) + result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil} if errExec != nil { result.Error = &Error{Message: errExec.Error()} var se cliproxyexecutor.StatusError @@ -420,10 +423,11 @@ func (m *Manager) executeCountWithProvider(ctx context.Context, provider string, if provider == "" { return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "provider identifier is empty"} } + routeModel := req.Model tried := make(map[string]struct{}) var lastErr error for { - auth, executor, errPick := m.pickNext(ctx, provider, req.Model, opts, tried) + auth, executor, errPick := m.pickNext(ctx, provider, routeModel, opts, tried) if errPick != nil { if lastErr != nil { return cliproxyexecutor.Response{}, lastErr @@ -453,8 +457,10 @@ func (m *Manager) executeCountWithProvider(ctx context.Context, provider string, execCtx = context.WithValue(execCtx, roundTripperContextKey{}, rt) execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt) } - resp, errExec := executor.CountTokens(execCtx, auth, req, opts) - result := Result{AuthID: auth.ID, Provider: provider, Model: req.Model, Success: errExec == nil} + execReq := req + execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth) + resp, errExec := executor.CountTokens(execCtx, auth, execReq, opts) + result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil} if errExec != nil { result.Error = &Error{Message: errExec.Error()} var se cliproxyexecutor.StatusError @@ -477,10 +483,11 @@ func (m *Manager) executeStreamWithProvider(ctx context.Context, provider string if provider == "" { return nil, &Error{Code: "provider_not_found", Message: "provider identifier is empty"} } + routeModel := req.Model tried := make(map[string]struct{}) var lastErr error for { - auth, executor, errPick := m.pickNext(ctx, provider, req.Model, opts, tried) + auth, executor, errPick := m.pickNext(ctx, provider, routeModel, opts, tried) if errPick != nil { if lastErr != nil { return nil, lastErr @@ -510,14 +517,16 @@ func (m *Manager) executeStreamWithProvider(ctx context.Context, provider string execCtx = context.WithValue(execCtx, roundTripperContextKey{}, rt) execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt) } - chunks, errStream := executor.ExecuteStream(execCtx, auth, req, opts) + execReq := req + execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth) + chunks, errStream := executor.ExecuteStream(execCtx, auth, execReq, opts) if errStream != nil { rerr := &Error{Message: errStream.Error()} var se cliproxyexecutor.StatusError if errors.As(errStream, &se) && se != nil { rerr.HTTPStatus = se.StatusCode() } - result := Result{AuthID: auth.ID, Provider: provider, Model: req.Model, Success: false, Error: rerr} + result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: false, Error: rerr} result.RetryAfter = retryAfterFromError(errStream) m.MarkResult(execCtx, result) lastErr = errStream @@ -535,18 +544,66 @@ func (m *Manager) executeStreamWithProvider(ctx context.Context, provider string if errors.As(chunk.Err, &se) && se != nil { rerr.HTTPStatus = se.StatusCode() } - m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: req.Model, Success: false, Error: rerr}) + m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: false, Error: rerr}) } out <- chunk } if !failed { - m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: req.Model, Success: true}) + m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: true}) } }(execCtx, auth.Clone(), provider, chunks) return out, nil } } +func rewriteModelForAuth(model string, metadata map[string]any, auth *Auth) (string, map[string]any) { + if auth == nil || model == "" { + return model, metadata + } + prefix := strings.TrimSpace(auth.Prefix) + if prefix == "" { + return model, metadata + } + needle := prefix + "/" + if !strings.HasPrefix(model, needle) { + return model, metadata + } + rewritten := strings.TrimPrefix(model, needle) + return rewritten, stripPrefixFromMetadata(metadata, needle) +} + +func stripPrefixFromMetadata(metadata map[string]any, needle string) map[string]any { + if len(metadata) == 0 || needle == "" { + return metadata + } + keys := []string{ + util.ThinkingOriginalModelMetadataKey, + util.GeminiOriginalModelMetadataKey, + } + var out map[string]any + for _, key := range keys { + raw, ok := metadata[key] + if !ok { + continue + } + value, okStr := raw.(string) + if !okStr || !strings.HasPrefix(value, needle) { + continue + } + if out == nil { + out = make(map[string]any, len(metadata)) + for k, v := range metadata { + out[k] = v + } + } + out[key] = strings.TrimPrefix(value, needle) + } + if out == nil { + return metadata + } + return out +} + func (m *Manager) normalizeProviders(providers []string) []string { if len(providers) == 0 { return nil diff --git a/sdk/cliproxy/auth/types.go b/sdk/cliproxy/auth/types.go index efba6981..5a2d216d 100644 --- a/sdk/cliproxy/auth/types.go +++ b/sdk/cliproxy/auth/types.go @@ -19,6 +19,8 @@ type Auth struct { Index uint64 `json:"-"` // Provider is the upstream provider key (e.g. "gemini", "claude"). Provider string `json:"provider"` + // Prefix optionally namespaces models for routing (e.g., "teamA/gemini-3-pro-preview"). + Prefix string `json:"prefix,omitempty"` // FileName stores the relative or absolute path of the backing auth file. FileName string `json:"-"` // Storage holds the token persistence implementation used during login flows. diff --git a/sdk/cliproxy/service.go b/sdk/cliproxy/service.go index 1ef829d1..f3cbf484 100644 --- a/sdk/cliproxy/service.go +++ b/sdk/cliproxy/service.go @@ -787,7 +787,7 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) { if providerKey == "" { providerKey = "openai-compatibility" } - GlobalModelRegistry().RegisterClient(a.ID, providerKey, ms) + GlobalModelRegistry().RegisterClient(a.ID, providerKey, applyModelPrefixes(ms, a.Prefix, s.cfg.ForceModelPrefix)) } else { // Ensure stale registrations are cleared when model list becomes empty. GlobalModelRegistry().UnregisterClient(a.ID) @@ -807,7 +807,7 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) { if key == "" { key = strings.ToLower(strings.TrimSpace(a.Provider)) } - GlobalModelRegistry().RegisterClient(a.ID, key, models) + GlobalModelRegistry().RegisterClient(a.ID, key, applyModelPrefixes(models, a.Prefix, s.cfg != nil && s.cfg.ForceModelPrefix)) return } @@ -987,6 +987,48 @@ func applyExcludedModels(models []*ModelInfo, excluded []string) []*ModelInfo { return filtered } +func applyModelPrefixes(models []*ModelInfo, prefix string, forceModelPrefix bool) []*ModelInfo { + trimmedPrefix := strings.TrimSpace(prefix) + if trimmedPrefix == "" || len(models) == 0 { + return models + } + + out := make([]*ModelInfo, 0, len(models)*2) + seen := make(map[string]struct{}, len(models)*2) + + addModel := func(model *ModelInfo) { + if model == nil { + return + } + id := strings.TrimSpace(model.ID) + if id == "" { + return + } + if _, exists := seen[id]; exists { + return + } + seen[id] = struct{}{} + out = append(out, model) + } + + for _, model := range models { + if model == nil { + continue + } + baseID := strings.TrimSpace(model.ID) + if baseID == "" { + continue + } + if !forceModelPrefix || trimmedPrefix == baseID { + addModel(model) + } + clone := *model + clone.ID = trimmedPrefix + "/" + baseID + addModel(&clone) + } + return out +} + // matchWildcard performs case-insensitive wildcard matching where '*' matches any substring. func matchWildcard(pattern, value string) bool { if pattern == "" { diff --git a/sdk/config/config.go b/sdk/config/config.go index acb340ef..f6f20d5c 100644 --- a/sdk/config/config.go +++ b/sdk/config/config.go @@ -9,6 +9,11 @@ type SDKConfig struct { // ProxyURL is the URL of an optional proxy server to use for outbound requests. ProxyURL string `yaml:"proxy-url" json:"proxy-url"` + // ForceModelPrefix requires explicit model prefixes (e.g., "teamA/gemini-3-pro-preview") + // to target prefixed credentials. When false, unprefixed model requests may use prefixed + // credentials as well. + ForceModelPrefix bool `yaml:"force-model-prefix" json:"force-model-prefix"` + // RequestLog enables or disables detailed request logging functionality. RequestLog bool `yaml:"request-log" json:"request-log"` From 670685139af3098a2dbad1c88a59f706ee665c0b Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Wed, 17 Dec 2025 01:17:02 +0800 Subject: [PATCH 5/5] fix(api): update route patterns to support wildcards for Gemini actions Normalize action handling by accommodating wildcard patterns in route definitions for Gemini endpoints. Adjust `request.Action` parsing logic to correctly process routes with prefixed actions. --- internal/api/modules/amp/routes.go | 4 ++-- internal/api/server.go | 4 ++-- sdk/api/handlers/gemini/gemini_handlers.go | 5 +++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/internal/api/modules/amp/routes.go b/internal/api/modules/amp/routes.go index 8d9ec8ae..0abd943a 100644 --- a/internal/api/modules/amp/routes.go +++ b/internal/api/modules/amp/routes.go @@ -267,7 +267,7 @@ func (m *AmpModule) registerProviderAliases(engine *gin.Engine, baseHandler *han v1betaAmp := provider.Group("/v1beta") { v1betaAmp.GET("/models", geminiHandlers.GeminiModels) - v1betaAmp.POST("/models/:action", fallbackHandler.WrapHandler(geminiHandlers.GeminiHandler)) - v1betaAmp.GET("/models/:action", geminiHandlers.GeminiGetHandler) + v1betaAmp.POST("/models/*action", fallbackHandler.WrapHandler(geminiHandlers.GeminiHandler)) + v1betaAmp.GET("/models/*action", geminiHandlers.GeminiGetHandler) } } diff --git a/internal/api/server.go b/internal/api/server.go index 5ffffa1d..e6d03bc3 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -330,8 +330,8 @@ func (s *Server) setupRoutes() { v1beta.Use(AuthMiddleware(s.accessManager)) { v1beta.GET("/models", geminiHandlers.GeminiModels) - v1beta.POST("/models/:action", geminiHandlers.GeminiHandler) - v1beta.GET("/models/:action", geminiHandlers.GeminiGetHandler) + v1beta.POST("/models/*action", geminiHandlers.GeminiHandler) + v1beta.GET("/models/*action", geminiHandlers.GeminiGetHandler) } // Root endpoint diff --git a/sdk/api/handlers/gemini/gemini_handlers.go b/sdk/api/handlers/gemini/gemini_handlers.go index 6cd9ee62..901421b5 100644 --- a/sdk/api/handlers/gemini/gemini_handlers.go +++ b/sdk/api/handlers/gemini/gemini_handlers.go @@ -84,7 +84,8 @@ func (h *GeminiAPIHandler) GeminiGetHandler(c *gin.Context) { }) return } - switch request.Action { + action := strings.TrimPrefix(request.Action, "/") + switch action { case "gemini-3-pro-preview": c.JSON(http.StatusOK, gin.H{ "name": "models/gemini-3-pro-preview", @@ -189,7 +190,7 @@ func (h *GeminiAPIHandler) GeminiHandler(c *gin.Context) { }) return } - action := strings.Split(request.Action, ":") + action := strings.Split(strings.TrimPrefix(request.Action, "/"), ":") if len(action) != 2 { c.JSON(http.StatusNotFound, handlers.ErrorResponse{ Error: handlers.ErrorDetail{