diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 866f1bdc..2e966237 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -54,8 +54,8 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re from := opts.SourceFormat to := sdktranslator.FromString("codex") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort") - body = normalizeThinkingConfig(body, upstreamModel) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false) + body = normalizeThinkingConfig(body, upstreamModel, false) if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { return resp, errValidate } @@ -152,8 +152,8 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au to := sdktranslator.FromString("codex") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort") - body = normalizeThinkingConfig(body, upstreamModel) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false) + body = normalizeThinkingConfig(body, upstreamModel, false) if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { return nil, errValidate } @@ -254,7 +254,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth modelForCounting := req.Model - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort") + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false) body, _ = sjson.SetBytes(body, "model", upstreamModel) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.SetBytes(body, "stream", false) diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index 4424ea0f..03df1be0 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -57,12 +57,12 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re from := opts.SourceFormat to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } - body = normalizeThinkingConfig(body, upstreamModel) + body = normalizeThinkingConfig(body, upstreamModel, false) if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { return resp, errValidate } @@ -148,12 +148,12 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } - body = normalizeThinkingConfig(body, upstreamModel) + body = normalizeThinkingConfig(body, upstreamModel, false) if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { return nil, errValidate } diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index d4b0afcb..1bbd0c8e 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -59,12 +59,13 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A translated = e.overrideModel(translated, modelOverride) } translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) - translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort") + allowCompat := e.allowCompatReasoningEffort(req.Model, auth) + translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" && modelOverride == "" { translated, _ = sjson.SetBytes(translated, "model", upstreamModel) } - translated = normalizeThinkingConfig(translated, upstreamModel) + translated = normalizeThinkingConfig(translated, upstreamModel, allowCompat) if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil { return resp, errValidate } @@ -154,12 +155,13 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy translated = e.overrideModel(translated, modelOverride) } translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) - translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort") + allowCompat := e.allowCompatReasoningEffort(req.Model, auth) + translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" && modelOverride == "" { translated, _ = sjson.SetBytes(translated, "model", upstreamModel) } - translated = normalizeThinkingConfig(translated, upstreamModel) + translated = normalizeThinkingConfig(translated, upstreamModel, allowCompat) if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil { return nil, errValidate } @@ -325,6 +327,27 @@ func (e *OpenAICompatExecutor) resolveUpstreamModel(alias string, auth *cliproxy return "" } +func (e *OpenAICompatExecutor) allowCompatReasoningEffort(model string, auth *cliproxyauth.Auth) bool { + trimmed := strings.TrimSpace(model) + if trimmed == "" || e == nil || e.cfg == nil { + return false + } + compat := e.resolveCompatConfig(auth) + if compat == nil || len(compat.Models) == 0 { + return false + } + for i := range compat.Models { + entry := compat.Models[i] + if strings.EqualFold(strings.TrimSpace(entry.Alias), trimmed) { + return true + } + if strings.EqualFold(strings.TrimSpace(entry.Name), trimmed) { + return true + } + } + return false +} + func (e *OpenAICompatExecutor) resolveCompatConfig(auth *cliproxyauth.Auth) *config.OpenAICompatibility { if auth == nil || e.cfg == nil { return nil diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index b791dac7..6e352c51 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -48,7 +48,7 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str // applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path. // Metadata values take precedence over any existing field when the model supports thinking, intentionally // overwriting caller-provided values to honor suffix/default metadata priority. -func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte { +func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string, allowCompat bool) []byte { if len(metadata) == 0 { return payload } @@ -59,20 +59,20 @@ func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model if baseModel == "" { baseModel = model } - if !util.ModelSupportsThinking(baseModel) && !util.IsOpenAICompatibilityModel(baseModel) { + if !util.ModelSupportsThinking(baseModel) && !allowCompat { return payload } if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" { - if util.ModelUsesThinkingLevels(model) { + if util.ModelUsesThinkingLevels(baseModel) || allowCompat { if updated, err := sjson.SetBytes(payload, field, effort); err == nil { return updated } } } // Fallback: numeric thinking_budget suffix for level-based (OpenAI-style) models. - if util.ModelUsesThinkingLevels(model) { + if util.ModelUsesThinkingLevels(baseModel) || allowCompat { if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { - if effort, ok := util.OpenAIThinkingBudgetToEffort(model, *budget); ok && effort != "" { + if effort, ok := util.OpenAIThinkingBudgetToEffort(baseModel, *budget); ok && effort != "" { if updated, err := sjson.SetBytes(payload, field, effort); err == nil { return updated } @@ -237,13 +237,13 @@ func matchModelPattern(pattern, model string) bool { // reasoning fields. For models with level-based thinking, it validates and // normalizes the reasoning effort level. For models with numeric budget thinking, // it strips the effort string fields. -func normalizeThinkingConfig(payload []byte, model string) []byte { +func normalizeThinkingConfig(payload []byte, model string, allowCompat bool) []byte { if len(payload) == 0 || model == "" { return payload } if !util.ModelSupportsThinking(model) { - if util.IsOpenAICompatibilityModel(model) { + if allowCompat { return payload } return stripThinkingFields(payload, false) diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index c8e2974a..3bd61021 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -51,12 +51,12 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req from := opts.SourceFormat to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } - body = normalizeThinkingConfig(body, upstreamModel) + body = normalizeThinkingConfig(body, upstreamModel, false) if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { return resp, errValidate } @@ -131,12 +131,12 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } - body = normalizeThinkingConfig(body, upstreamModel) + body = normalizeThinkingConfig(body, upstreamModel, false) if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { return nil, errValidate }