From 3a81ab22fdb6c9b993fac1deef94785f8a8f5dbf Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 14:35:05 +0800 Subject: [PATCH] fix(runtime): unify reasoning effort metadata overrides --- internal/runtime/executor/codex_executor.go | 6 ++-- internal/runtime/executor/iflow_executor.go | 4 +-- .../executor/openai_compat_executor.go | 4 +-- internal/runtime/executor/payload_helpers.go | 30 +++------------- internal/runtime/executor/qwen_executor.go | 4 +-- internal/util/thinking_suffix.go | 34 ++++++++++++++++--- 6 files changed, 44 insertions(+), 38 deletions(-) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 7003373f..b9470b3c 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -54,7 +54,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re from := opts.SourceFormat to := sdktranslator.FromString("codex") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort") body = normalizeThinkingConfig(body, upstreamModel) if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { return resp, errValidate @@ -152,7 +152,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au to := sdktranslator.FromString("codex") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort") body = normalizeThinkingConfig(body, upstreamModel) if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { return nil, errValidate @@ -254,7 +254,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth modelForCounting := req.Model - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort") body, _ = sjson.SetBytes(body, "model", upstreamModel) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.SetBytes(body, "stream", false) diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index c68a6431..a445e47d 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -57,7 +57,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re from := opts.SourceFormat to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } @@ -143,7 +143,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index 507b0fd9..68b2963a 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -58,7 +58,7 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A translated = e.overrideModel(translated, modelOverride) } translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) - translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model) + translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort") upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" { translated, _ = sjson.SetBytes(translated, "model", upstreamModel) @@ -152,7 +152,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy translated = e.overrideModel(translated, modelOverride) } translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) - translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model) + translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort") upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" { translated, _ = sjson.SetBytes(translated, "model", upstreamModel) diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index 5711bbbd..61486d62 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -45,40 +45,20 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride) } -// applyReasoningEffortMetadata applies reasoning effort overrides (reasoning.effort) when present in metadata. -// It avoids overwriting an existing reasoning.effort field and only applies to models that support thinking. -func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model string) []byte { +// applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path. +// Metadata values take precedence over any existing field when the model supports thinking. +func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte { if len(metadata) == 0 { return payload } if !util.ModelSupportsThinking(model) { return payload } - if gjson.GetBytes(payload, "reasoning.effort").Exists() { + if field == "" { return payload } if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" { - if updated, err := sjson.SetBytes(payload, "reasoning.effort", effort); err == nil { - return updated - } - } - return payload -} - -// applyReasoningEffortMetadataChatCompletions applies reasoning_effort (OpenAI chat completions field) -// when present in metadata. It avoids overwriting an existing reasoning_effort field. -func applyReasoningEffortMetadataChatCompletions(payload []byte, metadata map[string]any, model string) []byte { - if len(metadata) == 0 { - return payload - } - if !util.ModelSupportsThinking(model) { - return payload - } - if gjson.GetBytes(payload, "reasoning_effort").Exists() { - return payload - } - if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" { - if updated, err := sjson.SetBytes(payload, "reasoning_effort", effort); err == nil { + if updated, err := sjson.SetBytes(payload, field, effort); err == nil { return updated } } diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index f060cb61..d25ed5da 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -51,7 +51,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req from := opts.SourceFormat to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } @@ -126,7 +126,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go index c2d806ad..47ce42f7 100644 --- a/internal/util/thinking_suffix.go +++ b/internal/util/thinking_suffix.go @@ -55,16 +55,42 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) { value := modelName[idx+len("-thinking-"):] if value != "" { if parsed, ok := parseIntPrefix(value); ok { - baseModel = modelName[:idx] - budgetOverride = &parsed - matched = true + candidateBase := modelName[:idx] + if ModelUsesThinkingLevels(candidateBase) { + baseModel = candidateBase + // Numeric suffix on level-aware models should still surface as reasoning effort metadata. + raw := strings.ToLower(strings.TrimSpace(value)) + if raw != "" { + reasoningEffort = &raw + } + matched = true + } else { + baseModel = candidateBase + budgetOverride = &parsed + matched = true + } } else { baseModel = modelName[:idx] if normalized, ok := NormalizeReasoningEffortLevel(baseModel, value); ok { reasoningEffort = &normalized matched = true + } else if !ModelUsesThinkingLevels(baseModel) { + // Keep unknown effort tokens so callers can honor user intent even without normalization. + raw := strings.ToLower(strings.TrimSpace(value)) + if raw != "" { + reasoningEffort = &raw + matched = true + } else { + baseModel = modelName + } } else { - baseModel = modelName + raw := strings.ToLower(strings.TrimSpace(value)) + if raw != "" { + reasoningEffort = &raw + matched = true + } else { + baseModel = modelName + } } } }