fix(runtime): unify reasoning effort metadata overrides

This commit is contained in:
hkfires
2025-12-11 14:35:05 +08:00
parent 519da2e042
commit 3a81ab22fd
6 changed files with 44 additions and 38 deletions

View File

@@ -54,7 +54,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
 	body = normalizeThinkingConfig(body, upstreamModel)
 	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return resp, errValidate
@@ -152,7 +152,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
 	body = normalizeThinkingConfig(body, upstreamModel)
 	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return nil, errValidate
@@ -254,7 +254,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
 	modelForCounting := req.Model
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
 	body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.SetBytes(body, "stream", false)

View File

@@ -57,7 +57,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
 	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
@@ -143,7 +143,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
 	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}

View File

@@ -58,7 +58,7 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 		translated = e.overrideModel(translated, modelOverride)
 	}
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
-	translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
+	translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	if upstreamModel != "" {
 		translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
@@ -152,7 +152,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 		translated = e.overrideModel(translated, modelOverride)
 	}
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
-	translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
+	translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	if upstreamModel != "" {
 		translated, _ = sjson.SetBytes(translated, "model", upstreamModel)

View File

@@ -45,40 +45,20 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str
 	return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
 }
 
-// applyReasoningEffortMetadata applies reasoning effort overrides (reasoning.effort) when present in metadata.
-// It avoids overwriting an existing reasoning.effort field and only applies to models that support thinking.
-func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model string) []byte {
+// applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
+// Metadata values take precedence over any existing field when the model supports thinking.
+func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte {
 	if len(metadata) == 0 {
 		return payload
 	}
 	if !util.ModelSupportsThinking(model) {
 		return payload
 	}
-	if gjson.GetBytes(payload, "reasoning.effort").Exists() {
+	if field == "" {
 		return payload
 	}
 	if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
-		if updated, err := sjson.SetBytes(payload, "reasoning.effort", effort); err == nil {
+		if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
 			return updated
 		}
 	}
 	return payload
 }
-
-// applyReasoningEffortMetadataChatCompletions applies reasoning_effort (OpenAI chat completions field)
-// when present in metadata. It avoids overwriting an existing reasoning_effort field.
-func applyReasoningEffortMetadataChatCompletions(payload []byte, metadata map[string]any, model string) []byte {
-	if len(metadata) == 0 {
-		return payload
-	}
-	if !util.ModelSupportsThinking(model) {
-		return payload
-	}
-	if gjson.GetBytes(payload, "reasoning_effort").Exists() {
-		return payload
-	}
-	if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
-		if updated, err := sjson.SetBytes(payload, "reasoning_effort", effort); err == nil {
-			return updated
-		}
-	}
-	return payload
-}

View File

@@ -51,7 +51,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
 	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
@@ -126,7 +126,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
 	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}

View File

@@ -55,16 +55,42 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) {
 		value := modelName[idx+len("-thinking-"):]
 		if value != "" {
 			if parsed, ok := parseIntPrefix(value); ok {
-				baseModel = modelName[:idx]
-				budgetOverride = &parsed
-				matched = true
+				candidateBase := modelName[:idx]
+				if ModelUsesThinkingLevels(candidateBase) {
+					baseModel = candidateBase
+					// Numeric suffix on level-aware models should still surface as reasoning effort metadata.
+					raw := strings.ToLower(strings.TrimSpace(value))
+					if raw != "" {
+						reasoningEffort = &raw
+					}
+					matched = true
+				} else {
+					baseModel = candidateBase
+					budgetOverride = &parsed
+					matched = true
+				}
 			} else {
 				baseModel = modelName[:idx]
 				if normalized, ok := NormalizeReasoningEffortLevel(baseModel, value); ok {
 					reasoningEffort = &normalized
 					matched = true
+				} else if !ModelUsesThinkingLevels(baseModel) {
+					// Keep unknown effort tokens so callers can honor user intent even without normalization.
+					raw := strings.ToLower(strings.TrimSpace(value))
+					if raw != "" {
+						reasoningEffort = &raw
+						matched = true
+					} else {
+						baseModel = modelName
+					}
 				} else {
-					baseModel = modelName
+					raw := strings.ToLower(strings.TrimSpace(value))
+					if raw != "" {
+						reasoningEffort = &raw
+						matched = true
+					} else {
+						baseModel = modelName
+					}
 				}
 			}
 		}