diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go index 38c348f2..ba8d8058 100644 --- a/internal/runtime/executor/aistudio_executor.go +++ b/internal/runtime/executor/aistudio_executor.go @@ -323,6 +323,11 @@ type translatedPayload struct { func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool) ([]byte, translatedPayload, error) { from := opts.SourceFormat to := sdktranslator.FromString("gemini") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, stream) payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream) payload = ApplyThinkingMetadata(payload, req.Metadata, req.Model) payload = util.ApplyGemini3ThinkingLevelFromMetadata(req.Model, req.Metadata, payload) @@ -331,7 +336,7 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c payload = util.NormalizeGeminiThinkingBudget(req.Model, payload, true) payload = util.StripThinkingConfigIfUnsupported(req.Model, payload) payload = fixGeminiImageAspectRatio(req.Model, payload) - payload = applyPayloadConfig(e.cfg, req.Model, payload) + payload = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", payload, originalTranslated) payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens") payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseMimeType") payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseJsonSchema") diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index b331a9df..f0116faa 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -94,13 +94,18 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au from := opts.SourceFormat to := sdktranslator.FromString("antigravity") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false) translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model) translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated) translated = normalizeAntigravityThinking(req.Model, translated, isClaude) - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated) + translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -189,13 +194,18 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth * from := opts.SourceFormat to := sdktranslator.FromString("antigravity") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model) translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated) translated = normalizeAntigravityThinking(req.Model, translated, true) - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated) + translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -525,13 +535,18 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya from := opts.SourceFormat to := sdktranslator.FromString("antigravity") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model) translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated) translated = normalizeAntigravityThinking(req.Model, translated, isClaude) - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated) + translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index f74dc1e0..7be4f41b 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -57,6 +57,11 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r to := sdktranslator.FromString("claude") // Use streaming translation to preserve function calling, except for claude. stream := from != to + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, stream) body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream) body, _ = sjson.SetBytes(body, "model", model) // Inject thinking config based on model metadata for thinking variants @@ -65,7 +70,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r if !strings.HasPrefix(model, "claude-3-5-haiku") { body = checkSystemInstructions(body) } - body = applyPayloadConfig(e.cfg, model, body) + body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) @@ -167,12 +172,17 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A if override := e.resolveUpstreamModel(req.Model, auth); override != "" { model = override } + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true) body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true) body, _ = sjson.SetBytes(body, "model", model) // Inject thinking config based on model metadata for thinking variants body = e.injectThinkingConfig(model, req.Metadata, body) body = checkSystemInstructions(body) - body = applyPayloadConfig(e.cfg, model, body) + body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 98678c4d..0788e4f1 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -56,13 +56,18 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re from := opts.SourceFormat to := sdktranslator.FromString("codex") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false) body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false) body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false) body = NormalizeThinkingConfig(body, model, false) if errValidate := ValidateThinkingConfig(body, model); errValidate != nil { return resp, errValidate } - body = applyPayloadConfig(e.cfg, model, body) + body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) body, _ = sjson.SetBytes(body, "model", model) body, _ = sjson.SetBytes(body, "stream", true) body, _ = sjson.DeleteBytes(body, "previous_response_id") @@ -156,6 +161,11 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au from := opts.SourceFormat to := sdktranslator.FromString("codex") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true) body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true) body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false) @@ -163,7 +173,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au if errValidate := ValidateThinkingConfig(body, model); errValidate != nil { return nil, errValidate } - body = applyPayloadConfig(e.cfg, model, body) + body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.SetBytes(body, "model", model) diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index 4213ffa0..e4bb7340 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -77,6 +77,11 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false) basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload) @@ -84,7 +89,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) - basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload) + basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated) action := "generateContent" if req.Metadata != nil { @@ -216,6 +221,11 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload) @@ -223,7 +233,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) - basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload) + basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated) projectID := resolveGeminiProjectID(auth) diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index d69044b8..192f42e2 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -85,13 +85,18 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r // Official Gemini API via API key or OAuth bearer from := opts.SourceFormat to := sdktranslator.FromString("gemini") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false) body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false) body = ApplyThinkingMetadata(body, req.Metadata, model) body = util.ApplyDefaultThinkingIfNeeded(model, body) body = util.NormalizeGeminiThinkingBudget(model, body) body = util.StripThinkingConfigIfUnsupported(model, body) body = fixGeminiImageAspectRatio(model, body) - body = applyPayloadConfig(e.cfg, model, body) + body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) body, _ = sjson.SetBytes(body, "model", model) action := "generateContent" @@ -183,13 +188,18 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A from := opts.SourceFormat to := sdktranslator.FromString("gemini") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true) body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true) body = ApplyThinkingMetadata(body, req.Metadata, model) body = util.ApplyDefaultThinkingIfNeeded(model, body) body = util.NormalizeGeminiThinkingBudget(model, body) body = util.StripThinkingConfigIfUnsupported(model, body) body = fixGeminiImageAspectRatio(model, body) - body = applyPayloadConfig(e.cfg, model, body) + body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) body, _ = sjson.SetBytes(body, "model", model) baseURL := resolveGeminiBaseURL(auth) diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go index f8f4a63a..bcf4473c 100644 --- a/internal/runtime/executor/gemini_vertex_executor.go +++ b/internal/runtime/executor/gemini_vertex_executor.go @@ -122,6 +122,11 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au from := opts.SourceFormat to := sdktranslator.FromString("gemini") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false) body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { if budgetOverride != nil { @@ -134,7 +139,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au body = util.NormalizeGeminiThinkingBudget(req.Model, body) body = util.StripThinkingConfigIfUnsupported(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body) - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) body, _ = sjson.SetBytes(body, "model", req.Model) action := "generateContent" @@ -225,6 +230,11 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip from := opts.SourceFormat to := sdktranslator.FromString("gemini") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false) body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false) if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) { if budgetOverride != nil { @@ -237,7 +247,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip body = util.NormalizeGeminiThinkingBudget(model, body) body = util.StripThinkingConfigIfUnsupported(model, body) body = fixGeminiImageAspectRatio(model, body) - body = applyPayloadConfig(e.cfg, model, body) + body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) body, _ = sjson.SetBytes(body, "model", model) action := "generateContent" @@ -324,6 +334,11 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte from := opts.SourceFormat to := sdktranslator.FromString("gemini") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { if budgetOverride != nil { @@ -336,7 +351,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte body = util.NormalizeGeminiThinkingBudget(req.Model, body) body = util.StripThinkingConfigIfUnsupported(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body) - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) body, _ = sjson.SetBytes(body, "model", req.Model) baseURL := vertexBaseURL(location) @@ -444,6 +459,11 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth from := opts.SourceFormat to := sdktranslator.FromString("gemini") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true) body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true) if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) { if budgetOverride != nil { @@ -456,7 +476,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth body = util.NormalizeGeminiThinkingBudget(model, body) body = util.StripThinkingConfigIfUnsupported(model, body) body = fixGeminiImageAspectRatio(model, body) - body = applyPayloadConfig(e.cfg, model, body) + body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) body, _ = sjson.SetBytes(body, "model", model) // For API key auth, use simpler URL format without project/location diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index 8492fb35..e1b0394e 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -56,6 +56,11 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re from := opts.SourceFormat to := sdktranslator.FromString("openai") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false) body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) body, _ = sjson.SetBytes(body, "model", req.Model) @@ -65,7 +70,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re } body = applyIFlowThinkingConfig(body) body = preserveReasoningContentInMessages(body) - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint @@ -145,6 +150,11 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au from := opts.SourceFormat to := sdktranslator.FromString("openai") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) @@ -160,7 +170,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 { body = ensureToolsArray(body) } - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index 81fc31a1..60c80f9d 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -53,12 +53,17 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A // Translate inbound request to OpenAI format from := opts.SourceFormat to := sdktranslator.FromString("openai") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, opts.Stream) translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), opts.Stream) modelOverride := e.resolveUpstreamModel(req.Model, auth) if modelOverride != "" { translated = e.overrideModel(translated, modelOverride) } - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) + translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated) allowCompat := e.allowCompatReasoningEffort(req.Model, auth) translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat) translated = NormalizeThinkingConfig(translated, req.Model, allowCompat) @@ -145,12 +150,17 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy } from := opts.SourceFormat to := sdktranslator.FromString("openai") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) modelOverride := e.resolveUpstreamModel(req.Model, auth) if modelOverride != "" { translated = e.overrideModel(translated, modelOverride) } - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) + translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated) allowCompat := e.allowCompatReasoningEffort(req.Model, auth) translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat) translated = NormalizeThinkingConfig(translated, req.Model, allowCompat) diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index d823ef04..e3cfc5d4 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -104,17 +104,11 @@ func ApplyReasoningEffortMetadata(payload []byte, metadata map[string]any, model return payload } -// applyPayloadConfig applies payload default and override rules from configuration -// to the given JSON payload for the specified model. -// Defaults only fill missing fields, while overrides always overwrite existing values. -func applyPayloadConfig(cfg *config.Config, model string, payload []byte) []byte { - return applyPayloadConfigWithRoot(cfg, model, "", "", payload) -} - // applyPayloadConfigWithRoot behaves like applyPayloadConfig but treats all parameter // paths as relative to the provided root path (for example, "request" for Gemini CLI) -// and restricts matches to the given protocol when supplied. -func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string, payload []byte) []byte { +// and restricts matches to the given protocol when supplied. Defaults are checked +// against the original payload when provided. +func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string, payload, original []byte) []byte { if cfg == nil || len(payload) == 0 { return payload } @@ -127,6 +121,11 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string return payload } out := payload + source := original + if len(source) == 0 { + source = payload + } + appliedDefaults := make(map[string]struct{}) // Apply default rules: first write wins per field across all matching rules. for i := range rules.Default { rule := &rules.Default[i] @@ -138,7 +137,10 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string if fullPath == "" { continue } - if gjson.GetBytes(out, fullPath).Exists() { + if gjson.GetBytes(source, fullPath).Exists() { + continue + } + if _, ok := appliedDefaults[fullPath]; ok { continue } updated, errSet := sjson.SetBytes(out, fullPath, value) @@ -146,6 +148,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string continue } out = updated + appliedDefaults[fullPath] = struct{}{} } } // Apply override rules: last write wins per field across all matching rules. diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index ff6fa414..be6c1024 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -49,6 +49,11 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req from := opts.SourceFormat to := sdktranslator.FromString("openai") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false) body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) body, _ = sjson.SetBytes(body, "model", req.Model) @@ -56,7 +61,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil { return resp, errValidate } - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) @@ -125,6 +130,11 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut from := opts.SourceFormat to := sdktranslator.FromString("openai") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) @@ -140,7 +150,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`)) } body, _ = sjson.SetBytes(body, "stream_options.include_usage", true) - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))