From b055e00c1a5042aacf96f0266553e51fd212b288 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Tue, 30 Dec 2025 17:49:44 +0800 Subject: [PATCH] fix(executor): use upstream model for thinking config and payload translation --- .../runtime/executor/aistudio_executor.go | 54 ++++++--- .../runtime/executor/antigravity_executor.go | 56 +++++----- internal/runtime/executor/claude_executor.go | 26 ++--- internal/runtime/executor/codex_executor.go | 16 +-- .../runtime/executor/gemini_cli_executor.go | 95 ++++++++++------ .../executor/gemini_vertex_executor.go | 104 ++++++++++-------- internal/runtime/executor/iflow_executor.go | 33 ++++-- internal/runtime/executor/qwen_executor.go | 33 ++++-- 8 files changed, 255 insertions(+), 162 deletions(-) diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go index 17c8170f..394a295e 100644 --- a/internal/runtime/executor/aistudio_executor.go +++ b/internal/runtime/executor/aistudio_executor.go @@ -55,11 +55,17 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - translatedReq, body, err := e.translateRequest(req, opts, false) + upstreamModel := strings.TrimSpace(util.ResolveOriginalModel(req.Model, req.Metadata)) + if upstreamModel == "" { + upstreamModel = strings.TrimSpace(req.Model) + } + + translatedReq, body, err := e.translateRequest(req, opts, false, upstreamModel) if err != nil { return resp, err } - endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt) + + endpoint := e.buildEndpoint(upstreamModel, body.action, opts.Alt) wsReq := &wsrelay.HTTPRequest{ Method: http.MethodPost, URL: endpoint, @@ -109,11 +115,17 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - translatedReq, body, err := e.translateRequest(req, opts, true) + upstreamModel := strings.TrimSpace(util.ResolveOriginalModel(req.Model, req.Metadata)) + if upstreamModel == "" { + upstreamModel = strings.TrimSpace(req.Model) + } + + translatedReq, body, err := e.translateRequest(req, opts, true, upstreamModel) if err != nil { return nil, err } - endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt) + + endpoint := e.buildEndpoint(upstreamModel, body.action, opts.Alt) wsReq := &wsrelay.HTTPRequest{ Method: http.MethodPost, URL: endpoint, @@ -254,7 +266,12 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth // CountTokens counts tokens for the given request using the AI Studio API. 
func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { - _, body, err := e.translateRequest(req, opts, false) + upstreamModel := strings.TrimSpace(util.ResolveOriginalModel(req.Model, req.Metadata)) + if upstreamModel == "" { + upstreamModel = strings.TrimSpace(req.Model) + } + + _, body, err := e.translateRequest(req, opts, false, upstreamModel) if err != nil { return cliproxyexecutor.Response{}, err } @@ -263,7 +280,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A body.payload, _ = sjson.DeleteBytes(body.payload, "tools") body.payload, _ = sjson.DeleteBytes(body.payload, "safetySettings") - endpoint := e.buildEndpoint(req.Model, "countTokens", "") + endpoint := e.buildEndpoint(upstreamModel, "countTokens", "") wsReq := &wsrelay.HTTPRequest{ Method: http.MethodPost, URL: endpoint, @@ -318,18 +335,23 @@ type translatedPayload struct { toFormat sdktranslator.Format } -func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool) ([]byte, translatedPayload, error) { +func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool, upstreamModel string) ([]byte, translatedPayload, error) { + model := strings.TrimSpace(upstreamModel) + if model == "" { + model = strings.TrimSpace(req.Model) + } + from := opts.SourceFormat to := sdktranslator.FromString("gemini") - payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream) - payload = ApplyThinkingMetadata(payload, req.Metadata, req.Model) - payload = util.ApplyGemini3ThinkingLevelFromMetadata(req.Model, req.Metadata, payload) - payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload) - payload = util.ConvertThinkingLevelToBudget(payload, req.Model, true) - payload = util.NormalizeGeminiThinkingBudget(req.Model, payload, true) - payload = util.StripThinkingConfigIfUnsupported(req.Model, payload) - payload = fixGeminiImageAspectRatio(req.Model, payload) - payload = applyPayloadConfig(e.cfg, req.Model, payload) + payload := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream) + payload = ApplyThinkingMetadata(payload, req.Metadata, model) + payload = util.ApplyGemini3ThinkingLevelFromMetadata(model, req.Metadata, payload) + payload = util.ApplyDefaultThinkingIfNeeded(model, payload) + payload = util.ConvertThinkingLevelToBudget(payload, model, true) + payload = util.NormalizeGeminiThinkingBudget(model, payload, true) + payload = util.StripThinkingConfigIfUnsupported(model, payload) + payload = fixGeminiImageAspectRatio(model, payload) + payload = applyPayloadConfig(e.cfg, model, payload) payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens") payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseMimeType") payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseJsonSchema") diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 9ade4fbb..c2aa4706 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -98,13 +98,13 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au from := opts.SourceFormat to := sdktranslator.FromString("antigravity") - translated := sdktranslator.TranslateRequest(from, to, req.Model, 
bytes.Clone(req.Payload), false) + translated := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) - translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) - translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated) - translated = normalizeAntigravityThinking(req.Model, translated, isClaude) - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated) + translated = applyThinkingMetadataCLI(translated, req.Metadata, upstreamModel) + translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(upstreamModel, req.Metadata, translated) + translated = util.ApplyDefaultThinkingIfNeededCLI(upstreamModel, translated) + translated = normalizeAntigravityThinking(upstreamModel, translated, isClaude) + translated = applyPayloadConfigWithRoot(e.cfg, upstreamModel, "antigravity", "request", translated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -191,20 +191,20 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth * reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - from := opts.SourceFormat - to := sdktranslator.FromString("antigravity") - translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel == "" { upstreamModel = req.Model } - translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) - translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated) - translated = normalizeAntigravityThinking(req.Model, translated, true) - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated) + from := opts.SourceFormat + to := sdktranslator.FromString("antigravity") + translated := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) + + translated = applyThinkingMetadataCLI(translated, req.Metadata, upstreamModel) + translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(upstreamModel, req.Metadata, translated) + translated = util.ApplyDefaultThinkingIfNeededCLI(upstreamModel, translated) + translated = normalizeAntigravityThinking(upstreamModel, translated, true) + translated = applyPayloadConfigWithRoot(e.cfg, upstreamModel, "antigravity", "request", translated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -530,21 +530,21 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - from := opts.SourceFormat - to := sdktranslator.FromString("antigravity") - translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel == "" { upstreamModel = req.Model } isClaude := strings.Contains(strings.ToLower(upstreamModel), "claude") - translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) - translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) - translated = 
util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated) - translated = normalizeAntigravityThinking(req.Model, translated, isClaude) - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated) + from := opts.SourceFormat + to := sdktranslator.FromString("antigravity") + translated := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) + + translated = applyThinkingMetadataCLI(translated, req.Metadata, upstreamModel) + translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(upstreamModel, req.Metadata, translated) + translated = util.ApplyDefaultThinkingIfNeededCLI(upstreamModel, translated) + translated = normalizeAntigravityThinking(upstreamModel, translated, isClaude) + translated = applyPayloadConfigWithRoot(e.cfg, upstreamModel, "antigravity", "request", translated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -713,10 +713,10 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut var lastErr error for idx, baseURL := range baseURLs { - payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model) - payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, payload) - payload = normalizeAntigravityThinking(req.Model, payload, isClaude) + payload := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + payload = applyThinkingMetadataCLI(payload, req.Metadata, upstreamModel) + payload = util.ApplyDefaultThinkingIfNeededCLI(upstreamModel, payload) + payload = normalizeAntigravityThinking(upstreamModel, payload, isClaude) payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "model") payload = deleteJSONField(payload, "request.safetySettings") diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 2fbb235b..52c60163 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -49,11 +49,6 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r } reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - from := opts.SourceFormat - to := sdktranslator.FromString("claude") - // Use streaming translation to preserve function calling, except for claude. - stream := from != to - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel == "" { upstreamModel = req.Model @@ -65,20 +60,25 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r upstreamModel = modelOverride } } + from := opts.SourceFormat + to := sdktranslator.FromString("claude") + // Use streaming translation to preserve function calling, except for claude. 
+ stream := from != to + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), stream) body, _ = sjson.SetBytes(body, "model", upstreamModel) // Inject thinking config based on model metadata for thinking variants - body = e.injectThinkingConfig(req.Model, req.Metadata, body) + body = e.injectThinkingConfig(upstreamModel, req.Metadata, body) if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") { body = checkSystemInstructions(body) } - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) // Ensure max_tokens > thinking.budget_tokens when thinking is enabled - body = ensureMaxTokensForThinking(req.Model, body) + body = ensureMaxTokensForThinking(upstreamModel, body) // Extract betas from body and convert to header var extraBetas []string @@ -170,7 +170,6 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("claude") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel == "" { upstreamModel = req.Model @@ -182,17 +181,18 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A upstreamModel = modelOverride } } + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) body, _ = sjson.SetBytes(body, "model", upstreamModel) // Inject thinking config based on model metadata for thinking variants - body = e.injectThinkingConfig(req.Model, req.Metadata, body) + body = e.injectThinkingConfig(upstreamModel, req.Metadata, body) body = checkSystemInstructions(body) - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) // Ensure max_tokens > thinking.budget_tokens when thinking is enabled - body = ensureMaxTokensForThinking(req.Model, body) + body = ensureMaxTokensForThinking(upstreamModel, body) // Extract betas from body and convert to header var extraBetas []string @@ -316,7 +316,6 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut to := sdktranslator.FromString("claude") // Use streaming translation to preserve function calling, except for claude. 
stream := from != to - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel == "" { upstreamModel = req.Model @@ -328,6 +327,7 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut upstreamModel = modelOverride } } + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), stream) body, _ = sjson.SetBytes(body, "model", upstreamModel) if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") { diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 310988c1..71e36435 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -63,13 +63,13 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re from := opts.SourceFormat to := sdktranslator.FromString("codex") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false) + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + body = ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning.effort", false) body = NormalizeThinkingConfig(body, upstreamModel, false) if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil { return resp, errValidate } - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) body, _ = sjson.SetBytes(body, "model", upstreamModel) body, _ = sjson.SetBytes(body, "stream", true) body, _ = sjson.DeleteBytes(body, "previous_response_id") @@ -170,14 +170,14 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au from := opts.SourceFormat to := sdktranslator.FromString("codex") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false) + body = ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning.effort", false) body = NormalizeThinkingConfig(body, upstreamModel, false) if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil { return nil, errValidate } - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.SetBytes(body, "model", upstreamModel) @@ -280,11 +280,11 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth from := opts.SourceFormat to := sdktranslator.FromString("codex") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) modelForCounting := upstreamModel - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false) + body = ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning.effort", false) body, _ = sjson.SetBytes(body, "model", upstreamModel) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.SetBytes(body, "stream", false) diff --git 
a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index b171041a..0be3bc76 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -75,16 +75,21 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) + upstreamModel := strings.TrimSpace(util.ResolveOriginalModel(req.Model, req.Metadata)) + if upstreamModel == "" { + upstreamModel = strings.TrimSpace(req.Model) + } + from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") - basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) - basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload) - basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload) - basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) - basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) - basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) - basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload) + basePayload := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, upstreamModel) + basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(upstreamModel, req.Metadata, basePayload) + basePayload = util.ApplyDefaultThinkingIfNeededCLI(upstreamModel, basePayload) + basePayload = util.NormalizeGeminiCLIThinkingBudget(upstreamModel, basePayload) + basePayload = util.StripThinkingConfigIfUnsupported(upstreamModel, basePayload) + basePayload = fixGeminiCLIImageAspectRatio(upstreamModel, basePayload) + basePayload = applyPayloadConfigWithRoot(e.cfg, upstreamModel, "gemini", "request", basePayload) action := "generateContent" if req.Metadata != nil { @@ -94,9 +99,9 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth } projectID := resolveGeminiProjectID(auth) - models := cliPreviewFallbackOrder(req.Model) - if len(models) == 0 || models[0] != req.Model { - models = append([]string{req.Model}, models...) + models := cliPreviewFallbackOrder(upstreamModel) + if len(models) == 0 || models[0] != upstreamModel { + models = append([]string{upstreamModel}, models...) } httpClient := newHTTPClient(ctx, e.cfg, auth, 0) @@ -110,6 +115,10 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth var lastStatus int var lastBody []byte + // NOTE: Model capability checks (thinking config, payload rules, image fixes, etc.) must be + // based on upstreamModel (resolved via oauth-model-mappings). The loop variable attemptModel + // is only used as the concrete model id sent to the upstream Gemini CLI endpoint (and the + // model label passed into response translation) when iterating fallback variants. for idx, attemptModel := range models { payload := append([]byte(nil), basePayload...) 
if action == "countTokens" { @@ -214,22 +223,27 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) + upstreamModel := strings.TrimSpace(util.ResolveOriginalModel(req.Model, req.Metadata)) + if upstreamModel == "" { + upstreamModel = strings.TrimSpace(req.Model) + } + from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") - basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) - basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload) - basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload) - basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) - basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) - basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) - basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload) + basePayload := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) + basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, upstreamModel) + basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(upstreamModel, req.Metadata, basePayload) + basePayload = util.ApplyDefaultThinkingIfNeededCLI(upstreamModel, basePayload) + basePayload = util.NormalizeGeminiCLIThinkingBudget(upstreamModel, basePayload) + basePayload = util.StripThinkingConfigIfUnsupported(upstreamModel, basePayload) + basePayload = fixGeminiCLIImageAspectRatio(upstreamModel, basePayload) + basePayload = applyPayloadConfigWithRoot(e.cfg, upstreamModel, "gemini", "request", basePayload) projectID := resolveGeminiProjectID(auth) - models := cliPreviewFallbackOrder(req.Model) - if len(models) == 0 || models[0] != req.Model { - models = append([]string{req.Model}, models...) + models := cliPreviewFallbackOrder(upstreamModel) + if len(models) == 0 || models[0] != upstreamModel { + models = append([]string{upstreamModel}, models...) } httpClient := newHTTPClient(ctx, e.cfg, auth, 0) @@ -243,6 +257,10 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut var lastStatus int var lastBody []byte + // NOTE: Model capability checks (thinking config, payload rules, image fixes, etc.) must be + // based on upstreamModel (resolved via oauth-model-mappings). The loop variable attemptModel + // is only used as the concrete model id sent to the upstream Gemini CLI endpoint (and the + // model label passed into response translation) when iterating fallback variants. for idx, attemptModel := range models { payload := append([]byte(nil), basePayload...) 
payload = setJSONField(payload, "project", projectID) @@ -318,7 +336,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut out := make(chan cliproxyexecutor.StreamChunk) stream = out - go func(resp *http.Response, reqBody []byte, attempt string) { + go func(resp *http.Response, reqBody []byte, attemptModel string) { defer close(out) defer func() { if errClose := resp.Body.Close(); errClose != nil { @@ -336,14 +354,14 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut reporter.publish(ctx, detail) } if bytes.HasPrefix(line, dataTag) { - segments := sdktranslator.TranslateStream(respCtx, to, from, attempt, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone(line), ¶m) + segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone(line), ¶m) for i := range segments { out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])} } } } - segments := sdktranslator.TranslateStream(respCtx, to, from, attempt, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone([]byte("[DONE]")), ¶m) + segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone([]byte("[DONE]")), ¶m) for i := range segments { out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])} } @@ -365,12 +383,12 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut appendAPIResponseChunk(ctx, e.cfg, data) reporter.publish(ctx, parseGeminiCLIUsage(data)) var param any - segments := sdktranslator.TranslateStream(respCtx, to, from, attempt, bytes.Clone(opts.OriginalRequest), reqBody, data, ¶m) + segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, data, ¶m) for i := range segments { out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])} } - segments = sdktranslator.TranslateStream(respCtx, to, from, attempt, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone([]byte("[DONE]")), ¶m) + segments = sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone([]byte("[DONE]")), ¶m) for i := range segments { out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])} } @@ -399,9 +417,14 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") - models := cliPreviewFallbackOrder(req.Model) - if len(models) == 0 || models[0] != req.Model { - models = append([]string{req.Model}, models...) + upstreamModel := strings.TrimSpace(util.ResolveOriginalModel(req.Model, req.Metadata)) + if upstreamModel == "" { + upstreamModel = strings.TrimSpace(req.Model) + } + + models := cliPreviewFallbackOrder(upstreamModel) + if len(models) == 0 || models[0] != upstreamModel { + models = append([]string{upstreamModel}, models...) } httpClient := newHTTPClient(ctx, e.cfg, auth, 0) @@ -417,15 +440,19 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. var lastStatus int var lastBody []byte + // NOTE: Model capability checks (thinking config, payload rules, image fixes, etc.) must be + // based on upstreamModel (resolved via oauth-model-mappings). The loop variable attemptModel + // is only used as the concrete model id sent to the upstream Gemini CLI endpoint when iterating + // fallback variants. 
for _, attemptModel := range models { payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false) - payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model) - payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, payload) + payload = applyThinkingMetadataCLI(payload, req.Metadata, upstreamModel) + payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(upstreamModel, req.Metadata, payload) payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "model") payload = deleteJSONField(payload, "request.safetySettings") - payload = util.StripThinkingConfigIfUnsupported(req.Model, payload) - payload = fixGeminiCLIImageAspectRatio(attemptModel, payload) + payload = util.StripThinkingConfigIfUnsupported(upstreamModel, payload) + payload = fixGeminiCLIImageAspectRatio(upstreamModel, payload) tok, errTok := tokenSource.Token() if errTok != nil { diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go index df8ee506..03470bec 100644 --- a/internal/runtime/executor/gemini_vertex_executor.go +++ b/internal/runtime/executor/gemini_vertex_executor.go @@ -121,22 +121,25 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au defer reporter.trackFailure(ctx, &err) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel == "" { + upstreamModel = req.Model + } from := opts.SourceFormat to := sdktranslator.FromString("gemini") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(upstreamModel, req.Metadata); ok && util.ModelSupportsThinking(upstreamModel) { if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + norm := util.NormalizeThinkingBudget(upstreamModel, *budgetOverride) budgetOverride = &norm } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } - body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) - body = util.NormalizeGeminiThinkingBudget(req.Model, body) - body = util.StripThinkingConfigIfUnsupported(req.Model, body) - body = fixGeminiImageAspectRatio(req.Model, body) - body = applyPayloadConfig(e.cfg, req.Model, body) + body = util.ApplyDefaultThinkingIfNeeded(upstreamModel, body) + body = util.NormalizeGeminiThinkingBudget(upstreamModel, body) + body = util.StripThinkingConfigIfUnsupported(upstreamModel, body) + body = fixGeminiImageAspectRatio(upstreamModel, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) body, _ = sjson.SetBytes(body, "model", upstreamModel) action := "generateContent" @@ -221,22 +224,25 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip defer reporter.trackFailure(ctx, &err) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel == "" { + upstreamModel = req.Model + } from := opts.SourceFormat to := sdktranslator.FromString("gemini") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); 
ok && util.ModelSupportsThinking(req.Model) { + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(upstreamModel, req.Metadata); ok && util.ModelSupportsThinking(upstreamModel) { if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + norm := util.NormalizeThinkingBudget(upstreamModel, *budgetOverride) budgetOverride = &norm } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } - body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) - body = util.NormalizeGeminiThinkingBudget(req.Model, body) - body = util.StripThinkingConfigIfUnsupported(req.Model, body) - body = fixGeminiImageAspectRatio(req.Model, body) - body = applyPayloadConfig(e.cfg, req.Model, body) + body = util.ApplyDefaultThinkingIfNeeded(upstreamModel, body) + body = util.NormalizeGeminiThinkingBudget(upstreamModel, body) + body = util.StripThinkingConfigIfUnsupported(upstreamModel, body) + body = fixGeminiImageAspectRatio(upstreamModel, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) body, _ = sjson.SetBytes(body, "model", upstreamModel) action := "generateContent" @@ -322,22 +328,25 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte defer reporter.trackFailure(ctx, &err) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel == "" { + upstreamModel = req.Model + } from := opts.SourceFormat to := sdktranslator.FromString("gemini") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) + if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(upstreamModel, req.Metadata); ok && util.ModelSupportsThinking(upstreamModel) { if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + norm := util.NormalizeThinkingBudget(upstreamModel, *budgetOverride) budgetOverride = &norm } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } - body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) - body = util.NormalizeGeminiThinkingBudget(req.Model, body) - body = util.StripThinkingConfigIfUnsupported(req.Model, body) - body = fixGeminiImageAspectRatio(req.Model, body) - body = applyPayloadConfig(e.cfg, req.Model, body) + body = util.ApplyDefaultThinkingIfNeeded(upstreamModel, body) + body = util.NormalizeGeminiThinkingBudget(upstreamModel, body) + body = util.StripThinkingConfigIfUnsupported(upstreamModel, body) + body = fixGeminiImageAspectRatio(upstreamModel, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) body, _ = sjson.SetBytes(body, "model", upstreamModel) baseURL := vertexBaseURL(location) @@ -439,22 +448,25 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth defer reporter.trackFailure(ctx, &err) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel == "" { + upstreamModel = req.Model + } from := opts.SourceFormat to := sdktranslator.FromString("gemini") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - if budgetOverride, includeOverride, ok := 
util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) + if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(upstreamModel, req.Metadata); ok && util.ModelSupportsThinking(upstreamModel) { if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + norm := util.NormalizeThinkingBudget(upstreamModel, *budgetOverride) budgetOverride = &norm } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } - body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) - body = util.NormalizeGeminiThinkingBudget(req.Model, body) - body = util.StripThinkingConfigIfUnsupported(req.Model, body) - body = fixGeminiImageAspectRatio(req.Model, body) - body = applyPayloadConfig(e.cfg, req.Model, body) + body = util.ApplyDefaultThinkingIfNeeded(upstreamModel, body) + body = util.NormalizeGeminiThinkingBudget(upstreamModel, body) + body = util.StripThinkingConfigIfUnsupported(upstreamModel, body) + body = fixGeminiImageAspectRatio(upstreamModel, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) body, _ = sjson.SetBytes(body, "model", upstreamModel) // For API key auth, use simpler URL format without project/location @@ -553,19 +565,22 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth // countTokensWithServiceAccount counts tokens using service account credentials. func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) { upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel == "" { + upstreamModel = req.Model + } from := opts.SourceFormat to := sdktranslator.FromString("gemini") - translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + translatedReq := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(upstreamModel, req.Metadata); ok && util.ModelSupportsThinking(upstreamModel) { if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + norm := util.NormalizeThinkingBudget(upstreamModel, *budgetOverride) budgetOverride = &norm } translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) } - translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq) - translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq) + translatedReq = util.StripThinkingConfigIfUnsupported(upstreamModel, translatedReq) + translatedReq = fixGeminiImageAspectRatio(upstreamModel, translatedReq) translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel) respCtx := context.WithValue(ctx, "alt", opts.Alt) translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") @@ -642,19 +657,22 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context // countTokensWithAPIKey handles token counting using API key credentials. 
func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) { upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel == "" { + upstreamModel = req.Model + } from := opts.SourceFormat to := sdktranslator.FromString("gemini") - translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + translatedReq := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(upstreamModel, req.Metadata); ok && util.ModelSupportsThinking(upstreamModel) { if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + norm := util.NormalizeThinkingBudget(upstreamModel, *budgetOverride) budgetOverride = &norm } translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) } - translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq) - translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq) + translatedReq = util.StripThinkingConfigIfUnsupported(upstreamModel, translatedReq) + translatedReq = fixGeminiImageAspectRatio(upstreamModel, translatedReq) translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel) respCtx := context.WithValue(ctx, "alt", opts.Alt) translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") @@ -665,7 +683,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth * if baseURL == "" { baseURL = "https://generativelanguage.googleapis.com" } - url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, "countTokens") + url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, "countTokens") httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq)) if errNewReq != nil { diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index 124a984e..9ac1c9f3 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -54,11 +54,15 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if strings.TrimSpace(upstreamModel) == "" { + upstreamModel = req.Model + } + from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + body = ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning_effort", false) if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } @@ -68,7 +72,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth 
*cliproxyauth.Auth, re } body = applyIFlowThinkingConfig(body) body = preserveReasoningContentInMessages(body) - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint @@ -146,12 +150,16 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if strings.TrimSpace(upstreamModel) == "" { + upstreamModel = req.Model + } + from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + body = ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning_effort", false) if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } @@ -166,7 +174,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 { body = ensureToolsArray(body) } - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint @@ -249,11 +257,16 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au } func (e *IFlowExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if strings.TrimSpace(upstreamModel) == "" { + upstreamModel = req.Model + } + from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) - enc, err := tokenizerForModel(req.Model) + enc, err := tokenizerForModel(upstreamModel) if err != nil { return cliproxyexecutor.Response{}, fmt.Errorf("iflow executor: tokenizer init failed: %w", err) } diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index 1d4ef52d..cf4aa6e3 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -48,11 +48,15 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if strings.TrimSpace(upstreamModel) == "" { + upstreamModel = req.Model + } + from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), 
false) + body = ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning_effort", false) if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } @@ -60,7 +64,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil { return resp, errValidate } - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) @@ -127,12 +131,16 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if strings.TrimSpace(upstreamModel) == "" { + upstreamModel = req.Model + } + from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + body = ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning_effort", false) if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } @@ -147,7 +155,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`)) } body, _ = sjson.SetBytes(body, "stream_options.include_usage", true) - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) @@ -227,13 +235,18 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut } func (e *QwenExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if strings.TrimSpace(upstreamModel) == "" { + upstreamModel = req.Model + } + from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) modelName := gjson.GetBytes(body, "model").String() if strings.TrimSpace(modelName) == "" { - modelName = req.Model + modelName = upstreamModel } enc, err := tokenizerForModel(modelName)
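
The patch repeats one pattern in every executor: resolve the upstream model once (util.ResolveOriginalModel with a fallback to the requested model), then key request translation, thinking-config normalization, payload rules, and endpoint construction off that resolved name rather than the client-facing alias in req.Model. The sketch below is a minimal, self-contained illustration of that fallback pattern only; the helper name resolveUpstreamModel and the "original_model" metadata key are hypothetical stand-ins for the repository's util.ResolveOriginalModel, which the patch calls directly.

package main

import (
	"fmt"
	"strings"
)

// resolveUpstreamModel illustrates the fallback the patch applies in each executor:
// prefer the upstream model recorded in request metadata, and fall back to the
// requested model name when no mapping exists. The "original_model" key is an
// assumption for this sketch; the real code delegates to
// util.ResolveOriginalModel(req.Model, req.Metadata).
func resolveUpstreamModel(requestedModel string, metadata map[string]any) string {
	upstream := ""
	if metadata != nil {
		if v, ok := metadata["original_model"].(string); ok {
			upstream = v
		}
	}
	upstream = strings.TrimSpace(upstream)
	if upstream == "" {
		upstream = strings.TrimSpace(requestedModel)
	}
	return upstream
}

func main() {
	// With a mapping present, capability checks and payload rules key off the
	// upstream name rather than the client-facing alias.
	meta := map[string]any{"original_model": "gemini-2.5-pro"}
	fmt.Println(resolveUpstreamModel("my-gemini-alias", meta)) // gemini-2.5-pro

	// Without a mapping, the requested model is used unchanged.
	fmt.Println(resolveUpstreamModel("gemini-2.5-flash", nil)) // gemini-2.5-flash
}

As the NOTE comments in the Gemini CLI executor state, keying the capability checks (thinking config, payload rules, image fixes) off the resolved upstream model keeps them consistent with what is actually sent upstream, while fallback loop variables such as attemptModel remain only the concrete model id for the outgoing request and response translation.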