From 7dd93a4a256224454f756246759a6711a3d9b1bd Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 29 Oct 2025 15:50:10 +0800 Subject: [PATCH] fix(executor): only apply thinking config to supported models --- .../runtime/executor/aistudio_executor.go | 8 +++- .../runtime/executor/gemini_cli_executor.go | 47 +++++++------------ internal/runtime/executor/gemini_executor.go | 24 +++++++--- internal/util/gemini_thinking.go | 16 +++++++ 4 files changed, 58 insertions(+), 37 deletions(-) diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go index cfc86d0e..396f8eaa 100644 --- a/internal/runtime/executor/aistudio_executor.go +++ b/internal/runtime/executor/aistudio_executor.go @@ -256,10 +256,14 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c from := opts.SourceFormat to := sdktranslator.FromString("gemini") payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream) - if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok { + if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + budgetOverride = &norm + } payload = util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride) } - payload = disableGeminiThinkingConfig(payload, req.Model) + payload = util.StripThinkingConfigIfUnsupported(req.Model, payload) payload = fixGeminiImageAspectRatio(req.Model, payload) metadataAction := "generateContent" if req.Metadata != nil { diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index 3d7a539d..e00fd485 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -63,7 +63,11 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth to := sdktranslator.FromString("gemini-cli") budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata) basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - if hasOverride { + if hasOverride && util.ModelSupportsThinking(req.Model) { + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + budgetOverride = &norm + } basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride) } basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) @@ -101,7 +105,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth payload = setJSONField(payload, "project", projectID) payload = setJSONField(payload, "model", attemptModel) } - payload = disableGeminiThinkingConfig(payload, attemptModel) + payload = util.StripThinkingConfigIfUnsupported(attemptModel, payload) tok, errTok := tokenSource.Token() if errTok != nil { @@ -196,7 +200,11 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut to := sdktranslator.FromString("gemini-cli") budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata) basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - if hasOverride { + if hasOverride && util.ModelSupportsThinking(req.Model) { + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + budgetOverride = &norm + } basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride) } basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) @@ -223,7 +231,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut payload := append([]byte(nil), basePayload...) payload = setJSONField(payload, "project", projectID) payload = setJSONField(payload, "model", attemptModel) - payload = disableGeminiThinkingConfig(payload, attemptModel) + payload = util.StripThinkingConfigIfUnsupported(attemptModel, payload) tok, errTok := tokenSource.Token() if errTok != nil { @@ -393,12 +401,16 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata) for _, attemptModel := range models { payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false) - if hasOverride { + if hasOverride && util.ModelSupportsThinking(attemptModel) { + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(attemptModel, *budgetOverride) + budgetOverride = &norm + } payload = util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride) } payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "model") - payload = disableGeminiThinkingConfig(payload, attemptModel) + payload = util.StripThinkingConfigIfUnsupported(attemptModel, payload) payload = fixGeminiCLIImageAspectRatio(attemptModel, payload) tok, errTok := tokenSource.Token() @@ -623,29 +635,6 @@ func cliPreviewFallbackOrder(model string) []string { } } -func disableGeminiThinkingConfig(body []byte, model string) []byte { - if !geminiModelDisallowsThinking(model) { - return body - } - - updated := deleteJSONField(body, "request.generationConfig.thinkingConfig") - updated = deleteJSONField(updated, "generationConfig.thinkingConfig") - return updated -} - -func geminiModelDisallowsThinking(model string) bool { - if model == "" { - return false - } - lower := strings.ToLower(model) - for _, marker := range []string{"gemini-2.5-flash-image-preview", "gemini-2.5-flash-image"} { - if strings.Contains(lower, marker) { - return true - } - } - return false -} - // setJSONField sets a top-level JSON field on a byte slice payload via sjson. func setJSONField(body []byte, key, value string) []byte { if key == "" { diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index e3008cef..deba86fb 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -78,10 +78,14 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r from := opts.SourceFormat to := sdktranslator.FromString("gemini") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok { + if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + budgetOverride = &norm + } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } - body = disableGeminiThinkingConfig(body, req.Model) + body = util.StripThinkingConfigIfUnsupported(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body) action := "generateContent" @@ -166,10 +170,14 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A from := opts.SourceFormat to := sdktranslator.FromString("gemini") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok { + if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + budgetOverride = &norm + } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } - body = disableGeminiThinkingConfig(body, req.Model) + body = util.StripThinkingConfigIfUnsupported(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body) url := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, req.Model, "streamGenerateContent") @@ -269,10 +277,14 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut from := opts.SourceFormat to := sdktranslator.FromString("gemini") translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok { + if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + budgetOverride = &norm + } translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) } - translatedReq = disableGeminiThinkingConfig(translatedReq, req.Model) + translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq) translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq) respCtx := context.WithValue(ctx, "alt", opts.Alt) translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") diff --git a/internal/util/gemini_thinking.go b/internal/util/gemini_thinking.go index 9403a8e4..33c9edcf 100644 --- a/internal/util/gemini_thinking.go +++ b/internal/util/gemini_thinking.go @@ -179,3 +179,19 @@ func GeminiThinkingFromMetadata(metadata map[string]any) (*int, *bool, bool) { } return budgetPtr, includePtr, matched } + +// StripThinkingConfigIfUnsupported removes thinkingConfig from the request body +// when the target model does not advertise Thinking capability. It cleans both +// standard Gemini and Gemini CLI JSON envelopes. This acts as a final safety net +// in case upstream injected thinking for an unsupported model. +func StripThinkingConfigIfUnsupported(model string, body []byte) []byte { + if ModelSupportsThinking(model) || len(body) == 0 { + return body + } + updated := body + // Gemini CLI path + updated, _ = sjson.DeleteBytes(updated, "request.generationConfig.thinkingConfig") + // Standard Gemini path + updated, _ = sjson.DeleteBytes(updated, "generationConfig.thinkingConfig") + return updated +}