fix(executor): only apply thinking config to supported models

This commit is contained in:
hkfires
2025-10-29 15:50:10 +08:00
parent 2a3ee8d0e3
commit 7dd93a4a25
4 changed files with 58 additions and 37 deletions

View File

@@ -256,10 +256,14 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
from := opts.SourceFormat from := opts.SourceFormat
to := sdktranslator.FromString("gemini") to := sdktranslator.FromString("gemini")
payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream) payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok { if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
payload = util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride) payload = util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
} }
payload = disableGeminiThinkingConfig(payload, req.Model) payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
payload = fixGeminiImageAspectRatio(req.Model, payload) payload = fixGeminiImageAspectRatio(req.Model, payload)
metadataAction := "generateContent" metadataAction := "generateContent"
if req.Metadata != nil { if req.Metadata != nil {

View File

@@ -63,7 +63,11 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
to := sdktranslator.FromString("gemini-cli") to := sdktranslator.FromString("gemini-cli")
budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata) budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if hasOverride { if hasOverride && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride) basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride)
} }
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
@@ -101,7 +105,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
payload = setJSONField(payload, "project", projectID) payload = setJSONField(payload, "project", projectID)
payload = setJSONField(payload, "model", attemptModel) payload = setJSONField(payload, "model", attemptModel)
} }
payload = disableGeminiThinkingConfig(payload, attemptModel) payload = util.StripThinkingConfigIfUnsupported(attemptModel, payload)
tok, errTok := tokenSource.Token() tok, errTok := tokenSource.Token()
if errTok != nil { if errTok != nil {
@@ -196,7 +200,11 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
to := sdktranslator.FromString("gemini-cli") to := sdktranslator.FromString("gemini-cli")
budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata) budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
if hasOverride { if hasOverride && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride) basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride)
} }
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
@@ -223,7 +231,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
payload := append([]byte(nil), basePayload...) payload := append([]byte(nil), basePayload...)
payload = setJSONField(payload, "project", projectID) payload = setJSONField(payload, "project", projectID)
payload = setJSONField(payload, "model", attemptModel) payload = setJSONField(payload, "model", attemptModel)
payload = disableGeminiThinkingConfig(payload, attemptModel) payload = util.StripThinkingConfigIfUnsupported(attemptModel, payload)
tok, errTok := tokenSource.Token() tok, errTok := tokenSource.Token()
if errTok != nil { if errTok != nil {
@@ -393,12 +401,16 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata) budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
for _, attemptModel := range models { for _, attemptModel := range models {
payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false) payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false)
if hasOverride { if hasOverride && util.ModelSupportsThinking(attemptModel) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(attemptModel, *budgetOverride)
budgetOverride = &norm
}
payload = util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride) payload = util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
} }
payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "project")
payload = deleteJSONField(payload, "model") payload = deleteJSONField(payload, "model")
payload = disableGeminiThinkingConfig(payload, attemptModel) payload = util.StripThinkingConfigIfUnsupported(attemptModel, payload)
payload = fixGeminiCLIImageAspectRatio(attemptModel, payload) payload = fixGeminiCLIImageAspectRatio(attemptModel, payload)
tok, errTok := tokenSource.Token() tok, errTok := tokenSource.Token()
@@ -623,29 +635,6 @@ func cliPreviewFallbackOrder(model string) []string {
} }
} }
func disableGeminiThinkingConfig(body []byte, model string) []byte {
if !geminiModelDisallowsThinking(model) {
return body
}
updated := deleteJSONField(body, "request.generationConfig.thinkingConfig")
updated = deleteJSONField(updated, "generationConfig.thinkingConfig")
return updated
}
func geminiModelDisallowsThinking(model string) bool {
if model == "" {
return false
}
lower := strings.ToLower(model)
for _, marker := range []string{"gemini-2.5-flash-image-preview", "gemini-2.5-flash-image"} {
if strings.Contains(lower, marker) {
return true
}
}
return false
}
// setJSONField sets a top-level JSON field on a byte slice payload via sjson. // setJSONField sets a top-level JSON field on a byte slice payload via sjson.
func setJSONField(body []byte, key, value string) []byte { func setJSONField(body []byte, key, value string) []byte {
if key == "" { if key == "" {

View File

@@ -78,10 +78,14 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
from := opts.SourceFormat from := opts.SourceFormat
to := sdktranslator.FromString("gemini") to := sdktranslator.FromString("gemini")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok { if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
} }
body = disableGeminiThinkingConfig(body, req.Model) body = util.StripThinkingConfigIfUnsupported(req.Model, body)
body = fixGeminiImageAspectRatio(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body)
action := "generateContent" action := "generateContent"
@@ -166,10 +170,14 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
from := opts.SourceFormat from := opts.SourceFormat
to := sdktranslator.FromString("gemini") to := sdktranslator.FromString("gemini")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok { if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
} }
body = disableGeminiThinkingConfig(body, req.Model) body = util.StripThinkingConfigIfUnsupported(req.Model, body)
body = fixGeminiImageAspectRatio(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body)
url := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, req.Model, "streamGenerateContent") url := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, req.Model, "streamGenerateContent")
@@ -269,10 +277,14 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
from := opts.SourceFormat from := opts.SourceFormat
to := sdktranslator.FromString("gemini") to := sdktranslator.FromString("gemini")
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok { if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
} }
translatedReq = disableGeminiThinkingConfig(translatedReq, req.Model) translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq) translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
respCtx := context.WithValue(ctx, "alt", opts.Alt) respCtx := context.WithValue(ctx, "alt", opts.Alt)
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")

View File

@@ -179,3 +179,19 @@ func GeminiThinkingFromMetadata(metadata map[string]any) (*int, *bool, bool) {
} }
return budgetPtr, includePtr, matched return budgetPtr, includePtr, matched
} }
// StripThinkingConfigIfUnsupported removes thinkingConfig from the request body
// when the target model does not advertise Thinking capability. It cleans both
// standard Gemini and Gemini CLI JSON envelopes. This acts as a final safety net
// in case upstream injected thinking for an unsupported model.
func StripThinkingConfigIfUnsupported(model string, body []byte) []byte {
if ModelSupportsThinking(model) || len(body) == 0 {
return body
}
updated := body
// Gemini CLI path
updated, _ = sjson.DeleteBytes(updated, "request.generationConfig.thinkingConfig")
// Standard Gemini path
updated, _ = sjson.DeleteBytes(updated, "generationConfig.thinkingConfig")
return updated
}