From c62ecc2442d1d8cda88a79a5d40550ff34c6f942 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Mon, 6 Oct 2025 16:32:03 +0800 Subject: [PATCH 1/2] fix(gemini): Disable thinking config for incompatible models --- .../runtime/executor/gemini_cli_executor.go | 26 +++++++++++++++++++ internal/runtime/executor/gemini_executor.go | 3 +++ 2 files changed, 29 insertions(+) diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index a96ca063..281b2970 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -89,6 +89,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth payload = setJSONField(payload, "project", projectID) payload = setJSONField(payload, "model", attemptModel) } + payload = disableGeminiThinkingConfig(payload, attemptModel) tok, errTok := tokenSource.Token() if errTok != nil { @@ -165,6 +166,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut payload := append([]byte(nil), basePayload...) payload = setJSONField(payload, "project", projectID) payload = setJSONField(payload, "model", attemptModel) + payload = disableGeminiThinkingConfig(payload, attemptModel) tok, errTok := tokenSource.Token() if errTok != nil { @@ -291,6 +293,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false) payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "model") + payload = disableGeminiThinkingConfig(payload, attemptModel) tok, errTok := tokenSource.Token() if errTok != nil { @@ -500,6 +503,29 @@ func cliPreviewFallbackOrder(model string) []string { } } +func disableGeminiThinkingConfig(body []byte, model string) []byte { + if !geminiModelDisallowsThinking(model) { + return body + } + + updated := deleteJSONField(body, "request.generationConfig.thinkingConfig") + updated = deleteJSONField(updated, "generationConfig.thinkingConfig") + return updated +} + +func geminiModelDisallowsThinking(model string) bool { + if model == "" { + return false + } + lower := strings.ToLower(model) + for _, marker := range []string{"gemini-2.5-flash-image-preview"} { + if strings.Contains(lower, marker) { + return true + } + } + return false +} + // setJSONField sets a top-level JSON field on a byte slice payload via sjson. func setJSONField(body []byte, key, value string) []byte { if key == "" { diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index 2015e15d..9a94783f 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -77,6 +77,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r from := opts.SourceFormat to := sdktranslator.FromString("gemini") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + body = disableGeminiThinkingConfig(body, req.Model) action := "generateContent" if req.Metadata != nil { @@ -134,6 +135,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A from := opts.SourceFormat to := sdktranslator.FromString("gemini") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + body = disableGeminiThinkingConfig(body, req.Model) url := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, req.Model, "streamGenerateContent") if opts.Alt == "" { @@ -204,6 +206,7 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut from := opts.SourceFormat to := sdktranslator.FromString("gemini") translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + translatedReq = disableGeminiThinkingConfig(translatedReq, req.Model) respCtx := context.WithValue(ctx, "alt", opts.Alt) translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") From 31659c790d7c95756ddea8fbff698306d948b19a Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Mon, 6 Oct 2025 17:06:04 +0800 Subject: [PATCH 2/2] feat(translator/gemini-cli): support inline image data in responses --- .../chat-completions/cli_openai_response.go | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/internal/translator/gemini-cli/openai/chat-completions/cli_openai_response.go b/internal/translator/gemini-cli/openai/chat-completions/cli_openai_response.go index cde7c9ed..86ea6b69 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/cli_openai_response.go +++ b/internal/translator/gemini-cli/openai/chat-completions/cli_openai_response.go @@ -8,6 +8,7 @@ package chat_completions import ( "bytes" "context" + "encoding/json" "fmt" "time" @@ -100,6 +101,10 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ partResult := partResults[i] partTextResult := partResult.Get("text") functionCallResult := partResult.Get("functionCall") + inlineDataResult := partResult.Get("inlineData") + if !inlineDataResult.Exists() { + inlineDataResult = partResult.Get("inline_data") + } if partTextResult.Exists() { // Handle text content, distinguishing between regular content and reasoning/thoughts. @@ -125,6 +130,34 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ } template, _ = sjson.Set(template, "choices.0.delta.role", "assistant") template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls.-1", functionCallTemplate) + } else if inlineDataResult.Exists() { + data := inlineDataResult.Get("data").String() + if data == "" { + continue + } + mimeType := inlineDataResult.Get("mimeType").String() + if mimeType == "" { + mimeType = inlineDataResult.Get("mime_type").String() + } + if mimeType == "" { + mimeType = "image/png" + } + imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data) + imagePayload, err := json.Marshal(map[string]any{ + "type": "image_url", + "image_url": map[string]string{ + "url": imageURL, + }, + }) + if err != nil { + continue + } + imagesResult := gjson.Get(template, "choices.0.delta.images") + if !imagesResult.Exists() || !imagesResult.IsArray() { + template, _ = sjson.SetRaw(template, "choices.0.delta.images", `[]`) + } + template, _ = sjson.Set(template, "choices.0.delta.role", "assistant") + template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", string(imagePayload)) } } }