From 7061cd60582da0ba06ac0bce907bc529f93c062e Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sun, 26 Oct 2025 19:35:22 +0800 Subject: [PATCH] fix(gemini): map responseModalities to uppercase IMAGE/TEXT --- internal/runtime/executor/gemini_cli_executor.go | 2 +- .../openai/chat-completions/gemini-cli_openai_request.go | 6 +++--- .../gemini/openai/chat-completions/gemini_openai_request.go | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index c59d4f34..3d7a539d 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -703,7 +703,7 @@ func fixGeminiCLIImageAspectRatio(modelName string, rawJSON []byte) []byte { } rawJSON, _ = sjson.SetRawBytes(rawJSON, "request.contents.0.parts", []byte(newPartsJson)) - rawJSON, _ = sjson.SetRawBytes(rawJSON, "request.generationConfig.responseModalities", []byte(`["Image", "Text"]`)) + rawJSON, _ = sjson.SetRawBytes(rawJSON, "request.generationConfig.responseModalities", []byte(`["IMAGE", "TEXT"]`)) } } rawJSON, _ = sjson.DeleteBytes(rawJSON, "request.generationConfig.imageConfig") diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go index a7d7002d..28163193 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go @@ -66,15 +66,15 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo } // Map OpenAI modalities -> Gemini CLI request.generationConfig.responseModalities - // e.g. "modalities": ["image", "text"] -> ["Image", "Text"] + // e.g. "modalities": ["image", "text"] -> ["IMAGE", "TEXT"] if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() { var responseMods []string for _, m := range mods.Array() { switch strings.ToLower(m.String()) { case "text": - responseMods = append(responseMods, "Text") + responseMods = append(responseMods, "TEXT") case "image": - responseMods = append(responseMods, "Image") + responseMods = append(responseMods, "IMAGE") } } if len(responseMods) > 0 { diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 98de3195..44cad7d2 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -66,15 +66,15 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) } // Map OpenAI modalities -> Gemini generationConfig.responseModalities - // e.g. "modalities": ["image", "text"] -> ["Image", "Text"] + // e.g. "modalities": ["image", "text"] -> ["IMAGE", "TEXT"] if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() { var responseMods []string for _, m := range mods.Array() { switch strings.ToLower(m.String()) { case "text": - responseMods = append(responseMods, "Text") + responseMods = append(responseMods, "TEXT") case "image": - responseMods = append(responseMods, "Image") + responseMods = append(responseMods, "IMAGE") } } if len(responseMods) > 0 {