diff --git a/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go index 3d1152f5..87e3ce0a 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go @@ -79,6 +79,23 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo out, _ = sjson.SetBytes(out, "request.generationConfig.topK", tkr.Num) } + // Map OpenAI modalities -> Gemini CLI request.generationConfig.responseModalities + // e.g. "modalities": ["image", "text"] -> ["Image", "Text"] + if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() { + var responseMods []string + for _, m := range mods.Array() { + switch strings.ToLower(m.String()) { + case "text": + responseMods = append(responseMods, "Text") + case "image": + responseMods = append(responseMods, "Image") + } + } + if len(responseMods) > 0 { + out, _ = sjson.SetBytes(out, "request.generationConfig.responseModalities", responseMods) + } + } + // OpenRouter-style image_config support // If the input uses top-level image_config.aspect_ratio, map it into request.generationConfig.imageConfig.aspectRatio. if imgCfg := gjson.GetBytes(rawJSON, "image_config"); imgCfg.Exists() && imgCfg.IsObject() { diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 075d2b55..41476a02 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -79,6 +79,23 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) out, _ = sjson.SetBytes(out, "generationConfig.topK", tkr.Num) } + // Map OpenAI modalities -> Gemini generationConfig.responseModalities + // e.g. "modalities": ["image", "text"] -> ["Image", "Text"] + if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() { + var responseMods []string + for _, m := range mods.Array() { + switch strings.ToLower(m.String()) { + case "text": + responseMods = append(responseMods, "Text") + case "image": + responseMods = append(responseMods, "Image") + } + } + if len(responseMods) > 0 { + out, _ = sjson.SetBytes(out, "generationConfig.responseModalities", responseMods) + } + } + // OpenRouter-style image_config support // If the input uses top-level image_config.aspect_ratio, map it into generationConfig.imageConfig.aspectRatio. if imgCfg := gjson.GetBytes(rawJSON, "image_config"); imgCfg.Exists() && imgCfg.IsObject() { diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go index 842758ad..af7923ab 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go @@ -262,12 +262,5 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte } } - // OpenRouter-style image_config support at top-level - if imgCfg := root.Get("image_config"); imgCfg.Exists() && imgCfg.IsObject() { - if ar := imgCfg.Get("aspect_ratio"); ar.Exists() && ar.Type == gjson.String { - out, _ = sjson.Set(out, "generationConfig.imageConfig.aspectRatio", ar.String()) - } - } - return []byte(out) }