From 599986495b80a8a716b3c87f215743d9505519ed Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Mon, 13 Oct 2025 02:15:55 +0800 Subject: [PATCH] feat(translator): enhance OpenAI Gemini request handling for mixed content - Replaced `contentParts` with `aggregatedParts` to support mixed content (text and inline data). - Introduced `textBuilder` for efficient text concatenation. - Added support for inline data processing, including base64-encoded image URLs. - Updated `msg["content"]` logic to handle both plain text and mixed content scenarios. --- .../openai/gemini/openai_gemini_request.go | 39 +++++++++++++++++-- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/internal/translator/openai/gemini/openai_gemini_request.go b/internal/translator/openai/gemini/openai_gemini_request.go index b9b27431..70fb9d2b 100644 --- a/internal/translator/openai/gemini/openai_gemini_request.go +++ b/internal/translator/openai/gemini/openai_gemini_request.go @@ -9,6 +9,7 @@ import ( "bytes" "crypto/rand" "encoding/json" + "fmt" "math/big" "strings" @@ -100,14 +101,40 @@ func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream "content": "", } - var contentParts []string + var textBuilder strings.Builder + var aggregatedParts []interface{} + onlyTextContent := true var toolCalls []interface{} if parts.Exists() && parts.IsArray() { parts.ForEach(func(_, part gjson.Result) bool { // Handle text parts if text := part.Get("text"); text.Exists() { - contentParts = append(contentParts, text.String()) + formattedText := text.String() + textBuilder.WriteString(formattedText) + aggregatedParts = append(aggregatedParts, map[string]interface{}{ + "type": "text", + "text": formattedText, + }) + } + + // Handle inline data (e.g., images) + if inlineData := part.Get("inlineData"); inlineData.Exists() { + onlyTextContent = false + + mimeType := inlineData.Get("mimeType").String() + if mimeType == "" { + mimeType = "application/octet-stream" + } + data := inlineData.Get("data").String() + imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data) + + aggregatedParts = append(aggregatedParts, map[string]interface{}{ + "type": "image_url", + "image_url": map[string]interface{}{ + "url": imageURL, + }, + }) } // Handle function calls (Gemini) -> tool calls (OpenAI) @@ -175,8 +202,12 @@ func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream } // Set content - if len(contentParts) > 0 { - msg["content"] = strings.Join(contentParts, "") + if len(aggregatedParts) > 0 { + if onlyTextContent { + msg["content"] = textBuilder.String() + } else { + msg["content"] = aggregatedParts + } } // Set tool calls if any