mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-02-02 20:40:52 +08:00
Add support for Gemini's code_execution and url_context tools in the
request translators, enabling:
- Agentic Vision: Image analysis with Python code execution for
bounding boxes, annotations, and visual reasoning
- URL Context: Live web page content fetching and analysis
Tools are passed through using the same pattern as google_search:
- code_execution: {} -> codeExecution: {}
- url_context: {} -> urlContext: {}
Tested with Gemini 3 Flash Preview agentic vision successfully.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
405 lines
15 KiB
Go
405 lines
15 KiB
Go
// Package chat_completions provides request translation functionality for OpenAI to Gemini API compatibility.
|
|
// It converts OpenAI Chat Completions requests into Gemini compatible JSON using gjson/sjson only.
|
|
package chat_completions
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"strings"
|
|
|
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
|
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
|
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
|
log "github.com/sirupsen/logrus"
|
|
"github.com/tidwall/gjson"
|
|
"github.com/tidwall/sjson"
|
|
)
|
|
|
|
const geminiFunctionThoughtSignature = "skip_thought_signature_validator"
|
|
|
|
// ConvertOpenAIRequestToGemini converts an OpenAI Chat Completions request (raw JSON)
|
|
// into a complete Gemini request JSON. All JSON construction uses sjson and lookups use gjson.
|
|
//
|
|
// Parameters:
|
|
// - modelName: The name of the model to use for the request
|
|
// - rawJSON: The raw JSON request data from the OpenAI API
|
|
// - stream: A boolean indicating if the request is for a streaming response (unused in current implementation)
|
|
//
|
|
// Returns:
|
|
// - []byte: The transformed request data in Gemini API format
|
|
func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) []byte {
|
|
rawJSON := bytes.Clone(inputRawJSON)
|
|
// Base envelope (no default thinkingConfig)
|
|
out := []byte(`{"contents":[]}`)
|
|
|
|
// Model
|
|
out, _ = sjson.SetBytes(out, "model", modelName)
|
|
|
|
// Apply thinking configuration: convert OpenAI reasoning_effort to Gemini thinkingConfig.
|
|
// Inline translation-only mapping; capability checks happen later in ApplyThinking.
|
|
re := gjson.GetBytes(rawJSON, "reasoning_effort")
|
|
if re.Exists() {
|
|
effort := strings.ToLower(strings.TrimSpace(re.String()))
|
|
if effort != "" {
|
|
thinkingPath := "generationConfig.thinkingConfig"
|
|
if effort == "auto" {
|
|
out, _ = sjson.SetBytes(out, thinkingPath+".thinkingBudget", -1)
|
|
out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", true)
|
|
} else {
|
|
out, _ = sjson.SetBytes(out, thinkingPath+".thinkingLevel", effort)
|
|
out, _ = sjson.SetBytes(out, thinkingPath+".includeThoughts", effort != "none")
|
|
}
|
|
}
|
|
}
|
|
|
|
// Temperature/top_p/top_k
|
|
if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number {
|
|
out, _ = sjson.SetBytes(out, "generationConfig.temperature", tr.Num)
|
|
}
|
|
if tpr := gjson.GetBytes(rawJSON, "top_p"); tpr.Exists() && tpr.Type == gjson.Number {
|
|
out, _ = sjson.SetBytes(out, "generationConfig.topP", tpr.Num)
|
|
}
|
|
if tkr := gjson.GetBytes(rawJSON, "top_k"); tkr.Exists() && tkr.Type == gjson.Number {
|
|
out, _ = sjson.SetBytes(out, "generationConfig.topK", tkr.Num)
|
|
}
|
|
|
|
// Candidate count (OpenAI 'n' parameter)
|
|
if n := gjson.GetBytes(rawJSON, "n"); n.Exists() && n.Type == gjson.Number {
|
|
if val := n.Int(); val > 1 {
|
|
out, _ = sjson.SetBytes(out, "generationConfig.candidateCount", val)
|
|
}
|
|
}
|
|
|
|
// Map OpenAI modalities -> Gemini generationConfig.responseModalities
|
|
// e.g. "modalities": ["image", "text"] -> ["IMAGE", "TEXT"]
|
|
if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() {
|
|
var responseMods []string
|
|
for _, m := range mods.Array() {
|
|
switch strings.ToLower(m.String()) {
|
|
case "text":
|
|
responseMods = append(responseMods, "TEXT")
|
|
case "image":
|
|
responseMods = append(responseMods, "IMAGE")
|
|
}
|
|
}
|
|
if len(responseMods) > 0 {
|
|
out, _ = sjson.SetBytes(out, "generationConfig.responseModalities", responseMods)
|
|
}
|
|
}
|
|
|
|
// OpenRouter-style image_config support
|
|
// If the input uses top-level image_config.aspect_ratio, map it into generationConfig.imageConfig.aspectRatio.
|
|
if imgCfg := gjson.GetBytes(rawJSON, "image_config"); imgCfg.Exists() && imgCfg.IsObject() {
|
|
if ar := imgCfg.Get("aspect_ratio"); ar.Exists() && ar.Type == gjson.String {
|
|
out, _ = sjson.SetBytes(out, "generationConfig.imageConfig.aspectRatio", ar.Str)
|
|
}
|
|
if size := imgCfg.Get("image_size"); size.Exists() && size.Type == gjson.String {
|
|
out, _ = sjson.SetBytes(out, "generationConfig.imageConfig.imageSize", size.Str)
|
|
}
|
|
}
|
|
|
|
// messages -> systemInstruction + contents
|
|
messages := gjson.GetBytes(rawJSON, "messages")
|
|
if messages.IsArray() {
|
|
arr := messages.Array()
|
|
// First pass: assistant tool_calls id->name map
|
|
tcID2Name := map[string]string{}
|
|
for i := 0; i < len(arr); i++ {
|
|
m := arr[i]
|
|
if m.Get("role").String() == "assistant" {
|
|
tcs := m.Get("tool_calls")
|
|
if tcs.IsArray() {
|
|
for _, tc := range tcs.Array() {
|
|
if tc.Get("type").String() == "function" {
|
|
id := tc.Get("id").String()
|
|
name := tc.Get("function.name").String()
|
|
if id != "" && name != "" {
|
|
tcID2Name[id] = name
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Second pass build systemInstruction/tool responses cache
|
|
toolResponses := map[string]string{} // tool_call_id -> response text
|
|
for i := 0; i < len(arr); i++ {
|
|
m := arr[i]
|
|
role := m.Get("role").String()
|
|
if role == "tool" {
|
|
toolCallID := m.Get("tool_call_id").String()
|
|
if toolCallID != "" {
|
|
c := m.Get("content")
|
|
toolResponses[toolCallID] = c.Raw
|
|
}
|
|
}
|
|
}
|
|
|
|
systemPartIndex := 0
|
|
for i := 0; i < len(arr); i++ {
|
|
m := arr[i]
|
|
role := m.Get("role").String()
|
|
content := m.Get("content")
|
|
|
|
if (role == "system" || role == "developer") && len(arr) > 1 {
|
|
// system -> system_instruction as a user message style
|
|
if content.Type == gjson.String {
|
|
out, _ = sjson.SetBytes(out, "system_instruction.role", "user")
|
|
out, _ = sjson.SetBytes(out, fmt.Sprintf("system_instruction.parts.%d.text", systemPartIndex), content.String())
|
|
systemPartIndex++
|
|
} else if content.IsObject() && content.Get("type").String() == "text" {
|
|
out, _ = sjson.SetBytes(out, "system_instruction.role", "user")
|
|
out, _ = sjson.SetBytes(out, fmt.Sprintf("system_instruction.parts.%d.text", systemPartIndex), content.Get("text").String())
|
|
systemPartIndex++
|
|
} else if content.IsArray() {
|
|
contents := content.Array()
|
|
if len(contents) > 0 {
|
|
out, _ = sjson.SetBytes(out, "system_instruction.role", "user")
|
|
for j := 0; j < len(contents); j++ {
|
|
out, _ = sjson.SetBytes(out, fmt.Sprintf("system_instruction.parts.%d.text", systemPartIndex), contents[j].Get("text").String())
|
|
systemPartIndex++
|
|
}
|
|
}
|
|
}
|
|
} else if role == "user" || ((role == "system" || role == "developer") && len(arr) == 1) {
|
|
// Build single user content node to avoid splitting into multiple contents
|
|
node := []byte(`{"role":"user","parts":[]}`)
|
|
if content.Type == gjson.String {
|
|
node, _ = sjson.SetBytes(node, "parts.0.text", content.String())
|
|
} else if content.IsArray() {
|
|
items := content.Array()
|
|
p := 0
|
|
for _, item := range items {
|
|
switch item.Get("type").String() {
|
|
case "text":
|
|
text := item.Get("text").String()
|
|
if text != "" {
|
|
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text)
|
|
}
|
|
p++
|
|
case "image_url":
|
|
imageURL := item.Get("image_url.url").String()
|
|
if len(imageURL) > 5 {
|
|
pieces := strings.SplitN(imageURL[5:], ";", 2)
|
|
if len(pieces) == 2 && len(pieces[1]) > 7 {
|
|
mime := pieces[0]
|
|
data := pieces[1][7:]
|
|
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.mime_type", mime)
|
|
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.data", data)
|
|
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".thoughtSignature", geminiFunctionThoughtSignature)
|
|
p++
|
|
}
|
|
}
|
|
case "file":
|
|
filename := item.Get("file.filename").String()
|
|
fileData := item.Get("file.file_data").String()
|
|
ext := ""
|
|
if sp := strings.Split(filename, "."); len(sp) > 1 {
|
|
ext = sp[len(sp)-1]
|
|
}
|
|
if mimeType, ok := misc.MimeTypes[ext]; ok {
|
|
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.mime_type", mimeType)
|
|
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.data", fileData)
|
|
p++
|
|
} else {
|
|
log.Warnf("Unknown file name extension '%s' in user message, skip", ext)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
out, _ = sjson.SetRawBytes(out, "contents.-1", node)
|
|
} else if role == "assistant" {
|
|
node := []byte(`{"role":"model","parts":[]}`)
|
|
p := 0
|
|
if content.Type == gjson.String {
|
|
// Assistant text -> single model content
|
|
node, _ = sjson.SetBytes(node, "parts.-1.text", content.String())
|
|
p++
|
|
} else if content.IsArray() {
|
|
// Assistant multimodal content (e.g. text + image) -> single model content with parts
|
|
for _, item := range content.Array() {
|
|
switch item.Get("type").String() {
|
|
case "text":
|
|
text := item.Get("text").String()
|
|
if text != "" {
|
|
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text)
|
|
}
|
|
p++
|
|
case "image_url":
|
|
// If the assistant returned an inline data URL, preserve it for history fidelity.
|
|
imageURL := item.Get("image_url.url").String()
|
|
if len(imageURL) > 5 { // expect data:...
|
|
pieces := strings.SplitN(imageURL[5:], ";", 2)
|
|
if len(pieces) == 2 && len(pieces[1]) > 7 {
|
|
mime := pieces[0]
|
|
data := pieces[1][7:]
|
|
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.mime_type", mime)
|
|
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.data", data)
|
|
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".thoughtSignature", geminiFunctionThoughtSignature)
|
|
p++
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Tool calls -> single model content with functionCall parts
|
|
tcs := m.Get("tool_calls")
|
|
if tcs.IsArray() {
|
|
fIDs := make([]string, 0)
|
|
for _, tc := range tcs.Array() {
|
|
if tc.Get("type").String() != "function" {
|
|
continue
|
|
}
|
|
fid := tc.Get("id").String()
|
|
fname := tc.Get("function.name").String()
|
|
fargs := tc.Get("function.arguments").String()
|
|
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".functionCall.name", fname)
|
|
node, _ = sjson.SetRawBytes(node, "parts."+itoa(p)+".functionCall.args", []byte(fargs))
|
|
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".thoughtSignature", geminiFunctionThoughtSignature)
|
|
p++
|
|
if fid != "" {
|
|
fIDs = append(fIDs, fid)
|
|
}
|
|
}
|
|
out, _ = sjson.SetRawBytes(out, "contents.-1", node)
|
|
|
|
// Append a single tool content combining name + response per function
|
|
toolNode := []byte(`{"role":"user","parts":[]}`)
|
|
pp := 0
|
|
for _, fid := range fIDs {
|
|
if name, ok := tcID2Name[fid]; ok {
|
|
toolNode, _ = sjson.SetBytes(toolNode, "parts."+itoa(pp)+".functionResponse.name", name)
|
|
resp := toolResponses[fid]
|
|
if resp == "" {
|
|
resp = "{}"
|
|
}
|
|
toolNode, _ = sjson.SetBytes(toolNode, "parts."+itoa(pp)+".functionResponse.response.result", []byte(resp))
|
|
pp++
|
|
}
|
|
}
|
|
if pp > 0 {
|
|
out, _ = sjson.SetRawBytes(out, "contents.-1", toolNode)
|
|
}
|
|
} else {
|
|
out, _ = sjson.SetRawBytes(out, "contents.-1", node)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// tools -> tools[].functionDeclarations + tools[].googleSearch/codeExecution/urlContext passthrough
|
|
tools := gjson.GetBytes(rawJSON, "tools")
|
|
if tools.IsArray() && len(tools.Array()) > 0 {
|
|
functionToolNode := []byte(`{}`)
|
|
hasFunction := false
|
|
googleSearchNodes := make([][]byte, 0)
|
|
codeExecutionNodes := make([][]byte, 0)
|
|
urlContextNodes := make([][]byte, 0)
|
|
for _, t := range tools.Array() {
|
|
if t.Get("type").String() == "function" {
|
|
fn := t.Get("function")
|
|
if fn.Exists() && fn.IsObject() {
|
|
fnRaw := fn.Raw
|
|
if fn.Get("parameters").Exists() {
|
|
renamed, errRename := util.RenameKey(fnRaw, "parameters", "parametersJsonSchema")
|
|
if errRename != nil {
|
|
log.Warnf("Failed to rename parameters for tool '%s': %v", fn.Get("name").String(), errRename)
|
|
var errSet error
|
|
fnRaw, errSet = sjson.Set(fnRaw, "parametersJsonSchema.type", "object")
|
|
if errSet != nil {
|
|
log.Warnf("Failed to set default schema type for tool '%s': %v", fn.Get("name").String(), errSet)
|
|
continue
|
|
}
|
|
fnRaw, errSet = sjson.SetRaw(fnRaw, "parametersJsonSchema.properties", `{}`)
|
|
if errSet != nil {
|
|
log.Warnf("Failed to set default schema properties for tool '%s': %v", fn.Get("name").String(), errSet)
|
|
continue
|
|
}
|
|
} else {
|
|
fnRaw = renamed
|
|
}
|
|
} else {
|
|
var errSet error
|
|
fnRaw, errSet = sjson.Set(fnRaw, "parametersJsonSchema.type", "object")
|
|
if errSet != nil {
|
|
log.Warnf("Failed to set default schema type for tool '%s': %v", fn.Get("name").String(), errSet)
|
|
continue
|
|
}
|
|
fnRaw, errSet = sjson.SetRaw(fnRaw, "parametersJsonSchema.properties", `{}`)
|
|
if errSet != nil {
|
|
log.Warnf("Failed to set default schema properties for tool '%s': %v", fn.Get("name").String(), errSet)
|
|
continue
|
|
}
|
|
}
|
|
fnRaw, _ = sjson.Delete(fnRaw, "strict")
|
|
if !hasFunction {
|
|
functionToolNode, _ = sjson.SetRawBytes(functionToolNode, "functionDeclarations", []byte("[]"))
|
|
}
|
|
tmp, errSet := sjson.SetRawBytes(functionToolNode, "functionDeclarations.-1", []byte(fnRaw))
|
|
if errSet != nil {
|
|
log.Warnf("Failed to append tool declaration for '%s': %v", fn.Get("name").String(), errSet)
|
|
continue
|
|
}
|
|
functionToolNode = tmp
|
|
hasFunction = true
|
|
}
|
|
}
|
|
if gs := t.Get("google_search"); gs.Exists() {
|
|
googleToolNode := []byte(`{}`)
|
|
var errSet error
|
|
googleToolNode, errSet = sjson.SetRawBytes(googleToolNode, "googleSearch", []byte(gs.Raw))
|
|
if errSet != nil {
|
|
log.Warnf("Failed to set googleSearch tool: %v", errSet)
|
|
continue
|
|
}
|
|
googleSearchNodes = append(googleSearchNodes, googleToolNode)
|
|
}
|
|
if ce := t.Get("code_execution"); ce.Exists() {
|
|
codeToolNode := []byte(`{}`)
|
|
var errSet error
|
|
codeToolNode, errSet = sjson.SetRawBytes(codeToolNode, "codeExecution", []byte(ce.Raw))
|
|
if errSet != nil {
|
|
log.Warnf("Failed to set codeExecution tool: %v", errSet)
|
|
continue
|
|
}
|
|
codeExecutionNodes = append(codeExecutionNodes, codeToolNode)
|
|
}
|
|
if uc := t.Get("url_context"); uc.Exists() {
|
|
urlToolNode := []byte(`{}`)
|
|
var errSet error
|
|
urlToolNode, errSet = sjson.SetRawBytes(urlToolNode, "urlContext", []byte(uc.Raw))
|
|
if errSet != nil {
|
|
log.Warnf("Failed to set urlContext tool: %v", errSet)
|
|
continue
|
|
}
|
|
urlContextNodes = append(urlContextNodes, urlToolNode)
|
|
}
|
|
}
|
|
if hasFunction || len(googleSearchNodes) > 0 || len(codeExecutionNodes) > 0 || len(urlContextNodes) > 0 {
|
|
toolsNode := []byte("[]")
|
|
if hasFunction {
|
|
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", functionToolNode)
|
|
}
|
|
for _, googleNode := range googleSearchNodes {
|
|
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", googleNode)
|
|
}
|
|
for _, codeNode := range codeExecutionNodes {
|
|
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", codeNode)
|
|
}
|
|
for _, urlNode := range urlContextNodes {
|
|
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", urlNode)
|
|
}
|
|
out, _ = sjson.SetRawBytes(out, "tools", toolsNode)
|
|
}
|
|
}
|
|
|
|
out = common.AttachDefaultSafetySettings(out, "safetySettings")
|
|
|
|
return out
|
|
}
|
|
|
|
// itoa renders i in base 10 via fmt, which keeps strconv out of this file's
// import list for the handful of call sites that build sjson array paths.
func itoa(i int) string {
	return fmt.Sprint(i)
}
|