diff --git a/internal/translator/claude/openai/responses/claude_openai-responses_request.go b/internal/translator/claude/openai/responses/claude_openai-responses_request.go index 439755a6..764bb5c9 100644 --- a/internal/translator/claude/openai/responses/claude_openai-responses_request.go +++ b/internal/translator/claude/openai/responses/claude_openai-responses_request.go @@ -143,21 +143,63 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte } switch typ { case "message": - // Determine role from content type (input_text=user, output_text=assistant) + // Determine role and construct Claude-compatible content parts. var role string - var text strings.Builder + var textAggregate strings.Builder + var partsJSON []string + hasImage := false if parts := item.Get("content"); parts.Exists() && parts.IsArray() { parts.ForEach(func(_, part gjson.Result) bool { ptype := part.Get("type").String() - if ptype == "input_text" || ptype == "output_text" { + switch ptype { + case "input_text", "output_text": if t := part.Get("text"); t.Exists() { - text.WriteString(t.String()) + txt := t.String() + textAggregate.WriteString(txt) + contentPart := `{"type":"text","text":""}` + contentPart, _ = sjson.Set(contentPart, "text", txt) + partsJSON = append(partsJSON, contentPart) } if ptype == "input_text" { role = "user" - } else if ptype == "output_text" { + } else { role = "assistant" } + case "input_image": + url := part.Get("image_url").String() + if url == "" { + url = part.Get("url").String() + } + if url != "" { + var contentPart string + if strings.HasPrefix(url, "data:") { + trimmed := strings.TrimPrefix(url, "data:") + mediaAndData := strings.SplitN(trimmed, ";base64,", 2) + mediaType := "application/octet-stream" + data := "" + if len(mediaAndData) == 2 { + if mediaAndData[0] != "" { + mediaType = mediaAndData[0] + } + data = mediaAndData[1] + } + if data != "" { + contentPart = `{"type":"image","source":{"type":"base64","media_type":"","data":""}}` + contentPart, _ = sjson.Set(contentPart, "source.media_type", mediaType) + contentPart, _ = sjson.Set(contentPart, "source.data", data) + } + } else { + contentPart = `{"type":"image","source":{"type":"url","url":""}}` + contentPart, _ = sjson.Set(contentPart, "source.url", url) + } + if contentPart != "" { + partsJSON = append(partsJSON, contentPart) + if role == "" { + role = "user" + } + hasImage = true + } + } } return true }) @@ -174,14 +216,24 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte } } - if text.Len() > 0 || role == "system" { + if len(partsJSON) > 0 { + msg := `{"role":"","content":[]}` + msg, _ = sjson.Set(msg, "role", role) + if len(partsJSON) == 1 && !hasImage { + // Preserve legacy behavior for single text content + msg, _ = sjson.Delete(msg, "content") + textPart := gjson.Parse(partsJSON[0]) + msg, _ = sjson.Set(msg, "content", textPart.Get("text").String()) + } else { + for _, partJSON := range partsJSON { + msg, _ = sjson.SetRaw(msg, "content.-1", partJSON) + } + } + out, _ = sjson.SetRaw(out, "messages.-1", msg) + } else if textAggregate.Len() > 0 || role == "system" { msg := `{"role":"","content":""}` msg, _ = sjson.Set(msg, "role", role) - if text.Len() > 0 { - msg, _ = sjson.Set(msg, "content", text.String()) - } else { - msg, _ = sjson.Set(msg, "content", "") - } + msg, _ = sjson.Set(msg, "content", textAggregate.String()) out, _ = sjson.SetRaw(out, "messages.-1", msg) } diff --git a/internal/translator/codex/claude/codex_claude_request.go b/internal/translator/codex/claude/codex_claude_request.go index 66b5cd85..8036d5d5 100644 --- a/internal/translator/codex/claude/codex_claude_request.go +++ b/internal/translator/codex/claude/codex_claude_request.go @@ -68,36 +68,79 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool) for i := 0; i < len(messageResults); i++ { messageResult := messageResults[i] + messageRole := messageResult.Get("role").String() + + newMessage := func() string { + msg := `{"type": "message","role":"","content":[]}` + msg, _ = sjson.Set(msg, "role", messageRole) + return msg + } + + message := newMessage() + contentIndex := 0 + hasContent := false + + flushMessage := func() { + if hasContent { + template, _ = sjson.SetRaw(template, "input.-1", message) + message = newMessage() + contentIndex = 0 + hasContent = false + } + } + + appendTextContent := func(text string) { + partType := "input_text" + if messageRole == "assistant" { + partType = "output_text" + } + message, _ = sjson.Set(message, fmt.Sprintf("content.%d.type", contentIndex), partType) + message, _ = sjson.Set(message, fmt.Sprintf("content.%d.text", contentIndex), text) + contentIndex++ + hasContent = true + } + + appendImageContent := func(dataURL string) { + message, _ = sjson.Set(message, fmt.Sprintf("content.%d.type", contentIndex), "input_image") + message, _ = sjson.Set(message, fmt.Sprintf("content.%d.image_url", contentIndex), dataURL) + contentIndex++ + hasContent = true + } messageContentsResult := messageResult.Get("content") if messageContentsResult.IsArray() { messageContentResults := messageContentsResult.Array() for j := 0; j < len(messageContentResults); j++ { messageContentResult := messageContentResults[j] - messageContentTypeResult := messageContentResult.Get("type") - contentType := messageContentTypeResult.String() + contentType := messageContentResult.Get("type").String() - if contentType == "text" { - // Handle text content by creating appropriate message structure. - message := `{"type": "message","role":"","content":[]}` - messageRole := messageResult.Get("role").String() - message, _ = sjson.Set(message, "role", messageRole) - - partType := "input_text" - if messageRole == "assistant" { - partType = "output_text" + switch contentType { + case "text": + appendTextContent(messageContentResult.Get("text").String()) + case "image": + sourceResult := messageContentResult.Get("source") + if sourceResult.Exists() { + data := sourceResult.Get("data").String() + if data == "" { + data = sourceResult.Get("base64").String() + } + if data != "" { + mediaType := sourceResult.Get("media_type").String() + if mediaType == "" { + mediaType = sourceResult.Get("mime_type").String() + } + if mediaType == "" { + mediaType = "application/octet-stream" + } + dataURL := fmt.Sprintf("data:%s;base64,%s", mediaType, data) + appendImageContent(dataURL) + } } - - currentIndex := len(gjson.Get(message, "content").Array()) - message, _ = sjson.Set(message, fmt.Sprintf("content.%d.type", currentIndex), partType) - message, _ = sjson.Set(message, fmt.Sprintf("content.%d.text", currentIndex), messageContentResult.Get("text").String()) - template, _ = sjson.SetRaw(template, "input.-1", message) - } else if contentType == "tool_use" { - // Handle tool use content by creating function call message. + case "tool_use": + flushMessage() functionCallMessage := `{"type":"function_call"}` functionCallMessage, _ = sjson.Set(functionCallMessage, "call_id", messageContentResult.Get("id").String()) { - // Shorten tool name if needed based on declared tools name := messageContentResult.Get("name").String() toolMap := buildReverseMapFromClaudeOriginalToShort(rawJSON) if short, ok := toolMap[name]; ok { @@ -109,28 +152,18 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool) } functionCallMessage, _ = sjson.Set(functionCallMessage, "arguments", messageContentResult.Get("input").Raw) template, _ = sjson.SetRaw(template, "input.-1", functionCallMessage) - } else if contentType == "tool_result" { - // Handle tool result content by creating function call output message. + case "tool_result": + flushMessage() functionCallOutputMessage := `{"type":"function_call_output"}` functionCallOutputMessage, _ = sjson.Set(functionCallOutputMessage, "call_id", messageContentResult.Get("tool_use_id").String()) functionCallOutputMessage, _ = sjson.Set(functionCallOutputMessage, "output", messageContentResult.Get("content").String()) template, _ = sjson.SetRaw(template, "input.-1", functionCallOutputMessage) } } + flushMessage() } else if messageContentsResult.Type == gjson.String { - // Handle string content by creating appropriate message structure. - message := `{"type": "message","role":"","content":[]}` - messageRole := messageResult.Get("role").String() - message, _ = sjson.Set(message, "role", messageRole) - - partType := "input_text" - if messageRole == "assistant" { - partType = "output_text" - } - - message, _ = sjson.Set(message, "content.0.type", partType) - message, _ = sjson.Set(message, "content.0.text", messageContentsResult.String()) - template, _ = sjson.SetRaw(template, "input.-1", message) + appendTextContent(messageContentsResult.String()) + flushMessage() } }