diff --git a/internal/translator/openai/claude/openai_claude_response.go b/internal/translator/openai/claude/openai_claude_response.go index a131a675..dac4c970 100644 --- a/internal/translator/openai/claude/openai_claude_response.go +++ b/internal/translator/openai/claude/openai_claude_response.go @@ -32,6 +32,8 @@ type ConvertOpenAIResponseToAnthropicParams struct { ToolCallsAccumulator map[int]*ToolCallAccumulator // Track if text content block has been started TextContentBlockStarted bool + // Track if thinking content block has been started + ThinkingContentBlockStarted bool // Track finish reason for later use FinishReason string // Track if content blocks have been stopped @@ -40,6 +42,16 @@ type ConvertOpenAIResponseToAnthropicParams struct { MessageDeltaSent bool // Track if message_start has been sent MessageStarted bool + // Track if message_stop has been sent + MessageStopSent bool + // Tool call content block index mapping + ToolCallBlockIndexes map[int]int + // Index assigned to text content block + TextContentBlockIndex int + // Index assigned to thinking content block + ThinkingContentBlockIndex int + // Next available content block index + NextContentBlockIndex int } // ToolCallAccumulator holds the state for accumulating tool call data @@ -64,15 +76,20 @@ type ToolCallAccumulator struct { func ConvertOpenAIResponseToClaude(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, param *any) []string { if *param == nil { *param = &ConvertOpenAIResponseToAnthropicParams{ - MessageID: "", - Model: "", - CreatedAt: 0, - ContentAccumulator: strings.Builder{}, - ToolCallsAccumulator: nil, - TextContentBlockStarted: false, - FinishReason: "", - ContentBlocksStopped: false, - MessageDeltaSent: false, + MessageID: "", + Model: "", + CreatedAt: 0, + ContentAccumulator: strings.Builder{}, + ToolCallsAccumulator: nil, + TextContentBlockStarted: false, + ThinkingContentBlockStarted: false, + FinishReason: "", + ContentBlocksStopped: false, + MessageDeltaSent: false, + ToolCallBlockIndexes: make(map[int]int), + TextContentBlockIndex: -1, + ThinkingContentBlockIndex: -1, + NextContentBlockIndex: 0, } } @@ -138,13 +155,56 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI // Don't send content_block_start for text here - wait for actual content } + // Handle reasoning content delta + if reasoning := delta.Get("reasoning_content"); reasoning.Exists() { + for _, reasoningText := range collectOpenAIReasoningTexts(reasoning) { + if reasoningText == "" { + continue + } + stopTextContentBlock(param, &results) + if !param.ThinkingContentBlockStarted { + if param.ThinkingContentBlockIndex == -1 { + param.ThinkingContentBlockIndex = param.NextContentBlockIndex + param.NextContentBlockIndex++ + } + contentBlockStart := map[string]interface{}{ + "type": "content_block_start", + "index": param.ThinkingContentBlockIndex, + "content_block": map[string]interface{}{ + "type": "thinking", + "thinking": "", + }, + } + contentBlockStartJSON, _ := json.Marshal(contentBlockStart) + results = append(results, "event: content_block_start\ndata: "+string(contentBlockStartJSON)+"\n\n") + param.ThinkingContentBlockStarted = true + } + + thinkingDelta := map[string]interface{}{ + "type": "content_block_delta", + "index": param.ThinkingContentBlockIndex, + "delta": map[string]interface{}{ + "type": "thinking_delta", + "thinking": reasoningText, + }, + } + thinkingDeltaJSON, _ := json.Marshal(thinkingDelta) + results = append(results, "event: content_block_delta\ndata: "+string(thinkingDeltaJSON)+"\n\n") + } + } + // Handle content delta if content := delta.Get("content"); content.Exists() && content.String() != "" { // Send content_block_start for text if not already sent if !param.TextContentBlockStarted { + stopThinkingContentBlock(param, &results) + if param.TextContentBlockIndex == -1 { + param.TextContentBlockIndex = param.NextContentBlockIndex + param.NextContentBlockIndex++ + } contentBlockStart := map[string]interface{}{ "type": "content_block_start", - "index": 0, + "index": param.TextContentBlockIndex, "content_block": map[string]interface{}{ "type": "text", "text": "", @@ -157,7 +217,7 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI contentDelta := map[string]interface{}{ "type": "content_block_delta", - "index": 0, + "index": param.TextContentBlockIndex, "delta": map[string]interface{}{ "type": "text_delta", "text": content.String(), @@ -178,6 +238,7 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI toolCalls.ForEach(func(_, toolCall gjson.Result) bool { index := int(toolCall.Get("index").Int()) + blockIndex := param.toolContentBlockIndex(index) // Initialize accumulator if needed if _, exists := param.ToolCallsAccumulator[index]; !exists { @@ -196,20 +257,14 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI if name := function.Get("name"); name.Exists() { accumulator.Name = name.String() - if param.TextContentBlockStarted { - param.TextContentBlockStarted = false - contentBlockStop := map[string]interface{}{ - "type": "content_block_stop", - "index": index, - } - contentBlockStopJSON, _ := json.Marshal(contentBlockStop) - results = append(results, "event: content_block_stop\ndata: "+string(contentBlockStopJSON)+"\n\n") - } + stopThinkingContentBlock(param, &results) + + stopTextContentBlock(param, &results) // Send content_block_start for tool_use contentBlockStart := map[string]interface{}{ "type": "content_block_start", - "index": index + 1, // Offset by 1 since text is at index 0 + "index": blockIndex, "content_block": map[string]interface{}{ "type": "tool_use", "id": accumulator.ID, @@ -240,26 +295,32 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI reason := finishReason.String() param.FinishReason = reason - // Send content_block_stop for text if text content block was started - if param.TextContentBlockStarted && !param.ContentBlocksStopped { + // Send content_block_stop for thinking content if needed + if param.ThinkingContentBlockStarted { contentBlockStop := map[string]interface{}{ "type": "content_block_stop", - "index": 0, + "index": param.ThinkingContentBlockIndex, } contentBlockStopJSON, _ := json.Marshal(contentBlockStop) results = append(results, "event: content_block_stop\ndata: "+string(contentBlockStopJSON)+"\n\n") + param.ThinkingContentBlockStarted = false + param.ThinkingContentBlockIndex = -1 } + // Send content_block_stop for text if text content block was started + stopTextContentBlock(param, &results) + // Send content_block_stop for any tool calls if !param.ContentBlocksStopped { for index := range param.ToolCallsAccumulator { accumulator := param.ToolCallsAccumulator[index] + blockIndex := param.toolContentBlockIndex(index) // Send complete input_json_delta with all accumulated arguments if accumulator.Arguments.Len() > 0 { inputDelta := map[string]interface{}{ "type": "content_block_delta", - "index": index + 1, + "index": blockIndex, "delta": map[string]interface{}{ "type": "input_json_delta", "partial_json": util.FixJSON(accumulator.Arguments.String()), @@ -271,10 +332,11 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI contentBlockStop := map[string]interface{}{ "type": "content_block_stop", - "index": index + 1, + "index": blockIndex, } contentBlockStopJSON, _ := json.Marshal(contentBlockStop) results = append(results, "event: content_block_stop\ndata: "+string(contentBlockStopJSON)+"\n\n") + delete(param.ToolCallBlockIndexes, index) } param.ContentBlocksStopped = true } @@ -314,6 +376,8 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI results = append(results, "event: message_delta\ndata: "+string(messageDeltaJSON)+"\n\n") param.MessageDeltaSent = true + emitMessageStopIfNeeded(param, &results) + } return results @@ -323,6 +387,49 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI func convertOpenAIDoneToAnthropic(param *ConvertOpenAIResponseToAnthropicParams) []string { var results []string + // Ensure all content blocks are stopped before final events + if param.ThinkingContentBlockStarted { + contentBlockStop := map[string]interface{}{ + "type": "content_block_stop", + "index": param.ThinkingContentBlockIndex, + } + contentBlockStopJSON, _ := json.Marshal(contentBlockStop) + results = append(results, "event: content_block_stop\ndata: "+string(contentBlockStopJSON)+"\n\n") + param.ThinkingContentBlockStarted = false + param.ThinkingContentBlockIndex = -1 + } + + stopTextContentBlock(param, &results) + + if !param.ContentBlocksStopped { + for index := range param.ToolCallsAccumulator { + accumulator := param.ToolCallsAccumulator[index] + blockIndex := param.toolContentBlockIndex(index) + + if accumulator.Arguments.Len() > 0 { + inputDelta := map[string]interface{}{ + "type": "content_block_delta", + "index": blockIndex, + "delta": map[string]interface{}{ + "type": "input_json_delta", + "partial_json": util.FixJSON(accumulator.Arguments.String()), + }, + } + inputDeltaJSON, _ := json.Marshal(inputDelta) + results = append(results, "event: content_block_delta\ndata: "+string(inputDeltaJSON)+"\n\n") + } + + contentBlockStop := map[string]interface{}{ + "type": "content_block_stop", + "index": blockIndex, + } + contentBlockStopJSON, _ := json.Marshal(contentBlockStop) + results = append(results, "event: content_block_stop\ndata: "+string(contentBlockStopJSON)+"\n\n") + delete(param.ToolCallBlockIndexes, index) + } + param.ContentBlocksStopped = true + } + // If we haven't sent message_delta yet (no usage info was received), send it now if param.FinishReason != "" && !param.MessageDeltaSent { messageDelta := map[string]interface{}{ @@ -338,8 +445,7 @@ func convertOpenAIDoneToAnthropic(param *ConvertOpenAIResponseToAnthropicParams) param.MessageDeltaSent = true } - // Send message_stop - results = append(results, "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n") + emitMessageStopIfNeeded(param, &results) return results } @@ -368,6 +474,18 @@ func convertOpenAINonStreamingToAnthropic(rawJSON []byte) []string { if choices := root.Get("choices"); choices.Exists() && choices.IsArray() { choice := choices.Array()[0] // Take first choice + reasoningNode := choice.Get("message.reasoning_content") + allReasoning := collectOpenAIReasoningTexts(reasoningNode) + + for _, reasoningText := range allReasoning { + if reasoningText == "" { + continue + } + contentBlocks = append(contentBlocks, map[string]interface{}{ + "type": "thinking", + "thinking": reasoningText, + }) + } // Handle text content if content := choice.Get("message.content"); content.Exists() && content.String() != "" { @@ -419,6 +537,12 @@ func convertOpenAINonStreamingToAnthropic(rawJSON []byte) []string { response["usage"] = map[string]interface{}{ "input_tokens": usage.Get("prompt_tokens").Int(), "output_tokens": usage.Get("completion_tokens").Int(), + "reasoning_tokens": func() int64 { + if v := usage.Get("completion_tokens_details.reasoning_tokens"); v.Exists() { + return v.Int() + } + return 0 + }(), } } else { response["usage"] = map[string]interface{}{ @@ -449,6 +573,84 @@ func mapOpenAIFinishReasonToAnthropic(openAIReason string) string { } } +func (p *ConvertOpenAIResponseToAnthropicParams) toolContentBlockIndex(openAIToolIndex int) int { + if idx, ok := p.ToolCallBlockIndexes[openAIToolIndex]; ok { + return idx + } + idx := p.NextContentBlockIndex + p.NextContentBlockIndex++ + p.ToolCallBlockIndexes[openAIToolIndex] = idx + return idx +} + +func collectOpenAIReasoningTexts(node gjson.Result) []string { + var texts []string + if !node.Exists() { + return texts + } + + if node.IsArray() { + node.ForEach(func(_, value gjson.Result) bool { + texts = append(texts, collectOpenAIReasoningTexts(value)...) + return true + }) + return texts + } + + switch node.Type { + case gjson.String: + if text := strings.TrimSpace(node.String()); text != "" { + texts = append(texts, text) + } + case gjson.JSON: + if text := node.Get("text"); text.Exists() { + if trimmed := strings.TrimSpace(text.String()); trimmed != "" { + texts = append(texts, trimmed) + } + } else if raw := strings.TrimSpace(node.Raw); raw != "" && !strings.HasPrefix(raw, "{") && !strings.HasPrefix(raw, "[") { + texts = append(texts, raw) + } + } + + return texts +} + +func stopThinkingContentBlock(param *ConvertOpenAIResponseToAnthropicParams, results *[]string) { + if !param.ThinkingContentBlockStarted { + return + } + contentBlockStop := map[string]interface{}{ + "type": "content_block_stop", + "index": param.ThinkingContentBlockIndex, + } + contentBlockStopJSON, _ := json.Marshal(contentBlockStop) + *results = append(*results, "event: content_block_stop\ndata: "+string(contentBlockStopJSON)+"\n\n") + param.ThinkingContentBlockStarted = false + param.ThinkingContentBlockIndex = -1 +} + +func emitMessageStopIfNeeded(param *ConvertOpenAIResponseToAnthropicParams, results *[]string) { + if param.MessageStopSent { + return + } + *results = append(*results, "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n") + param.MessageStopSent = true +} + +func stopTextContentBlock(param *ConvertOpenAIResponseToAnthropicParams, results *[]string) { + if !param.TextContentBlockStarted { + return + } + contentBlockStop := map[string]interface{}{ + "type": "content_block_stop", + "index": param.TextContentBlockIndex, + } + contentBlockStopJSON, _ := json.Marshal(contentBlockStop) + *results = append(*results, "event: content_block_stop\ndata: "+string(contentBlockStopJSON)+"\n\n") + param.TextContentBlockStarted = false + param.TextContentBlockIndex = -1 +} + // ConvertOpenAIResponseToClaudeNonStream converts a non-streaming OpenAI response to a non-streaming Anthropic response. // // Parameters: @@ -576,6 +778,18 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina } } + if reasoning := message.Get("reasoning_content"); reasoning.Exists() { + for _, reasoningText := range collectOpenAIReasoningTexts(reasoning) { + if reasoningText == "" { + continue + } + contentBlocks = append(contentBlocks, map[string]interface{}{ + "type": "thinking", + "thinking": reasoningText, + }) + } + } + if toolCalls := message.Get("tool_calls"); toolCalls.Exists() && toolCalls.IsArray() { toolCalls.ForEach(func(_, toolCall gjson.Result) bool { hasToolCall = true diff --git a/internal/translator/openai/gemini/openai_gemini_response.go b/internal/translator/openai/gemini/openai_gemini_response.go index d2f45db1..6dd0ca92 100644 --- a/internal/translator/openai/gemini/openai_gemini_response.go +++ b/internal/translator/openai/gemini/openai_gemini_response.go @@ -89,6 +89,9 @@ func ConvertOpenAIResponseToGemini(_ context.Context, _ string, originalRequestR "candidatesTokenCount": usage.Get("completion_tokens").Int(), "totalTokenCount": usage.Get("total_tokens").Int(), } + if reasoningTokens := reasoningTokensFromUsage(usage); reasoningTokens > 0 { + usageObj["thoughtsTokenCount"] = reasoningTokens + } template, _ = sjson.Set(template, "usageMetadata", usageObj) return []string{template} } @@ -108,6 +111,7 @@ func ConvertOpenAIResponseToGemini(_ context.Context, _ string, originalRequestR _ = int(choice.Get("index").Int()) // choiceIdx not used in streaming delta := choice.Get("delta") + baseTemplate := template // Handle role (only in first chunk) if role := delta.Get("role"); role.Exists() && (*param).(*ConvertOpenAIResponseToGeminiParams).IsFirstChunk { @@ -120,6 +124,26 @@ func ConvertOpenAIResponseToGemini(_ context.Context, _ string, originalRequestR return true } + var chunkOutputs []string + + // Handle reasoning/thinking delta + if reasoning := delta.Get("reasoning_content"); reasoning.Exists() { + for _, reasoningText := range extractReasoningTexts(reasoning) { + if reasoningText == "" { + continue + } + reasoningTemplate := baseTemplate + parts := []interface{}{ + map[string]interface{}{ + "thought": true, + "text": reasoningText, + }, + } + reasoningTemplate, _ = sjson.Set(reasoningTemplate, "candidates.0.content.parts", parts) + chunkOutputs = append(chunkOutputs, reasoningTemplate) + } + } + // Handle content delta if content := delta.Get("content"); content.Exists() && content.String() != "" { contentText := content.String() @@ -131,8 +155,13 @@ func ConvertOpenAIResponseToGemini(_ context.Context, _ string, originalRequestR "text": contentText, }, } - template, _ = sjson.Set(template, "candidates.0.content.parts", parts) - results = append(results, template) + contentTemplate := baseTemplate + contentTemplate, _ = sjson.Set(contentTemplate, "candidates.0.content.parts", parts) + chunkOutputs = append(chunkOutputs, contentTemplate) + } + + if len(chunkOutputs) > 0 { + results = append(results, chunkOutputs...) return true } @@ -231,6 +260,9 @@ func ConvertOpenAIResponseToGemini(_ context.Context, _ string, originalRequestR "candidatesTokenCount": usage.Get("completion_tokens").Int(), "totalTokenCount": usage.Get("total_tokens").Int(), } + if reasoningTokens := reasoningTokensFromUsage(usage); reasoningTokens > 0 { + usageObj["thoughtsTokenCount"] = reasoningTokens + } template, _ = sjson.Set(template, "usageMetadata", usageObj) results = append(results, template) return true @@ -549,6 +581,19 @@ func ConvertOpenAIResponseToGeminiNonStream(_ context.Context, _ string, origina var parts []interface{} + // Handle reasoning content before visible text + if reasoning := message.Get("reasoning_content"); reasoning.Exists() { + for _, reasoningText := range extractReasoningTexts(reasoning) { + if reasoningText == "" { + continue + } + parts = append(parts, map[string]interface{}{ + "thought": true, + "text": reasoningText, + }) + } + } + // Handle content first if content := message.Get("content"); content.Exists() && content.String() != "" { parts = append(parts, map[string]interface{}{ @@ -605,6 +650,9 @@ func ConvertOpenAIResponseToGeminiNonStream(_ context.Context, _ string, origina "candidatesTokenCount": usage.Get("completion_tokens").Int(), "totalTokenCount": usage.Get("total_tokens").Int(), } + if reasoningTokens := reasoningTokensFromUsage(usage); reasoningTokens > 0 { + usageObj["thoughtsTokenCount"] = reasoningTokens + } out, _ = sjson.Set(out, "usageMetadata", usageObj) } @@ -614,3 +662,47 @@ func ConvertOpenAIResponseToGeminiNonStream(_ context.Context, _ string, origina func GeminiTokenCount(ctx context.Context, count int64) string { return fmt.Sprintf(`{"totalTokens":%d,"promptTokensDetails":[{"modality":"TEXT","tokenCount":%d}]}`, count, count) } + +func reasoningTokensFromUsage(usage gjson.Result) int64 { + if usage.Exists() { + if v := usage.Get("completion_tokens_details.reasoning_tokens"); v.Exists() { + return v.Int() + } + if v := usage.Get("output_tokens_details.reasoning_tokens"); v.Exists() { + return v.Int() + } + } + return 0 +} + +func extractReasoningTexts(node gjson.Result) []string { + var texts []string + if !node.Exists() { + return texts + } + + if node.IsArray() { + node.ForEach(func(_, value gjson.Result) bool { + texts = append(texts, extractReasoningTexts(value)...) + return true + }) + return texts + } + + switch node.Type { + case gjson.String: + if text := strings.TrimSpace(node.String()); text != "" { + texts = append(texts, text) + } + case gjson.JSON: + if text := node.Get("text"); text.Exists() { + if trimmed := strings.TrimSpace(text.String()); trimmed != "" { + texts = append(texts, trimmed) + } + } else if raw := strings.TrimSpace(node.Raw); raw != "" && !strings.HasPrefix(raw, "{") && !strings.HasPrefix(raw, "[") { + texts = append(texts, raw) + } + } + + return texts +}