From cf9daf470ca7698c08970f6cb22aede8af2677c5 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Mon, 19 Jan 2026 11:23:44 +0800 Subject: [PATCH] feat(translator): report cached token usage in Claude output --- .../codex/claude/codex_claude_response.go | 39 +++++++++++--- .../openai/claude/openai_claude_response.go | 53 +++++++++++++------ 2 files changed, 67 insertions(+), 25 deletions(-) diff --git a/internal/translator/codex/claude/codex_claude_response.go b/internal/translator/codex/claude/codex_claude_response.go index c700ef84..5223cd94 100644 --- a/internal/translator/codex/claude/codex_claude_response.go +++ b/internal/translator/codex/claude/codex_claude_response.go @@ -117,8 +117,12 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa } else { template, _ = sjson.Set(template, "delta.stop_reason", "end_turn") } - template, _ = sjson.Set(template, "usage.input_tokens", rootResult.Get("response.usage.input_tokens").Int()) - template, _ = sjson.Set(template, "usage.output_tokens", rootResult.Get("response.usage.output_tokens").Int()) + inputTokens, outputTokens, cachedTokens := extractResponsesUsage(rootResult.Get("response.usage")) + template, _ = sjson.Set(template, "usage.input_tokens", inputTokens) + template, _ = sjson.Set(template, "usage.output_tokens", outputTokens) + if cachedTokens > 0 { + template, _ = sjson.Set(template, "usage.cache_read_input_tokens", cachedTokens) + } output = "event: message_delta\n" output += fmt.Sprintf("data: %s\n\n", template) @@ -204,8 +208,12 @@ func ConvertCodexResponseToClaudeNonStream(_ context.Context, _ string, original out := `{"id":"","type":"message","role":"assistant","model":"","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":0,"output_tokens":0}}` out, _ = sjson.Set(out, "id", responseData.Get("id").String()) out, _ = sjson.Set(out, "model", responseData.Get("model").String()) - out, _ = sjson.Set(out, "usage.input_tokens", responseData.Get("usage.input_tokens").Int()) - out, _ = sjson.Set(out, "usage.output_tokens", responseData.Get("usage.output_tokens").Int()) + inputTokens, outputTokens, cachedTokens := extractResponsesUsage(responseData.Get("usage")) + out, _ = sjson.Set(out, "usage.input_tokens", inputTokens) + out, _ = sjson.Set(out, "usage.output_tokens", outputTokens) + if cachedTokens > 0 { + out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens) + } hasToolCall := false @@ -308,12 +316,27 @@ func ConvertCodexResponseToClaudeNonStream(_ context.Context, _ string, original out, _ = sjson.SetRaw(out, "stop_sequence", stopSequence.Raw) } - if responseData.Get("usage.input_tokens").Exists() || responseData.Get("usage.output_tokens").Exists() { - out, _ = sjson.Set(out, "usage.input_tokens", responseData.Get("usage.input_tokens").Int()) - out, _ = sjson.Set(out, "usage.output_tokens", responseData.Get("usage.output_tokens").Int()) + return out +} + +func extractResponsesUsage(usage gjson.Result) (int64, int64, int64) { + if !usage.Exists() || usage.Type == gjson.Null { + return 0, 0, 0 } - return out + inputTokens := usage.Get("input_tokens").Int() + outputTokens := usage.Get("output_tokens").Int() + cachedTokens := usage.Get("input_tokens_details.cached_tokens").Int() + + if cachedTokens > 0 { + if inputTokens >= cachedTokens { + inputTokens -= cachedTokens + } else { + inputTokens = 0 + } + } + + return inputTokens, outputTokens, cachedTokens } // buildReverseMapFromClaudeOriginalShortToOriginal builds a map[short]original from original Claude request tools. diff --git a/internal/translator/openai/claude/openai_claude_response.go b/internal/translator/openai/claude/openai_claude_response.go index 1629545d..b6e0d005 100644 --- a/internal/translator/openai/claude/openai_claude_response.go +++ b/internal/translator/openai/claude/openai_claude_response.go @@ -289,21 +289,17 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI // Only process if usage has actual values (not null) if param.FinishReason != "" { usage := root.Get("usage") - var inputTokens, outputTokens int64 + var inputTokens, outputTokens, cachedTokens int64 if usage.Exists() && usage.Type != gjson.Null { - // Check if usage has actual token counts - promptTokens := usage.Get("prompt_tokens") - completionTokens := usage.Get("completion_tokens") - - if promptTokens.Exists() && completionTokens.Exists() { - inputTokens = promptTokens.Int() - outputTokens = completionTokens.Int() - } + inputTokens, outputTokens, cachedTokens = extractOpenAIUsage(usage) // Send message_delta with usage messageDeltaJSON := `{"type":"message_delta","delta":{"stop_reason":"","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}` messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "delta.stop_reason", mapOpenAIFinishReasonToAnthropic(param.FinishReason)) messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.input_tokens", inputTokens) messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.output_tokens", outputTokens) + if cachedTokens > 0 { + messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.cache_read_input_tokens", cachedTokens) + } results = append(results, "event: message_delta\ndata: "+messageDeltaJSON+"\n\n") param.MessageDeltaSent = true @@ -423,13 +419,12 @@ func convertOpenAINonStreamingToAnthropic(rawJSON []byte) []string { // Set usage information if usage := root.Get("usage"); usage.Exists() { - out, _ = sjson.Set(out, "usage.input_tokens", usage.Get("prompt_tokens").Int()) - out, _ = sjson.Set(out, "usage.output_tokens", usage.Get("completion_tokens").Int()) - reasoningTokens := int64(0) - if v := usage.Get("completion_tokens_details.reasoning_tokens"); v.Exists() { - reasoningTokens = v.Int() + inputTokens, outputTokens, cachedTokens := extractOpenAIUsage(usage) + out, _ = sjson.Set(out, "usage.input_tokens", inputTokens) + out, _ = sjson.Set(out, "usage.output_tokens", outputTokens) + if cachedTokens > 0 { + out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens) } - out, _ = sjson.Set(out, "usage.reasoning_tokens", reasoningTokens) } return []string{out} @@ -674,8 +669,12 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina } if respUsage := root.Get("usage"); respUsage.Exists() { - out, _ = sjson.Set(out, "usage.input_tokens", respUsage.Get("prompt_tokens").Int()) - out, _ = sjson.Set(out, "usage.output_tokens", respUsage.Get("completion_tokens").Int()) + inputTokens, outputTokens, cachedTokens := extractOpenAIUsage(respUsage) + out, _ = sjson.Set(out, "usage.input_tokens", inputTokens) + out, _ = sjson.Set(out, "usage.output_tokens", outputTokens) + if cachedTokens > 0 { + out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens) + } } if !stopReasonSet { @@ -692,3 +691,23 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina func ClaudeTokenCount(ctx context.Context, count int64) string { return fmt.Sprintf(`{"input_tokens":%d}`, count) } + +func extractOpenAIUsage(usage gjson.Result) (int64, int64, int64) { + if !usage.Exists() || usage.Type == gjson.Null { + return 0, 0, 0 + } + + inputTokens := usage.Get("prompt_tokens").Int() + outputTokens := usage.Get("completion_tokens").Int() + cachedTokens := usage.Get("prompt_tokens_details.cached_tokens").Int() + + if cachedTokens > 0 { + if inputTokens >= cachedTokens { + inputTokens -= cachedTokens + } else { + inputTokens = 0 + } + } + + return inputTokens, outputTokens, cachedTokens +}