diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go
index af9ffef1..91bc0423 100644
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go
@@ -95,9 +95,9 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq
 		if totalTokenCountResult := usageResult.Get("totalTokenCount"); totalTokenCountResult.Exists() {
 			template, _ = sjson.Set(template, "usage.total_tokens", totalTokenCountResult.Int())
 		}
-		promptTokenCount := usageResult.Get("promptTokenCount").Int() - cachedTokenCount
+		promptTokenCount := usageResult.Get("promptTokenCount").Int()
 		thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
-		template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
+		template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount)
 		if thoughtsTokenCount > 0 {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
index 0415e014..b26d431f 100644
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
@@ -100,7 +100,7 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
 		}
 		promptTokenCount := usageResult.Get("promptTokenCount").Int()
 		thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
-		template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
+		template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount)
 		if thoughtsTokenCount > 0 {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
index ee581c46..aeec5e9e 100644
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
@@ -100,9 +100,9 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
 		if totalTokenCountResult := usageResult.Get("totalTokenCount"); totalTokenCountResult.Exists() {
 			baseTemplate, _ = sjson.Set(baseTemplate, "usage.total_tokens", totalTokenCountResult.Int())
 		}
-		promptTokenCount := usageResult.Get("promptTokenCount").Int() - cachedTokenCount
+		promptTokenCount := usageResult.Get("promptTokenCount").Int()
 		thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
-		baseTemplate, _ = sjson.Set(baseTemplate, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
+		baseTemplate, _ = sjson.Set(baseTemplate, "usage.prompt_tokens", promptTokenCount)
 		if thoughtsTokenCount > 0 {
 			baseTemplate, _ = sjson.Set(baseTemplate, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
@@ -297,7 +297,7 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina
 		promptTokenCount := usageResult.Get("promptTokenCount").Int()
 		thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
 		cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
-		template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
+		template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount)
 		if thoughtsTokenCount > 0 {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_response.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_response.go
index 985897fa..73609be7 100644
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_response.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_response.go
@@ -531,8 +531,8 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
 	// usage mapping
 	if um := root.Get("usageMetadata"); um.Exists() {
-		// input tokens = prompt + thoughts
-		input := um.Get("promptTokenCount").Int() + um.Get("thoughtsTokenCount").Int()
+		// input tokens = prompt only (thoughts go to output)
+		input := um.Get("promptTokenCount").Int()
 		completed, _ = sjson.Set(completed, "response.usage.input_tokens", input)
 		// cached token details: align with OpenAI "cached_tokens" semantics.
 		completed, _ = sjson.Set(completed, "response.usage.input_tokens_details.cached_tokens", um.Get("cachedContentTokenCount").Int())
@@ -737,8 +737,8 @@ func ConvertGeminiResponseToOpenAIResponsesNonStream(_ context.Context, _ string
 	// usage mapping
 	if um := root.Get("usageMetadata"); um.Exists() {
-		// input tokens = prompt + thoughts
-		input := um.Get("promptTokenCount").Int() + um.Get("thoughtsTokenCount").Int()
+		// input tokens = prompt only (thoughts go to output)
+		input := um.Get("promptTokenCount").Int()
 		resp, _ = sjson.Set(resp, "usage.input_tokens", input)
 		// cached token details: align with OpenAI "cached_tokens" semantics.
 		resp, _ = sjson.Set(resp, "usage.input_tokens_details.cached_tokens", um.Get("cachedContentTokenCount").Int())