diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
index 5a1faf51..97c18c1e 100644
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
@@ -14,6 +14,7 @@ import (
 	"time"

 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/chat-completions"
+	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -85,6 +86,7 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ

 	// Extract and set usage metadata (token counts).
 	if usageResult := gjson.GetBytes(rawJSON, "response.usageMetadata"); usageResult.Exists() {
+		cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 		if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
 			template, _ = sjson.Set(template, "usage.completion_tokens", candidatesTokenCountResult.Int())
 		}
@@ -97,6 +99,14 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
 		if thoughtsTokenCount > 0 {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
+		// Include cached token count if present (indicates prompt caching is working)
+		if cachedTokenCount > 0 {
+			var err error
+			template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+			if err != nil {
+				log.Warnf("antigravity openai response: failed to set cached_tokens: %v", err)
+			}
+		}
 	}

 	// Process the main content part of the response.
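
For reference, a minimal standalone sketch of the gjson/sjson mapping this patch performs. The payload and template below are illustrative, not taken from the repository; only the field paths (`response.usageMetadata.cachedContentTokenCount`, `usage.prompt_tokens_details.cached_tokens`) and the tidwall gjson/sjson calls mirror the diff.

package main

import (
	"fmt"

	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

func main() {
	// Illustrative Gemini CLI response; the token values are made up.
	raw := []byte(`{"response":{"usageMetadata":{
		"promptTokenCount": 1200,
		"candidatesTokenCount": 300,
		"cachedContentTokenCount": 1000
	}}}`)

	// Minimal OpenAI-style usage template (an assumption; the real
	// template lives elsewhere in the translator).
	template := `{"usage":{"prompt_tokens":0,"completion_tokens":0}}`

	if usage := gjson.GetBytes(raw, "response.usageMetadata"); usage.Exists() {
		template, _ = sjson.Set(template, "usage.prompt_tokens", usage.Get("promptTokenCount").Int())
		template, _ = sjson.Set(template, "usage.completion_tokens", usage.Get("candidatesTokenCount").Int())

		// Mirror the patch: only emit cached_tokens when the upstream
		// response reports a non-zero cached token count.
		if cached := usage.Get("cachedContentTokenCount").Int(); cached > 0 {
			template, _ = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cached)
		}
	}

	fmt.Println(template)
	// {"usage":{"prompt_tokens":1200,"completion_tokens":300,"prompt_tokens_details":{"cached_tokens":1000}}}
}

Note that sjson.Set creates the intermediate `prompt_tokens_details` object automatically, which is why the patch can set the nested path in a single call, and the `> 0` guard keeps the field out of responses where no prompt caching occurred.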