feat(translator): capture cached token count in usage metadata and handle prompt caching

- Added support to extract and include `cachedContentTokenCount` in `usage.prompt_tokens_details`.
- Logged warnings for failures to set cached token count for better debugging.
This commit is contained in:
Luis Pater
2026-02-06 21:28:40 +08:00
parent dc9b4dd017
commit 1187aa8222

View File

@@ -14,6 +14,7 @@ import (
"time"
. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/chat-completions"
log "github.com/sirupsen/logrus"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
@@ -85,6 +86,7 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
// Extract and set usage metadata (token counts).
if usageResult := gjson.GetBytes(rawJSON, "response.usageMetadata"); usageResult.Exists() {
cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
template, _ = sjson.Set(template, "usage.completion_tokens", candidatesTokenCountResult.Int())
}
@@ -97,6 +99,14 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
if thoughtsTokenCount > 0 {
template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
}
// Include cached token count if present (indicates prompt caching is working)
if cachedTokenCount > 0 {
var err error
template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
if err != nil {
log.Warnf("antigravity openai response: failed to set cached_tokens: %v", err)
}
}
}
// Process the main content part of the response.