feat(translator): capture cached token count in usage metadata and handle prompt caching

- Added support to extract and include `cachedContentTokenCount` in `usage.prompt_tokens_details`. - Logged warnings for failures to set cached token count for better debugging.
2026-03-02 04:24:11 +08:00 · 2026-02-06 21:28:40 +08:00
parent dc9b4dd017
commit 1187aa8222
1 changed files with 10 additions and 0 deletions
@@ -14,6 +14,7 @@ import (
 	"time"

 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/chat-completions"
+	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -85,6 +86,7 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ

 	// Extract and set usage metadata (token counts).
 	if usageResult := gjson.GetBytes(rawJSON, "response.usageMetadata"); usageResult.Exists() {
+		cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 		if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
 			template, _ = sjson.Set(template, "usage.completion_tokens", candidatesTokenCountResult.Int())
 		}
@@ -97,6 +99,14 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
 		if thoughtsTokenCount > 0 {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
+		// Include cached token count if present (indicates prompt caching is working)
+		if cachedTokenCount > 0 {
+			var err error
+			template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+			if err != nil {
+				log.Warnf("antigravity openai response: failed to set cached_tokens: %v", err)
+			}
+		}
 	}

 	// Process the main content part of the response.