diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go
index 8b4e37ee..4ccc65df 100644
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -39,7 +39,7 @@ const (
 	antigravityModelsPath   = "/v1internal:fetchAvailableModels"
 	antigravityClientID     = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
 	antigravityClientSecret = "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"
-	defaultAntigravityAgent = "antigravity/1.11.5 windows/amd64"
+	defaultAntigravityAgent = "antigravity/1.104.0 darwin/arm64"
 	antigravityAuthType     = "antigravity"
 	refreshSkew             = 3000 * time.Second
 )
@@ -1145,10 +1145,11 @@ func antigravityBaseURLFallbackOrder(auth *cliproxyauth.Auth) []string {
 	if base := resolveCustomAntigravityBaseURL(auth); base != "" {
 		return []string{base}
 	}
+	// Production endpoint first (matches the antigravity.js plugin behavior);
+	// production may also have better caching support.
 	return []string{
-		antigravityBaseURLDaily,
-		// antigravityBaseURLAutopush,
 		antigravityBaseURLProd,
+		antigravityBaseURLDaily,
 	}
 }

@@ -1183,7 +1184,6 @@ func geminiToAntigravity(modelName string, payload []byte, projectID string) []b
 		template, _ = sjson.Set(template, "project", generateProjectID())
 	}
 	template, _ = sjson.Set(template, "requestId", generateRequestID())
-	template, _ = sjson.Set(template, "request.sessionId", generateSessionID())
 	template, _ = sjson.Delete(template, "request.safetySettings")
 	template, _ = sjson.Set(template, "request.toolConfig.functionCallingConfig.mode", "VALIDATED")

@@ -1218,11 +1218,6 @@ func generateRequestID() string {
 	return "agent-" + uuid.NewString()
 }

-func generateSessionID() string {
-	n := randSource.Int63n(9_000_000_000_000_000_000)
-	return "-" + strconv.FormatInt(n, 10)
-}
-
 func generateProjectID() string {
 	adjectives := []string{"useful", "bright", "swift", "calm", "bold"}
 	nouns := []string{"fuze", "wave", "spark", "flow", "core"}
diff --git a/internal/translator/antigravity/claude/antigravity_claude_response.go b/internal/translator/antigravity/claude/antigravity_claude_response.go
index 52fc358e..30d0b164 100644
--- a/internal/translator/antigravity/claude/antigravity_claude_response.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_response.go
@@ -32,6 +32,7 @@ type Params struct {
 	CandidatesTokenCount int64 // Cached candidate token count from usage metadata
 	ThoughtsTokenCount   int64 // Cached thinking token count from usage metadata
 	TotalTokenCount      int64 // Cached total token count from usage metadata
+	CachedTokenCount     int64 // Cached content token count (indicates prompt caching)
 	HasSentFinalEvents   bool  // Indicates if final content/message events have been sent
 	HasToolUse           bool  // Indicates if tool use was observed in the stream
 	HasContent           bool  // Tracks whether any content (text, thinking, or tool use) has been output
@@ -254,6 +255,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
 		params.CandidatesTokenCount = usageResult.Get("candidatesTokenCount").Int()
 		params.ThoughtsTokenCount = usageResult.Get("thoughtsTokenCount").Int()
 		params.TotalTokenCount = usageResult.Get("totalTokenCount").Int()
+		params.CachedTokenCount = usageResult.Get("cachedContentTokenCount").Int()
 		if params.CandidatesTokenCount == 0 && params.TotalTokenCount > 0 {
 			params.CandidatesTokenCount = params.TotalTokenCount - params.PromptTokenCount - params.ThoughtsTokenCount
 			if params.CandidatesTokenCount < 0 {
@@ -302,6 +304,10 @@ func appendFinalEvents(params *Params, output *string, force bool) {
 	*output = *output + "event: message_delta\n"
 	*output = *output + "data: "
 	delta := fmt.Sprintf(`{"type":"message_delta","delta":{"stop_reason":"%s","stop_sequence":null},"usage":{"input_tokens":%d,"output_tokens":%d}}`, stopReason, params.PromptTokenCount, usageOutputTokens)
+	// Add cache_read_input_tokens if cached tokens are present (indicates prompt caching is working).
+	if params.CachedTokenCount > 0 {
+		delta, _ = sjson.Set(delta, "usage.cache_read_input_tokens", params.CachedTokenCount)
+	}
 	*output = *output + delta + "\n\n\n"

 	params.HasSentFinalEvents = true
@@ -341,6 +347,7 @@ func ConvertAntigravityResponseToClaudeNonStream(_ context.Context, _ string, or
 	candidateTokens := root.Get("response.usageMetadata.candidatesTokenCount").Int()
 	thoughtTokens := root.Get("response.usageMetadata.thoughtsTokenCount").Int()
 	totalTokens := root.Get("response.usageMetadata.totalTokenCount").Int()
+	cachedTokens := root.Get("response.usageMetadata.cachedContentTokenCount").Int()
 	outputTokens := candidateTokens + thoughtTokens
 	if outputTokens == 0 && totalTokens > 0 {
 		outputTokens = totalTokens - promptTokens
@@ -354,6 +361,10 @@ func ConvertAntigravityResponseToClaudeNonStream(_ context.Context, _ string, or
 	responseJSON, _ = sjson.Set(responseJSON, "model", root.Get("response.modelVersion").String())
 	responseJSON, _ = sjson.Set(responseJSON, "usage.input_tokens", promptTokens)
 	responseJSON, _ = sjson.Set(responseJSON, "usage.output_tokens", outputTokens)
+	// Add cache_read_input_tokens if cached tokens are present (indicates prompt caching is working).
+	if cachedTokens > 0 {
+		responseJSON, _ = sjson.Set(responseJSON, "usage.cache_read_input_tokens", cachedTokens)
+	}

 	contentArrayInitialized := false
 	ensureContentArray := func() {
diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go
index 24694e1d..59a08621 100644
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go
@@ -94,10 +94,15 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq
 		}
 		promptTokenCount := usageResult.Get("promptTokenCount").Int()
 		thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
+		cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 		template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
 		if thoughtsTokenCount > 0 {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
+		// Include the cached token count if present (indicates prompt caching is working).
+		if cachedTokenCount > 0 {
+			template, _ = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+		}
 	}

 	// Process the main content part of the response.
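Reviewer note: a minimal, self-contained sketch (not part of the patch) of how the new cachedContentTokenCount plumbing is expected to surface in the Anthropic-style usage block. The upstream payload and token counts below are invented for illustration; the gjson/sjson calls mirror the pattern used in appendFinalEvents above.

package main

import (
	"fmt"

	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

func main() {
	// Invented upstream usage metadata; a non-zero cachedContentTokenCount
	// means part of the prompt was served from the provider's prompt cache.
	upstream := gjson.Parse(`{"promptTokenCount":1200,"candidatesTokenCount":300,"cachedContentTokenCount":1000}`)

	delta := `{"type":"message_delta","usage":{"input_tokens":1200,"output_tokens":300}}`

	// Same guard as the patch: only emit the field when non-zero, so clients
	// that do not expect cache_read_input_tokens see an unchanged payload.
	if cached := upstream.Get("cachedContentTokenCount").Int(); cached > 0 {
		delta, _ = sjson.Set(delta, "usage.cache_read_input_tokens", cached)
	}
	fmt.Println(delta)
	// {"type":"message_delta","usage":{"input_tokens":1200,"output_tokens":300,"cache_read_input_tokens":1000}}
}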
diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
index a1ebc855..e0ce4636 100644
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
@@ -97,10 +97,15 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
 		}
 		promptTokenCount := usageResult.Get("promptTokenCount").Int()
 		thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
+		cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 		template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
 		if thoughtsTokenCount > 0 {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
+		// Include the cached token count if present (indicates prompt caching is working).
+		if cachedTokenCount > 0 {
+			template, _ = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+		}
 	}

 	// Process the main content part of the response.
@@ -248,10 +253,15 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina
 		}
 		promptTokenCount := usageResult.Get("promptTokenCount").Int()
 		thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
+		cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 		template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
 		if thoughtsTokenCount > 0 {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
+		// Include the cached token count if present (indicates prompt caching is working).
+		if cachedTokenCount > 0 {
+			template, _ = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+		}
 	}

 	// Process the main content part of the response.
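A matching sketch for the OpenAI-shaped translators (again illustrative only, with invented numbers). A point worth noting for review: sjson creates the intermediate prompt_tokens_details object on demand, so the non-zero guard keeps the details block out of the payload entirely when nothing was cached.

package main

import (
	"fmt"

	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

func main() {
	// Invented upstream usage metadata.
	usage := gjson.Parse(`{"promptTokenCount":900,"thoughtsTokenCount":100,"cachedContentTokenCount":800}`)

	template := `{"usage":{"prompt_tokens":0,"completion_tokens":0}}`
	// As in the patch, thinking tokens are folded into prompt_tokens.
	template, _ = sjson.Set(template, "usage.prompt_tokens",
		usage.Get("promptTokenCount").Int()+usage.Get("thoughtsTokenCount").Int())

	// Nested sjson paths materialize prompt_tokens_details automatically;
	// the guard omits it when the upstream reported no cached tokens.
	if cached := usage.Get("cachedContentTokenCount").Int(); cached > 0 {
		template, _ = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cached)
	}
	fmt.Println(template)
	// {"usage":{"prompt_tokens":1000,"completion_tokens":0,"prompt_tokens_details":{"cached_tokens":800}}}
}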