Compare commits

..

12 Commits

Author SHA1 Message Date
Luis Pater
9d50a68768 **feat(translator): improve content processing and Antigravity request conversion**
- Refactored response translation logic to support mixed content types (`input_text`, `output_text`, `input_image`) with better role assignments and part handling.
- Added image processing logic for embedding inline data with MIME type and base64 encoded content.
- Updated Antigravity request conversion to replace Gemini CLI references for consistency.
2025-11-22 21:34:34 +08:00
Luis Pater
7c3c24addc Merge pull request #306 from router-for-me/usage
fix some bugs
2025-11-22 17:45:49 +08:00
hkfires
166fa9e2e6 fix(gemini): parse stream usage from JSON, skip thoughtSignature 2025-11-22 16:07:12 +08:00
hkfires
88e566281e fix(gemini): filter SSE usage metadata in streams 2025-11-22 15:53:36 +08:00
hkfires
d32bb9db6b fix(runtime): treat non-empty finishReason as terminal 2025-11-22 15:39:46 +08:00
hkfires
8356b35320 fix(executor): expire stop chunks without usage metadata 2025-11-22 15:27:47 +08:00
hkfires
19a048879c feat(runtime): track antigravity usage and token counts 2025-11-22 14:04:28 +08:00
hkfires
1061354b2f fix: handle empty and non-JSON SSE chunks safely 2025-11-22 13:49:23 +08:00
hkfires
46b4110ff3 fix: preserve SSE usage metadata-only trailing chunks 2025-11-22 13:25:25 +08:00
hkfires
c29931e093 fix(translator): ignore empty JSON chunks in OpenAI responses 2025-11-22 13:09:16 +08:00
hkfires
b05cfd9f84 fix(translator): include empty text chunks in responses 2025-11-22 13:03:50 +08:00
hkfires
8ce22b8403 fix(sse): preserve usage metadata for stop chunks 2025-11-22 12:50:23 +08:00
8 changed files with 310 additions and 155 deletions

View File

@@ -104,6 +104,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
return resp, err
}
reporter.publish(ctx, parseAntigravityUsage(bodyBytes))
var param any
converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bodyBytes, &param)
resp = cliproxyexecutor.Response{Payload: []byte(converted)}
@@ -172,7 +173,16 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
// Only retain usage statistics in the terminal chunk
line = FilterSSEUsageMetadata(line)
chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bytes.Clone(line), &param)
payload := jsonPayload(line)
if payload == nil {
continue
}
if detail, ok := parseAntigravityStreamUsage(payload); ok {
reporter.publish(ctx, detail)
}
chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bytes.Clone(payload), &param)
for i := range chunks {
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
}

View File

@@ -256,10 +256,15 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
for scanner.Scan() {
line := scanner.Bytes()
appendAPIResponseChunk(ctx, e.cfg, line)
if detail, ok := parseGeminiStreamUsage(line); ok {
filtered := FilterSSEUsageMetadata(line)
payload := jsonPayload(filtered)
if len(payload) == 0 {
continue
}
if detail, ok := parseGeminiStreamUsage(payload); ok {
reporter.publish(ctx, detail)
}
lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param)
lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(payload), &param)
for i := range lines {
out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
}

View File

@@ -365,6 +365,204 @@ func parseGeminiCLIStreamUsage(line []byte) (usage.Detail, bool) {
return detail, true
}
func parseAntigravityUsage(data []byte) usage.Detail {
usageNode := gjson.ParseBytes(data)
node := usageNode.Get("response.usageMetadata")
if !node.Exists() {
node = usageNode.Get("usageMetadata")
}
if !node.Exists() {
node = usageNode.Get("usage_metadata")
}
if !node.Exists() {
return usage.Detail{}
}
detail := usage.Detail{
InputTokens: node.Get("promptTokenCount").Int(),
OutputTokens: node.Get("candidatesTokenCount").Int(),
ReasoningTokens: node.Get("thoughtsTokenCount").Int(),
TotalTokens: node.Get("totalTokenCount").Int(),
}
if detail.TotalTokens == 0 {
detail.TotalTokens = detail.InputTokens + detail.OutputTokens + detail.ReasoningTokens
}
return detail
}
func parseAntigravityStreamUsage(line []byte) (usage.Detail, bool) {
payload := jsonPayload(line)
if len(payload) == 0 || !gjson.ValidBytes(payload) {
return usage.Detail{}, false
}
node := gjson.GetBytes(payload, "response.usageMetadata")
if !node.Exists() {
node = gjson.GetBytes(payload, "usageMetadata")
}
if !node.Exists() {
node = gjson.GetBytes(payload, "usage_metadata")
}
if !node.Exists() {
return usage.Detail{}, false
}
detail := usage.Detail{
InputTokens: node.Get("promptTokenCount").Int(),
OutputTokens: node.Get("candidatesTokenCount").Int(),
ReasoningTokens: node.Get("thoughtsTokenCount").Int(),
TotalTokens: node.Get("totalTokenCount").Int(),
}
if detail.TotalTokens == 0 {
detail.TotalTokens = detail.InputTokens + detail.OutputTokens + detail.ReasoningTokens
}
return detail, true
}
var stopChunkWithoutUsage sync.Map
func rememberStopWithoutUsage(traceID string) {
stopChunkWithoutUsage.Store(traceID, struct{}{})
time.AfterFunc(10*time.Minute, func() { stopChunkWithoutUsage.Delete(traceID) })
}
// FilterSSEUsageMetadata removes usageMetadata from SSE events that are not
// terminal (finishReason != "stop"). Stop chunks are left untouched. This
// function is shared between aistudio and antigravity executors.
func FilterSSEUsageMetadata(payload []byte) []byte {
if len(payload) == 0 {
return payload
}
lines := bytes.Split(payload, []byte("\n"))
modified := false
foundData := false
for idx, line := range lines {
trimmed := bytes.TrimSpace(line)
if len(trimmed) == 0 || !bytes.HasPrefix(trimmed, []byte("data:")) {
continue
}
foundData = true
dataIdx := bytes.Index(line, []byte("data:"))
if dataIdx < 0 {
continue
}
rawJSON := bytes.TrimSpace(line[dataIdx+5:])
traceID := gjson.GetBytes(rawJSON, "traceId").String()
if isStopChunkWithoutUsage(rawJSON) && traceID != "" {
rememberStopWithoutUsage(traceID)
continue
}
if traceID != "" {
if _, ok := stopChunkWithoutUsage.Load(traceID); ok && hasUsageMetadata(rawJSON) {
stopChunkWithoutUsage.Delete(traceID)
continue
}
}
cleaned, changed := StripUsageMetadataFromJSON(rawJSON)
if !changed {
continue
}
var rebuilt []byte
rebuilt = append(rebuilt, line[:dataIdx]...)
rebuilt = append(rebuilt, []byte("data:")...)
if len(cleaned) > 0 {
rebuilt = append(rebuilt, ' ')
rebuilt = append(rebuilt, cleaned...)
}
lines[idx] = rebuilt
modified = true
}
if !modified {
if !foundData {
// Handle payloads that are raw JSON without SSE data: prefix.
trimmed := bytes.TrimSpace(payload)
cleaned, changed := StripUsageMetadataFromJSON(trimmed)
if !changed {
return payload
}
return cleaned
}
return payload
}
return bytes.Join(lines, []byte("\n"))
}
// StripUsageMetadataFromJSON drops usageMetadata unless finishReason is present (terminal).
// It handles both formats:
// - Aistudio: candidates.0.finishReason
// - Antigravity: response.candidates.0.finishReason
func StripUsageMetadataFromJSON(rawJSON []byte) ([]byte, bool) {
jsonBytes := bytes.TrimSpace(rawJSON)
if len(jsonBytes) == 0 || !gjson.ValidBytes(jsonBytes) {
return rawJSON, false
}
// Check for finishReason in both aistudio and antigravity formats
finishReason := gjson.GetBytes(jsonBytes, "candidates.0.finishReason")
if !finishReason.Exists() {
finishReason = gjson.GetBytes(jsonBytes, "response.candidates.0.finishReason")
}
terminalReason := finishReason.Exists() && strings.TrimSpace(finishReason.String()) != ""
usageMetadata := gjson.GetBytes(jsonBytes, "usageMetadata")
if !usageMetadata.Exists() {
usageMetadata = gjson.GetBytes(jsonBytes, "response.usageMetadata")
}
// Terminal chunk: keep as-is.
if terminalReason {
return rawJSON, false
}
// Nothing to strip
if !usageMetadata.Exists() {
return rawJSON, false
}
// Remove usageMetadata from both possible locations
cleaned := jsonBytes
var changed bool
if gjson.GetBytes(cleaned, "usageMetadata").Exists() {
cleaned, _ = sjson.DeleteBytes(cleaned, "usageMetadata")
changed = true
}
if gjson.GetBytes(cleaned, "response.usageMetadata").Exists() {
cleaned, _ = sjson.DeleteBytes(cleaned, "response.usageMetadata")
changed = true
}
return cleaned, changed
}
func hasUsageMetadata(jsonBytes []byte) bool {
if len(jsonBytes) == 0 || !gjson.ValidBytes(jsonBytes) {
return false
}
if gjson.GetBytes(jsonBytes, "usageMetadata").Exists() {
return true
}
if gjson.GetBytes(jsonBytes, "response.usageMetadata").Exists() {
return true
}
return false
}
func isStopChunkWithoutUsage(jsonBytes []byte) bool {
if len(jsonBytes) == 0 || !gjson.ValidBytes(jsonBytes) {
return false
}
finishReason := gjson.GetBytes(jsonBytes, "candidates.0.finishReason")
if !finishReason.Exists() {
finishReason = gjson.GetBytes(jsonBytes, "response.candidates.0.finishReason")
}
trimmed := strings.TrimSpace(finishReason.String())
if !finishReason.Exists() || trimmed == "" {
return false
}
return !hasUsageMetadata(jsonBytes)
}
func jsonPayload(line []byte) []byte {
trimmed := bytes.TrimSpace(line)
if len(trimmed) == 0 {
@@ -384,109 +582,3 @@ func jsonPayload(line []byte) []byte {
}
return trimmed
}
// FilterSSEUsageMetadata removes usageMetadata from intermediate SSE events so that
// only the terminal chunk retains token statistics.
// This function is shared between aistudio and antigravity executors.
func FilterSSEUsageMetadata(payload []byte) []byte {
if len(payload) == 0 {
return payload
}
lines := bytes.Split(payload, []byte("\n"))
modified := false
for idx, line := range lines {
trimmed := bytes.TrimSpace(line)
if len(trimmed) == 0 || !bytes.HasPrefix(trimmed, []byte("data:")) {
continue
}
dataIdx := bytes.Index(line, []byte("data:"))
if dataIdx < 0 {
continue
}
rawJSON := bytes.TrimSpace(line[dataIdx+5:])
cleaned, changed := StripUsageMetadataFromJSON(rawJSON)
if !changed {
continue
}
var rebuilt []byte
rebuilt = append(rebuilt, line[:dataIdx]...)
rebuilt = append(rebuilt, []byte("data:")...)
if len(cleaned) > 0 {
rebuilt = append(rebuilt, ' ')
rebuilt = append(rebuilt, cleaned...)
}
lines[idx] = rebuilt
modified = true
}
if !modified {
return payload
}
return bytes.Join(lines, []byte("\n"))
}
// StripUsageMetadataFromJSON drops usageMetadata when no finishReason is present.
// This function is shared between aistudio and antigravity executors.
// It handles both formats:
// - Aistudio: candidates.0.finishReason
// - Antigravity: response.candidates.0.finishReason
func StripUsageMetadataFromJSON(rawJSON []byte) ([]byte, bool) {
jsonBytes := bytes.TrimSpace(rawJSON)
if len(jsonBytes) == 0 || !gjson.ValidBytes(jsonBytes) {
return rawJSON, false
}
// Check for finishReason in both aistudio and antigravity formats
finishReason := gjson.GetBytes(jsonBytes, "candidates.0.finishReason")
if !finishReason.Exists() {
finishReason = gjson.GetBytes(jsonBytes, "response.candidates.0.finishReason")
}
// If finishReason exists and is not empty, keep the usageMetadata
if finishReason.Exists() && finishReason.String() != "" {
return rawJSON, false
}
// Check for usageMetadata in both possible locations
usageMetadata := gjson.GetBytes(jsonBytes, "usageMetadata")
if !usageMetadata.Exists() {
usageMetadata = gjson.GetBytes(jsonBytes, "response.usageMetadata")
}
if hasNonZeroUsageMetadata(usageMetadata) {
return rawJSON, false
}
if !usageMetadata.Exists() {
return rawJSON, false
}
// Remove usageMetadata from both possible locations
cleaned := jsonBytes
var changed bool
// Try to remove usageMetadata from root level
if gjson.GetBytes(cleaned, "usageMetadata").Exists() {
cleaned, _ = sjson.DeleteBytes(cleaned, "usageMetadata")
changed = true
}
// Try to remove usageMetadata from response level
if gjson.GetBytes(cleaned, "response.usageMetadata").Exists() {
cleaned, _ = sjson.DeleteBytes(cleaned, "response.usageMetadata")
changed = true
}
return cleaned, changed
}
// hasNonZeroUsageMetadata checks if any usage token counts are present.
func hasNonZeroUsageMetadata(node gjson.Result) bool {
if !node.Exists() {
return false
}
return node.Get("totalTokenCount").Int() > 0 ||
node.Get("promptTokenCount").Int() > 0 ||
node.Get("candidatesTokenCount").Int() > 0 ||
node.Get("thoughtsTokenCount").Int() > 0
}

View File

@@ -98,7 +98,6 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq
// Process the main content part of the response.
partsResult := gjson.GetBytes(rawJSON, "response.candidates.0.content.parts")
hasFunctionCall := false
hasValidContent := false
if partsResult.IsArray() {
partResults := partsResult.Array()
for i := 0; i < len(partResults); i++ {
@@ -119,10 +118,6 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq
if partTextResult.Exists() {
textContent := partTextResult.String()
// Skip empty text content to avoid generating unnecessary chunks
if textContent == "" {
continue
}
// Handle text content, distinguishing between regular content and reasoning/thoughts.
if partResult.Get("thought").Bool() {
@@ -131,7 +126,6 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq
template, _ = sjson.Set(template, "choices.0.delta.content", textContent)
}
template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
hasValidContent = true
} else if functionCallResult.Exists() {
// Handle function call content.
hasFunctionCall = true
@@ -191,12 +185,6 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq
template, _ = sjson.Set(template, "choices.0.native_finish_reason", "tool_calls")
}
// Only return a chunk if there's actual content or a finish reason
finishReason := gjson.GetBytes(rawJSON, "response.candidates.0.finishReason")
if !hasValidContent && !finishReason.Exists() {
return []string{}
}
return []string{template}
}

View File

@@ -3,12 +3,12 @@ package responses
import (
"bytes"
. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini-cli/gemini"
. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/antigravity/gemini"
. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/responses"
)
func ConvertOpenAIResponsesRequestToAntigravity(modelName string, inputRawJSON []byte, stream bool) []byte {
rawJSON := bytes.Clone(inputRawJSON)
rawJSON = ConvertOpenAIResponsesRequestToGemini(modelName, rawJSON, stream)
return ConvertGeminiRequestToGeminiCLI(modelName, rawJSON, stream)
return ConvertGeminiRequestToAntigravity(modelName, rawJSON, stream)
}

View File

@@ -98,7 +98,6 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
// Process the main content part of the response.
partsResult := gjson.GetBytes(rawJSON, "response.candidates.0.content.parts")
hasFunctionCall := false
hasValidContent := false
if partsResult.IsArray() {
partResults := partsResult.Array()
for i := 0; i < len(partResults); i++ {
@@ -119,10 +118,6 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
if partTextResult.Exists() {
textContent := partTextResult.String()
// Skip empty text content to avoid generating unnecessary chunks
if textContent == "" {
continue
}
// Handle text content, distinguishing between regular content and reasoning/thoughts.
if partResult.Get("thought").Bool() {
@@ -131,7 +126,6 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
template, _ = sjson.Set(template, "choices.0.delta.content", textContent)
}
template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
hasValidContent = true
} else if functionCallResult.Exists() {
// Handle function call content.
hasFunctionCall = true
@@ -191,12 +185,6 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
template, _ = sjson.Set(template, "choices.0.native_finish_reason", "tool_calls")
}
// Only return a chunk if there's actual content or a finish reason
finishReason := gjson.GetBytes(rawJSON, "response.candidates.0.finishReason")
if !hasValidContent && !finishReason.Exists() {
return []string{}
}
return []string{template}
}

View File

@@ -111,13 +111,23 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
if !inlineDataResult.Exists() {
inlineDataResult = partResult.Get("inline_data")
}
thoughtSignatureResult := partResult.Get("thoughtSignature")
if !thoughtSignatureResult.Exists() {
thoughtSignatureResult = partResult.Get("thought_signature")
}
// Skip thoughtSignature parts (encrypted reasoning not exposed downstream).
if thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != "" {
continue
}
if partTextResult.Exists() {
text := partTextResult.String()
// Handle text content, distinguishing between regular content and reasoning/thoughts.
if partResult.Get("thought").Bool() {
template, _ = sjson.Set(template, "choices.0.delta.reasoning_content", partTextResult.String())
template, _ = sjson.Set(template, "choices.0.delta.reasoning_content", text)
} else {
template, _ = sjson.Set(template, "choices.0.delta.content", partTextResult.String())
template, _ = sjson.Set(template, "choices.0.delta.content", text)
}
template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
} else if functionCallResult.Exists() {

View File

@@ -67,39 +67,101 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
// even when the message.role is "user". We split such items into distinct Gemini messages
// with roles derived from the content type to match docs/convert-2.md.
if contentArray := item.Get("content"); contentArray.Exists() && contentArray.IsArray() {
currentRole := ""
var currentParts []string
flush := func() {
if currentRole == "" || len(currentParts) == 0 {
currentParts = nil
return
}
one := `{"role":"","parts":[]}`
one, _ = sjson.Set(one, "role", currentRole)
for _, part := range currentParts {
one, _ = sjson.SetRaw(one, "parts.-1", part)
}
out, _ = sjson.SetRaw(out, "contents.-1", one)
currentParts = nil
}
contentArray.ForEach(func(_, contentItem gjson.Result) bool {
contentType := contentItem.Get("type").String()
if contentType == "" {
contentType = "input_text"
}
effRole := "user"
if itemRole != "" {
switch strings.ToLower(itemRole) {
case "assistant", "model":
effRole = "model"
default:
effRole = strings.ToLower(itemRole)
}
}
if contentType == "output_text" {
effRole = "model"
}
if effRole == "assistant" {
effRole = "model"
}
if currentRole != "" && effRole != currentRole {
flush()
currentRole = ""
}
if currentRole == "" {
currentRole = effRole
}
var partJSON string
switch contentType {
case "input_text", "output_text":
if text := contentItem.Get("text"); text.Exists() {
effRole := "user"
if itemRole != "" {
switch strings.ToLower(itemRole) {
case "assistant", "model":
effRole = "model"
default:
effRole = strings.ToLower(itemRole)
partJSON = `{"text":""}`
partJSON, _ = sjson.Set(partJSON, "text", text.String())
}
case "input_image":
imageURL := contentItem.Get("image_url").String()
if imageURL == "" {
imageURL = contentItem.Get("url").String()
}
if imageURL != "" {
mimeType := "application/octet-stream"
data := ""
if strings.HasPrefix(imageURL, "data:") {
trimmed := strings.TrimPrefix(imageURL, "data:")
mediaAndData := strings.SplitN(trimmed, ";base64,", 2)
if len(mediaAndData) == 2 {
if mediaAndData[0] != "" {
mimeType = mediaAndData[0]
}
data = mediaAndData[1]
} else {
mediaAndData = strings.SplitN(trimmed, ",", 2)
if len(mediaAndData) == 2 {
if mediaAndData[0] != "" {
mimeType = mediaAndData[0]
}
data = mediaAndData[1]
}
}
}
if contentType == "output_text" {
effRole = "model"
if data != "" {
partJSON = `{"inline_data":{"mime_type":"","data":""}}`
partJSON, _ = sjson.Set(partJSON, "inline_data.mime_type", mimeType)
partJSON, _ = sjson.Set(partJSON, "inline_data.data", data)
}
if effRole == "assistant" {
effRole = "model"
}
one := `{"role":"","parts":[]}`
one, _ = sjson.Set(one, "role", effRole)
textPart := `{"text":""}`
textPart, _ = sjson.Set(textPart, "text", text.String())
one, _ = sjson.SetRaw(one, "parts.-1", textPart)
out, _ = sjson.SetRaw(out, "contents.-1", one)
}
}
if partJSON != "" {
currentParts = append(currentParts, partJSON)
}
return true
})
flush()
}
case "function_call":