diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_response.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_response.go index 5529d52a..985897fa 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_response.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_response.go @@ -20,6 +20,7 @@ type geminiToResponsesState struct { // message aggregation MsgOpened bool + MsgClosed bool MsgIndex int CurrentMsgID string TextBuf strings.Builder @@ -29,6 +30,7 @@ type geminiToResponsesState struct { ReasoningOpened bool ReasoningIndex int ReasoningItemID string + ReasoningEnc string ReasoningBuf strings.Builder ReasoningClosed bool @@ -37,6 +39,7 @@ type geminiToResponsesState struct { FuncArgsBuf map[int]*strings.Builder FuncNames map[int]string FuncCallIDs map[int]string + FuncDone map[int]bool } // responseIDCounter provides a process-wide unique counter for synthesized response identifiers. @@ -45,6 +48,39 @@ var responseIDCounter uint64 // funcCallIDCounter provides a process-wide unique counter for function call identifiers. var funcCallIDCounter uint64 +func pickRequestJSON(originalRequestRawJSON, requestRawJSON []byte) []byte { + if len(originalRequestRawJSON) > 0 && gjson.ValidBytes(originalRequestRawJSON) { + return originalRequestRawJSON + } + if len(requestRawJSON) > 0 && gjson.ValidBytes(requestRawJSON) { + return requestRawJSON + } + return nil +} + +func unwrapRequestRoot(root gjson.Result) gjson.Result { + req := root.Get("request") + if !req.Exists() { + return root + } + if req.Get("model").Exists() || req.Get("input").Exists() || req.Get("instructions").Exists() { + return req + } + return root +} + +func unwrapGeminiResponseRoot(root gjson.Result) gjson.Result { + resp := root.Get("response") + if !resp.Exists() { + return root + } + // Vertex-style Gemini responses wrap the actual payload in a "response" object. 
+ if resp.Get("candidates").Exists() || resp.Get("responseId").Exists() || resp.Get("usageMetadata").Exists() { + return resp + } + return root +} + func emitEvent(event string, payload string) string { return fmt.Sprintf("event: %s\ndata: %s", event, payload) } @@ -56,18 +92,37 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string, FuncArgsBuf: make(map[int]*strings.Builder), FuncNames: make(map[int]string), FuncCallIDs: make(map[int]string), + FuncDone: make(map[int]bool), } } st := (*param).(*geminiToResponsesState) + if st.FuncArgsBuf == nil { + st.FuncArgsBuf = make(map[int]*strings.Builder) + } + if st.FuncNames == nil { + st.FuncNames = make(map[int]string) + } + if st.FuncCallIDs == nil { + st.FuncCallIDs = make(map[int]string) + } + if st.FuncDone == nil { + st.FuncDone = make(map[int]bool) + } if bytes.HasPrefix(rawJSON, []byte("data:")) { rawJSON = bytes.TrimSpace(rawJSON[5:]) } + rawJSON = bytes.TrimSpace(rawJSON) + if len(rawJSON) == 0 || bytes.Equal(rawJSON, []byte("[DONE]")) { + return []string{} + } + root := gjson.ParseBytes(rawJSON) if !root.Exists() { return []string{} } + root = unwrapGeminiResponseRoot(root) var out []string nextSeq := func() int { st.Seq++; return st.Seq } @@ -98,19 +153,54 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string, itemDone, _ = sjson.Set(itemDone, "sequence_number", nextSeq()) itemDone, _ = sjson.Set(itemDone, "item.id", st.ReasoningItemID) itemDone, _ = sjson.Set(itemDone, "output_index", st.ReasoningIndex) + itemDone, _ = sjson.Set(itemDone, "item.encrypted_content", st.ReasoningEnc) itemDone, _ = sjson.Set(itemDone, "item.summary.0.text", full) out = append(out, emitEvent("response.output_item.done", itemDone)) st.ReasoningClosed = true } + // Helper to finalize the assistant message in correct order. + // It emits response.output_text.done, response.content_part.done, + // and response.output_item.done exactly once. 
+ finalizeMessage := func() { + if !st.MsgOpened || st.MsgClosed { + return + } + fullText := st.ItemTextBuf.String() + done := `{"type":"response.output_text.done","sequence_number":0,"item_id":"","output_index":0,"content_index":0,"text":"","logprobs":[]}` + done, _ = sjson.Set(done, "sequence_number", nextSeq()) + done, _ = sjson.Set(done, "item_id", st.CurrentMsgID) + done, _ = sjson.Set(done, "output_index", st.MsgIndex) + done, _ = sjson.Set(done, "text", fullText) + out = append(out, emitEvent("response.output_text.done", done)) + partDone := `{"type":"response.content_part.done","sequence_number":0,"item_id":"","output_index":0,"content_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""}}` + partDone, _ = sjson.Set(partDone, "sequence_number", nextSeq()) + partDone, _ = sjson.Set(partDone, "item_id", st.CurrentMsgID) + partDone, _ = sjson.Set(partDone, "output_index", st.MsgIndex) + partDone, _ = sjson.Set(partDone, "part.text", fullText) + out = append(out, emitEvent("response.content_part.done", partDone)) + final := `{"type":"response.output_item.done","sequence_number":0,"output_index":0,"item":{"id":"","type":"message","status":"completed","content":[{"type":"output_text","text":""}],"role":"assistant"}}` + final, _ = sjson.Set(final, "sequence_number", nextSeq()) + final, _ = sjson.Set(final, "output_index", st.MsgIndex) + final, _ = sjson.Set(final, "item.id", st.CurrentMsgID) + final, _ = sjson.Set(final, "item.content.0.text", fullText) + out = append(out, emitEvent("response.output_item.done", final)) + + st.MsgClosed = true + } + // Initialize per-response fields and emit created/in_progress once if !st.Started { - if v := root.Get("responseId"); v.Exists() { - st.ResponseID = v.String() + st.ResponseID = root.Get("responseId").String() + if st.ResponseID == "" { + st.ResponseID = fmt.Sprintf("resp_%x_%d", time.Now().UnixNano(), atomic.AddUint64(&responseIDCounter, 1)) + } + if !strings.HasPrefix(st.ResponseID, 
"resp_") { + st.ResponseID = fmt.Sprintf("resp_%s", st.ResponseID) } if v := root.Get("createTime"); v.Exists() { - if t, err := time.Parse(time.RFC3339Nano, v.String()); err == nil { + if t, errParseCreateTime := time.Parse(time.RFC3339Nano, v.String()); errParseCreateTime == nil { st.CreatedAt = t.Unix() } } @@ -143,15 +233,21 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string, // Ignore any late thought chunks after reasoning is finalized. return true } + if sig := part.Get("thoughtSignature"); sig.Exists() && sig.String() != "" && sig.String() != geminiResponsesThoughtSignature { + st.ReasoningEnc = sig.String() + } else if sig = part.Get("thought_signature"); sig.Exists() && sig.String() != "" && sig.String() != geminiResponsesThoughtSignature { + st.ReasoningEnc = sig.String() + } if !st.ReasoningOpened { st.ReasoningOpened = true st.ReasoningIndex = st.NextIndex st.NextIndex++ st.ReasoningItemID = fmt.Sprintf("rs_%s_%d", st.ResponseID, st.ReasoningIndex) - item := `{"type":"response.output_item.added","sequence_number":0,"output_index":0,"item":{"id":"","type":"reasoning","status":"in_progress","summary":[]}}` + item := `{"type":"response.output_item.added","sequence_number":0,"output_index":0,"item":{"id":"","type":"reasoning","status":"in_progress","encrypted_content":"","summary":[]}}` item, _ = sjson.Set(item, "sequence_number", nextSeq()) item, _ = sjson.Set(item, "output_index", st.ReasoningIndex) item, _ = sjson.Set(item, "item.id", st.ReasoningItemID) + item, _ = sjson.Set(item, "item.encrypted_content", st.ReasoningEnc) out = append(out, emitEvent("response.output_item.added", item)) partAdded := `{"type":"response.reasoning_summary_part.added","sequence_number":0,"item_id":"","output_index":0,"summary_index":0,"part":{"type":"summary_text","text":""}}` partAdded, _ = sjson.Set(partAdded, "sequence_number", nextSeq()) @@ -191,9 +287,9 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string, 
partAdded, _ = sjson.Set(partAdded, "output_index", st.MsgIndex) out = append(out, emitEvent("response.content_part.added", partAdded)) st.ItemTextBuf.Reset() - st.ItemTextBuf.WriteString(t.String()) } st.TextBuf.WriteString(t.String()) + st.ItemTextBuf.WriteString(t.String()) msg := `{"type":"response.output_text.delta","sequence_number":0,"item_id":"","output_index":0,"content_index":0,"delta":"","logprobs":[]}` msg, _ = sjson.Set(msg, "sequence_number", nextSeq()) msg, _ = sjson.Set(msg, "item_id", st.CurrentMsgID) @@ -205,8 +301,10 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string, // Function call if fc := part.Get("functionCall"); fc.Exists() { - // Before emitting function-call outputs, finalize reasoning if open. + // Before emitting function-call outputs, finalize reasoning and the message (if open). + // Responses streaming requires message done events before the next output_item.added. finalizeReasoning() + finalizeMessage() name := fc.Get("name").String() idx := st.NextIndex st.NextIndex++ @@ -219,6 +317,14 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string, } st.FuncNames[idx] = name + argsJSON := "{}" + if args := fc.Get("args"); args.Exists() { + argsJSON = args.Raw + } + if st.FuncArgsBuf[idx].Len() == 0 && argsJSON != "" { + st.FuncArgsBuf[idx].WriteString(argsJSON) + } + // Emit item.added for function call item := `{"type":"response.output_item.added","sequence_number":0,"output_index":0,"item":{"id":"","type":"function_call","status":"in_progress","arguments":"","call_id":"","name":""}}` item, _ = sjson.Set(item, "sequence_number", nextSeq()) @@ -228,10 +334,9 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string, item, _ = sjson.Set(item, "item.name", name) out = append(out, emitEvent("response.output_item.added", item)) - // Emit arguments delta (full args in one chunk) - if args := fc.Get("args"); args.Exists() { - argsJSON := args.Raw - 
st.FuncArgsBuf[idx].WriteString(argsJSON) + // Emit arguments delta (full args in one chunk). + // When Gemini omits args, emit "{}" to keep Responses streaming event order consistent. + if argsJSON != "" { ad := `{"type":"response.function_call_arguments.delta","sequence_number":0,"item_id":"","output_index":0,"delta":""}` ad, _ = sjson.Set(ad, "sequence_number", nextSeq()) ad, _ = sjson.Set(ad, "item_id", fmt.Sprintf("fc_%s", st.FuncCallIDs[idx])) @@ -240,6 +345,27 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string, out = append(out, emitEvent("response.function_call_arguments.delta", ad)) } + // Gemini emits the full function call payload at once, so we can finalize it immediately. + if !st.FuncDone[idx] { + fcDone := `{"type":"response.function_call_arguments.done","sequence_number":0,"item_id":"","output_index":0,"arguments":""}` + fcDone, _ = sjson.Set(fcDone, "sequence_number", nextSeq()) + fcDone, _ = sjson.Set(fcDone, "item_id", fmt.Sprintf("fc_%s", st.FuncCallIDs[idx])) + fcDone, _ = sjson.Set(fcDone, "output_index", idx) + fcDone, _ = sjson.Set(fcDone, "arguments", argsJSON) + out = append(out, emitEvent("response.function_call_arguments.done", fcDone)) + + itemDone := `{"type":"response.output_item.done","sequence_number":0,"output_index":0,"item":{"id":"","type":"function_call","status":"completed","arguments":"","call_id":"","name":""}}` + itemDone, _ = sjson.Set(itemDone, "sequence_number", nextSeq()) + itemDone, _ = sjson.Set(itemDone, "output_index", idx) + itemDone, _ = sjson.Set(itemDone, "item.id", fmt.Sprintf("fc_%s", st.FuncCallIDs[idx])) + itemDone, _ = sjson.Set(itemDone, "item.arguments", argsJSON) + itemDone, _ = sjson.Set(itemDone, "item.call_id", st.FuncCallIDs[idx]) + itemDone, _ = sjson.Set(itemDone, "item.name", st.FuncNames[idx]) + out = append(out, emitEvent("response.output_item.done", itemDone)) + + st.FuncDone[idx] = true + } + return true } @@ -251,28 +377,7 @@ func 
ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string, if fr := root.Get("candidates.0.finishReason"); fr.Exists() && fr.String() != "" { // Finalize reasoning first to keep ordering tight with last delta finalizeReasoning() - // Close message output if opened - if st.MsgOpened { - fullText := st.ItemTextBuf.String() - done := `{"type":"response.output_text.done","sequence_number":0,"item_id":"","output_index":0,"content_index":0,"text":"","logprobs":[]}` - done, _ = sjson.Set(done, "sequence_number", nextSeq()) - done, _ = sjson.Set(done, "item_id", st.CurrentMsgID) - done, _ = sjson.Set(done, "output_index", st.MsgIndex) - done, _ = sjson.Set(done, "text", fullText) - out = append(out, emitEvent("response.output_text.done", done)) - partDone := `{"type":"response.content_part.done","sequence_number":0,"item_id":"","output_index":0,"content_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""}}` - partDone, _ = sjson.Set(partDone, "sequence_number", nextSeq()) - partDone, _ = sjson.Set(partDone, "item_id", st.CurrentMsgID) - partDone, _ = sjson.Set(partDone, "output_index", st.MsgIndex) - partDone, _ = sjson.Set(partDone, "part.text", fullText) - out = append(out, emitEvent("response.content_part.done", partDone)) - final := `{"type":"response.output_item.done","sequence_number":0,"output_index":0,"item":{"id":"","type":"message","status":"completed","content":[{"type":"output_text","text":""}],"role":"assistant"}}` - final, _ = sjson.Set(final, "sequence_number", nextSeq()) - final, _ = sjson.Set(final, "output_index", st.MsgIndex) - final, _ = sjson.Set(final, "item.id", st.CurrentMsgID) - final, _ = sjson.Set(final, "item.content.0.text", fullText) - out = append(out, emitEvent("response.output_item.done", final)) - } + finalizeMessage() // Close function calls if len(st.FuncArgsBuf) > 0 { @@ -289,6 +394,9 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string, } } for _, idx := range 
idxs { + if st.FuncDone[idx] { + continue + } args := "{}" if b := st.FuncArgsBuf[idx]; b != nil && b.Len() > 0 { args = b.String() @@ -308,6 +416,8 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string, itemDone, _ = sjson.Set(itemDone, "item.call_id", st.FuncCallIDs[idx]) itemDone, _ = sjson.Set(itemDone, "item.name", st.FuncNames[idx]) out = append(out, emitEvent("response.output_item.done", itemDone)) + + st.FuncDone[idx] = true } } @@ -319,8 +429,8 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string, completed, _ = sjson.Set(completed, "response.id", st.ResponseID) completed, _ = sjson.Set(completed, "response.created_at", st.CreatedAt) - if requestRawJSON != nil { - req := gjson.ParseBytes(requestRawJSON) + if reqJSON := pickRequestJSON(originalRequestRawJSON, requestRawJSON); len(reqJSON) > 0 { + req := unwrapRequestRoot(gjson.ParseBytes(reqJSON)) if v := req.Get("instructions"); v.Exists() { completed, _ = sjson.Set(completed, "response.instructions", v.String()) } @@ -383,41 +493,34 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string, } } - // Compose outputs in encountered order: reasoning, message, function_calls + // Compose outputs in output_index order. 
outputsWrapper := `{"arr":[]}` - if st.ReasoningOpened { - item := `{"id":"","type":"reasoning","summary":[{"type":"summary_text","text":""}]}` - item, _ = sjson.Set(item, "id", st.ReasoningItemID) - item, _ = sjson.Set(item, "summary.0.text", st.ReasoningBuf.String()) - outputsWrapper, _ = sjson.SetRaw(outputsWrapper, "arr.-1", item) - } - if st.MsgOpened { - item := `{"id":"","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":""}],"role":"assistant"}` - item, _ = sjson.Set(item, "id", st.CurrentMsgID) - item, _ = sjson.Set(item, "content.0.text", st.TextBuf.String()) - outputsWrapper, _ = sjson.SetRaw(outputsWrapper, "arr.-1", item) - } - if len(st.FuncArgsBuf) > 0 { - idxs := make([]int, 0, len(st.FuncArgsBuf)) - for idx := range st.FuncArgsBuf { - idxs = append(idxs, idx) + for idx := 0; idx < st.NextIndex; idx++ { + if st.ReasoningOpened && idx == st.ReasoningIndex { + item := `{"id":"","type":"reasoning","encrypted_content":"","summary":[{"type":"summary_text","text":""}]}` + item, _ = sjson.Set(item, "id", st.ReasoningItemID) + item, _ = sjson.Set(item, "encrypted_content", st.ReasoningEnc) + item, _ = sjson.Set(item, "summary.0.text", st.ReasoningBuf.String()) + outputsWrapper, _ = sjson.SetRaw(outputsWrapper, "arr.-1", item) + continue } - for i := 0; i < len(idxs); i++ { - for j := i + 1; j < len(idxs); j++ { - if idxs[j] < idxs[i] { - idxs[i], idxs[j] = idxs[j], idxs[i] - } - } + if st.MsgOpened && idx == st.MsgIndex { + item := `{"id":"","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":""}],"role":"assistant"}` + item, _ = sjson.Set(item, "id", st.CurrentMsgID) + item, _ = sjson.Set(item, "content.0.text", st.TextBuf.String()) + outputsWrapper, _ = sjson.SetRaw(outputsWrapper, "arr.-1", item) + continue } - for _, idx := range idxs { - args := "" - if b := st.FuncArgsBuf[idx]; b != nil { + + if callID, ok := st.FuncCallIDs[idx]; ok && 
callID != "" { + args := "{}" + if b := st.FuncArgsBuf[idx]; b != nil && b.Len() > 0 { args = b.String() } item := `{"id":"","type":"function_call","status":"completed","arguments":"","call_id":"","name":""}` - item, _ = sjson.Set(item, "id", fmt.Sprintf("fc_%s", st.FuncCallIDs[idx])) + item, _ = sjson.Set(item, "id", fmt.Sprintf("fc_%s", callID)) item, _ = sjson.Set(item, "arguments", args) - item, _ = sjson.Set(item, "call_id", st.FuncCallIDs[idx]) + item, _ = sjson.Set(item, "call_id", callID) item, _ = sjson.Set(item, "name", st.FuncNames[idx]) outputsWrapper, _ = sjson.SetRaw(outputsWrapper, "arr.-1", item) } @@ -431,8 +534,8 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string, // input tokens = prompt + thoughts input := um.Get("promptTokenCount").Int() + um.Get("thoughtsTokenCount").Int() completed, _ = sjson.Set(completed, "response.usage.input_tokens", input) - // cached_tokens not provided by Gemini; default to 0 for structure compatibility - completed, _ = sjson.Set(completed, "response.usage.input_tokens_details.cached_tokens", 0) + // cached token details: align with OpenAI "cached_tokens" semantics. + completed, _ = sjson.Set(completed, "response.usage.input_tokens_details.cached_tokens", um.Get("cachedContentTokenCount").Int()) // output tokens if v := um.Get("candidatesTokenCount"); v.Exists() { completed, _ = sjson.Set(completed, "response.usage.output_tokens", v.Int()) @@ -460,6 +563,7 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string, // ConvertGeminiResponseToOpenAIResponsesNonStream aggregates Gemini response JSON into a single OpenAI Responses JSON object. 
func ConvertGeminiResponseToOpenAIResponsesNonStream(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, _ *any) string { root := gjson.ParseBytes(rawJSON) + root = unwrapGeminiResponseRoot(root) // Base response scaffold resp := `{"id":"","object":"response","created_at":0,"status":"completed","background":false,"error":null,"incomplete_details":null}` @@ -478,15 +582,15 @@ func ConvertGeminiResponseToOpenAIResponsesNonStream(_ context.Context, _ string // created_at: map from createTime if available createdAt := time.Now().Unix() if v := root.Get("createTime"); v.Exists() { - if t, err := time.Parse(time.RFC3339Nano, v.String()); err == nil { + if t, errParseCreateTime := time.Parse(time.RFC3339Nano, v.String()); errParseCreateTime == nil { createdAt = t.Unix() } } resp, _ = sjson.Set(resp, "created_at", createdAt) // Echo request fields when present; fallback model from response modelVersion - if len(requestRawJSON) > 0 { - req := gjson.ParseBytes(requestRawJSON) + if reqJSON := pickRequestJSON(originalRequestRawJSON, requestRawJSON); len(reqJSON) > 0 { + req := unwrapRequestRoot(gjson.ParseBytes(reqJSON)) if v := req.Get("instructions"); v.Exists() { resp, _ = sjson.Set(resp, "instructions", v.String()) } @@ -636,8 +740,8 @@ func ConvertGeminiResponseToOpenAIResponsesNonStream(_ context.Context, _ string // input tokens = prompt + thoughts input := um.Get("promptTokenCount").Int() + um.Get("thoughtsTokenCount").Int() resp, _ = sjson.Set(resp, "usage.input_tokens", input) - // cached_tokens not provided by Gemini; default to 0 for structure compatibility - resp, _ = sjson.Set(resp, "usage.input_tokens_details.cached_tokens", 0) + // cached token details: align with OpenAI "cached_tokens" semantics. 
+ resp, _ = sjson.Set(resp, "usage.input_tokens_details.cached_tokens", um.Get("cachedContentTokenCount").Int()) // output tokens if v := um.Get("candidatesTokenCount"); v.Exists() { resp, _ = sjson.Set(resp, "usage.output_tokens", v.Int()) diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_response_test.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_response_test.go new file mode 100644 index 00000000..9899c594 --- /dev/null +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_response_test.go @@ -0,0 +1,353 @@ +package responses + +import ( + "context" + "strings" + "testing" + + "github.com/tidwall/gjson" +) + +func parseSSEEvent(t *testing.T, chunk string) (string, gjson.Result) { + t.Helper() + + lines := strings.Split(chunk, "\n") + if len(lines) < 2 { + t.Fatalf("unexpected SSE chunk: %q", chunk) + } + + event := strings.TrimSpace(strings.TrimPrefix(lines[0], "event:")) + dataLine := strings.TrimSpace(strings.TrimPrefix(lines[1], "data:")) + if !gjson.Valid(dataLine) { + t.Fatalf("invalid SSE data JSON: %q", dataLine) + } + return event, gjson.Parse(dataLine) +} + +func TestConvertGeminiResponseToOpenAIResponses_UnwrapAndAggregateText(t *testing.T) { + // Vertex-style Gemini stream wraps the actual response payload under "response". + // This test ensures we unwrap and that output_text.done contains the full text. 
+ in := []string{ + `data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"text":""}]}}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2,"cachedContentTokenCount":0},"modelVersion":"test-model","responseId":"req_vrtx_1"},"traceId":"t1"}`, + `data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"text":"让"}]}}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2,"cachedContentTokenCount":0},"modelVersion":"test-model","responseId":"req_vrtx_1"},"traceId":"t1"}`, + `data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"text":"我先"}]}}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2,"cachedContentTokenCount":0},"modelVersion":"test-model","responseId":"req_vrtx_1"},"traceId":"t1"}`, + `data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"text":"了解"}]}}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2,"cachedContentTokenCount":0},"modelVersion":"test-model","responseId":"req_vrtx_1"},"traceId":"t1"}`, + `data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"functionCall":{"name":"mcp__serena__list_dir","args":{"recursive":false,"relative_path":"internal"},"id":"toolu_1"}}]}}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2,"cachedContentTokenCount":0},"modelVersion":"test-model","responseId":"req_vrtx_1"},"traceId":"t1"}`, + `data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"text":""}]},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":10,"candidatesTokenCount":5,"totalTokenCount":15,"cachedContentTokenCount":2},"modelVersion":"test-model","responseId":"req_vrtx_1"},"traceId":"t1"}`, + } + + originalReq := []byte(`{"instructions":"test instructions","model":"gpt-5","max_output_tokens":123}`) + + var param any + var out []string + for _, line := range in { + out = append(out, 
ConvertGeminiResponseToOpenAIResponses(context.Background(), "test-model", originalReq, nil, []byte(line), &param)...) + } + + var ( + gotTextDone bool + gotMessageDone bool + gotResponseDone bool + gotFuncDone bool + + textDone string + messageText string + responseID string + instructions string + cachedTokens int64 + + funcName string + funcArgs string + + posTextDone = -1 + posPartDone = -1 + posMessageDone = -1 + posFuncAdded = -1 + ) + + for i, chunk := range out { + ev, data := parseSSEEvent(t, chunk) + switch ev { + case "response.output_text.done": + gotTextDone = true + if posTextDone == -1 { + posTextDone = i + } + textDone = data.Get("text").String() + case "response.content_part.done": + if posPartDone == -1 { + posPartDone = i + } + case "response.output_item.done": + switch data.Get("item.type").String() { + case "message": + gotMessageDone = true + if posMessageDone == -1 { + posMessageDone = i + } + messageText = data.Get("item.content.0.text").String() + case "function_call": + gotFuncDone = true + funcName = data.Get("item.name").String() + funcArgs = data.Get("item.arguments").String() + } + case "response.output_item.added": + if data.Get("item.type").String() == "function_call" && posFuncAdded == -1 { + posFuncAdded = i + } + case "response.completed": + gotResponseDone = true + responseID = data.Get("response.id").String() + instructions = data.Get("response.instructions").String() + cachedTokens = data.Get("response.usage.input_tokens_details.cached_tokens").Int() + } + } + + if !gotTextDone { + t.Fatalf("missing response.output_text.done event") + } + if posTextDone == -1 || posPartDone == -1 || posMessageDone == -1 || posFuncAdded == -1 { + t.Fatalf("missing ordering events: textDone=%d partDone=%d messageDone=%d funcAdded=%d", posTextDone, posPartDone, posMessageDone, posFuncAdded) + } + if !(posTextDone < posPartDone && posPartDone < posMessageDone && posMessageDone < posFuncAdded) { + t.Fatalf("unexpected message/function ordering: 
textDone=%d partDone=%d messageDone=%d funcAdded=%d", posTextDone, posPartDone, posMessageDone, posFuncAdded) + } + if !gotMessageDone { + t.Fatalf("missing message response.output_item.done event") + } + if !gotFuncDone { + t.Fatalf("missing function_call response.output_item.done event") + } + if !gotResponseDone { + t.Fatalf("missing response.completed event") + } + + if textDone != "让我先了解" { + t.Fatalf("unexpected output_text.done text: got %q", textDone) + } + if messageText != "让我先了解" { + t.Fatalf("unexpected message done text: got %q", messageText) + } + + if responseID != "resp_req_vrtx_1" { + t.Fatalf("unexpected response id: got %q", responseID) + } + if instructions != "test instructions" { + t.Fatalf("unexpected instructions echo: got %q", instructions) + } + if cachedTokens != 2 { + t.Fatalf("unexpected cached token count: got %d", cachedTokens) + } + + if funcName != "mcp__serena__list_dir" { + t.Fatalf("unexpected function name: got %q", funcName) + } + if !gjson.Valid(funcArgs) { + t.Fatalf("invalid function arguments JSON: %q", funcArgs) + } + if gjson.Get(funcArgs, "recursive").Bool() != false { + t.Fatalf("unexpected recursive arg: %v", gjson.Get(funcArgs, "recursive").Value()) + } + if gjson.Get(funcArgs, "relative_path").String() != "internal" { + t.Fatalf("unexpected relative_path arg: %q", gjson.Get(funcArgs, "relative_path").String()) + } +} + +func TestConvertGeminiResponseToOpenAIResponses_ReasoningEncryptedContent(t *testing.T) { + sig := "RXE0RENrZ0lDeEFDR0FJcVFOZDdjUzlleGFuRktRdFcvSzNyZ2MvWDNCcDQ4RmxSbGxOWUlOVU5kR1l1UHMrMGdkMVp0Vkg3ekdKU0g4YVljc2JjN3lNK0FrdGpTNUdqamI4T3Z0VVNETzdQd3pmcFhUOGl3U3hXUEJvTVFRQ09mWTFyMEtTWGZxUUlJakFqdmFGWk83RW1XRlBKckJVOVpkYzdDKw==" + in := []string{ + `data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"thought":true,"thoughtSignature":"` + sig + `","text":""}]}}],"modelVersion":"test-model","responseId":"req_vrtx_sig"},"traceId":"t1"}`, + `data: 
{"response":{"candidates":[{"content":{"role":"model","parts":[{"thought":true,"text":"a"}]}}],"modelVersion":"test-model","responseId":"req_vrtx_sig"},"traceId":"t1"}`, + `data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"text":"hello"}]}}],"modelVersion":"test-model","responseId":"req_vrtx_sig"},"traceId":"t1"}`, + `data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"text":""}]},"finishReason":"STOP"}],"modelVersion":"test-model","responseId":"req_vrtx_sig"},"traceId":"t1"}`, + } + + var param any + var out []string + for _, line := range in { + out = append(out, ConvertGeminiResponseToOpenAIResponses(context.Background(), "test-model", nil, nil, []byte(line), &param)...) + } + + var ( + addedEnc string + doneEnc string + ) + for _, chunk := range out { + ev, data := parseSSEEvent(t, chunk) + switch ev { + case "response.output_item.added": + if data.Get("item.type").String() == "reasoning" { + addedEnc = data.Get("item.encrypted_content").String() + } + case "response.output_item.done": + if data.Get("item.type").String() == "reasoning" { + doneEnc = data.Get("item.encrypted_content").String() + } + } + } + + if addedEnc != sig { + t.Fatalf("unexpected encrypted_content in response.output_item.added: got %q", addedEnc) + } + if doneEnc != sig { + t.Fatalf("unexpected encrypted_content in response.output_item.done: got %q", doneEnc) + } +} + +func TestConvertGeminiResponseToOpenAIResponses_FunctionCallEventOrder(t *testing.T) { + in := []string{ + `data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"functionCall":{"name":"tool0"}}]}}],"modelVersion":"test-model","responseId":"req_vrtx_1"},"traceId":"t1"}`, + `data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"functionCall":{"name":"tool1"}}]}}],"modelVersion":"test-model","responseId":"req_vrtx_1"},"traceId":"t1"}`, + `data: 
{"response":{"candidates":[{"content":{"role":"model","parts":[{"functionCall":{"name":"tool2","args":{"a":1}}}]}}],"modelVersion":"test-model","responseId":"req_vrtx_1"},"traceId":"t1"}`, + `data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"text":""}]},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":10,"candidatesTokenCount":5,"totalTokenCount":15,"cachedContentTokenCount":0},"modelVersion":"test-model","responseId":"req_vrtx_1"},"traceId":"t1"}`, + } + + var param any + var out []string + for _, line := range in { + out = append(out, ConvertGeminiResponseToOpenAIResponses(context.Background(), "test-model", nil, nil, []byte(line), &param)...) + } + + posAdded := []int{-1, -1, -1} + posArgsDelta := []int{-1, -1, -1} + posArgsDone := []int{-1, -1, -1} + posItemDone := []int{-1, -1, -1} + posCompleted := -1 + deltaByIndex := map[int]string{} + + for i, chunk := range out { + ev, data := parseSSEEvent(t, chunk) + switch ev { + case "response.output_item.added": + if data.Get("item.type").String() != "function_call" { + continue + } + idx := int(data.Get("output_index").Int()) + if idx >= 0 && idx < len(posAdded) { + posAdded[idx] = i + } + case "response.function_call_arguments.delta": + idx := int(data.Get("output_index").Int()) + if idx >= 0 && idx < len(posArgsDelta) { + posArgsDelta[idx] = i + deltaByIndex[idx] = data.Get("delta").String() + } + case "response.function_call_arguments.done": + idx := int(data.Get("output_index").Int()) + if idx >= 0 && idx < len(posArgsDone) { + posArgsDone[idx] = i + } + case "response.output_item.done": + if data.Get("item.type").String() != "function_call" { + continue + } + idx := int(data.Get("output_index").Int()) + if idx >= 0 && idx < len(posItemDone) { + posItemDone[idx] = i + } + case "response.completed": + posCompleted = i + + output := data.Get("response.output") + if !output.Exists() || !output.IsArray() { + t.Fatalf("missing response.output in response.completed") + } + if 
len(output.Array()) != 3 { + t.Fatalf("unexpected response.output length: got %d", len(output.Array())) + } + if data.Get("response.output.0.name").String() != "tool0" || data.Get("response.output.0.arguments").String() != "{}" { + t.Fatalf("unexpected output[0]: %s", data.Get("response.output.0").Raw) + } + if data.Get("response.output.1.name").String() != "tool1" || data.Get("response.output.1.arguments").String() != "{}" { + t.Fatalf("unexpected output[1]: %s", data.Get("response.output.1").Raw) + } + if data.Get("response.output.2.name").String() != "tool2" { + t.Fatalf("unexpected output[2] name: %s", data.Get("response.output.2").Raw) + } + if !gjson.Valid(data.Get("response.output.2.arguments").String()) { + t.Fatalf("unexpected output[2] arguments: %q", data.Get("response.output.2.arguments").String()) + } + } + } + + if posCompleted == -1 { + t.Fatalf("missing response.completed event") + } + for idx := 0; idx < 3; idx++ { + if posAdded[idx] == -1 || posArgsDelta[idx] == -1 || posArgsDone[idx] == -1 || posItemDone[idx] == -1 { + t.Fatalf("missing function call events for output_index %d: added=%d argsDelta=%d argsDone=%d itemDone=%d", idx, posAdded[idx], posArgsDelta[idx], posArgsDone[idx], posItemDone[idx]) + } + if !(posAdded[idx] < posArgsDelta[idx] && posArgsDelta[idx] < posArgsDone[idx] && posArgsDone[idx] < posItemDone[idx]) { + t.Fatalf("unexpected ordering for output_index %d: added=%d argsDelta=%d argsDone=%d itemDone=%d", idx, posAdded[idx], posArgsDelta[idx], posArgsDone[idx], posItemDone[idx]) + } + if idx > 0 && !(posItemDone[idx-1] < posAdded[idx]) { + t.Fatalf("function call events overlap between %d and %d: prevDone=%d nextAdded=%d", idx-1, idx, posItemDone[idx-1], posAdded[idx]) + } + } + + if deltaByIndex[0] != "{}" { + t.Fatalf("unexpected delta for output_index 0: got %q", deltaByIndex[0]) + } + if deltaByIndex[1] != "{}" { + t.Fatalf("unexpected delta for output_index 1: got %q", deltaByIndex[1]) + } + if deltaByIndex[2] == "" || 
!gjson.Valid(deltaByIndex[2]) || gjson.Get(deltaByIndex[2], "a").Int() != 1 {
		t.Fatalf("unexpected delta for output_index 2: got %q", deltaByIndex[2])
	}
	if !(posItemDone[2] < posCompleted) {
		t.Fatalf("response.completed should be after last output_item.done: last=%d completed=%d", posItemDone[2], posCompleted)
	}
}

// TestConvertGeminiResponseToOpenAIResponses_ResponseOutputOrdering feeds the
// converter three "response"-wrapped stream chunks in order: a function call
// ("tool0"), a text part ("hi"), and a final chunk with an empty text part,
// finishReason STOP and usage metadata.
//
// It asserts that:
//   - the function_call item at output_index 0 is reported done before the
//     message item at output_index 1 is added,
//   - response.completed is emitted after the message item was added, and
//   - response.completed lists the function_call first and the message
//     second in response.output, with the message text equal to "hi".
func TestConvertGeminiResponseToOpenAIResponses_ResponseOutputOrdering(t *testing.T) {
	in := []string{
		`data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"functionCall":{"name":"tool0","args":{"x":"y"}}}]}}],"modelVersion":"test-model","responseId":"req_vrtx_2"},"traceId":"t2"}`,
		`data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"text":"hi"}]}}],"modelVersion":"test-model","responseId":"req_vrtx_2"},"traceId":"t2"}`,
		`data: {"response":{"candidates":[{"content":{"role":"model","parts":[{"text":""}]},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2,"cachedContentTokenCount":0},"modelVersion":"test-model","responseId":"req_vrtx_2"},"traceId":"t2"}`,
	}

	// The same param pointer is passed on every call so the converter can keep
	// its streaming state between chunks.
	var param any
	var out []string
	for _, line := range in {
		out = append(out, ConvertGeminiResponseToOpenAIResponses(context.Background(), "test-model", nil, nil, []byte(line), &param)...)
	}

	// Positions (indices into out) of the events whose relative order is
	// checked below; -1 means "not seen".
	posFuncDone := -1
	posMsgAdded := -1
	posCompleted := -1

	for i, chunk := range out {
		ev, data := parseSSEEvent(t, chunk)
		switch ev {
		case "response.output_item.done":
			if data.Get("item.type").String() == "function_call" && data.Get("output_index").Int() == 0 {
				posFuncDone = i
			}
		case "response.output_item.added":
			if data.Get("item.type").String() == "message" && data.Get("output_index").Int() == 1 {
				posMsgAdded = i
			}
		case "response.completed":
			posCompleted = i
			// The final snapshot must list the items in output_index order.
			if data.Get("response.output.0.type").String() != "function_call" {
				t.Fatalf("expected response.output[0] to be function_call: %s", data.Get("response.output.0").Raw)
			}
			if data.Get("response.output.1.type").String() != "message" {
				t.Fatalf("expected response.output[1] to be message: %s", data.Get("response.output.1").Raw)
			}
			if data.Get("response.output.1.content.0.text").String() != "hi" {
				t.Fatalf("unexpected message text in response.output[1]: %s", data.Get("response.output.1").Raw)
			}
		}
	}

	if posFuncDone == -1 || posMsgAdded == -1 || posCompleted == -1 {
		t.Fatalf("missing required events: funcDone=%d msgAdded=%d completed=%d", posFuncDone, posMsgAdded, posCompleted)
	}
	if !(posFuncDone < posMsgAdded) {
		t.Fatalf("expected function_call to complete before message is added: funcDone=%d msgAdded=%d", posFuncDone, posMsgAdded)
	}
	if !(posMsgAdded < posCompleted) {
		t.Fatalf("expected response.completed after message added: msgAdded=%d completed=%d", posMsgAdded, posCompleted)
	}
}