mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
Python: Stop emitting duplicate reasoning content from OpenAI response.reasoning_text.done and response.reasoning_summary_text.done events (#5162)
* Fix reasoning text done events duplicating streamed delta content (#5157) The OpenAI Responses API sends both reasoning_text.delta (incremental chunks) and reasoning_text.done (full accumulated text) events. The chat client was emitting Content for both, causing ag-ui to append the full done text onto already-accumulated delta text, producing duplicated reasoning output. Stop emitting Content for reasoning_text.done and reasoning_summary_text.done events, matching how output_text.done is already handled (not emitted). The deltas contain all the content; the done event is redundant. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * fix(openai): emit reasoning done content as fallback when no deltas observed (#5157) Address PR review feedback: - Track item_ids that received reasoning deltas via seen_reasoning_delta_item_ids set - Emit content from done events only when no deltas were received for the item_id, preventing silent content loss on stream resumption - Add comment documenting code_interpreter done event asymmetry - Replace redundant ag-ui test with deduplication-focused test - Add integration test for delta+done sequence in OpenAI chat client tests - Add fallback path tests for done events without preceding deltas Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Address review feedback for #5157: Python: [Bug]: "type": "response.reasoning_text.delta" and "response.reasoning_text.done" both get exposed as "text_reasoning" * Fix AG-UI reasoning streaming to use proper Start/End pattern (#5157) _emit_text_reasoning now follows the same streaming pattern as _emit_text: - Emits ReasoningStartEvent/ReasoningMessageStartEvent only on the first delta for a given message_id - Emits only ReasoningMessageContentEvent for subsequent deltas - Defers ReasoningMessageEndEvent/ReasoningEndEvent until _close_reasoning_block is called (on content type switch or end-of-run) This produces the correct protocol pattern: ReasoningStartEvent ReasoningMessageStartEvent ReasoningMessageContentEvent(delta1) ReasoningMessageContentEvent(delta2) ReasoningMessageEndEvent ReasoningEndEvent Instead of wrapping every delta in a full Start→End sequence. Backward compatibility is preserved: calling _emit_text_reasoning without a flow argument still produces the full sequence per call. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Fix import ordering lint error in AG-UI test file (#5157) Move inline import of TextMessageContentEvent to the top-level import block and ensure alphabetical ordering to satisfy ruff I001 rule. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Fix mypy error: rename loop variable to avoid type conflict with WorkflowEvent The 'event' variable was already typed as WorkflowEvent[Any] from the async for loop at line 590. Reusing it in the _close_reasoning_block loop (which returns list[BaseEvent]) caused an incompatible assignment error. Renamed to 'reasoning_evt' to avoid the conflict. Fixes #5162 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Address review feedback for #5157: review comment fixes * narrow test result reporting to explicit pytest JUnit XML * Fix test args * Fix pytest-results-action in merge workflow and remove committed test artifacts Apply the same JUnit XML fix from python-tests.yml to python-merge-tests.yml: add --junitxml=pytest.xml to all test commands and narrow the results action path from ./python/**.xml to ./python/pytest.xml. Also remove accidentally committed pytest.xml and python-coverage.xml and add them to .gitignore. --------- Co-authored-by: Copilot <copilot@github.com> Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
Unverified
parent
1dd828d255
commit
5e8fe0be1f
@@ -512,6 +512,7 @@ class RawOpenAIChatClient( # type: ignore[misc]
|
||||
|
||||
if stream:
|
||||
function_call_ids: dict[int, tuple[str, str]] = {}
|
||||
seen_reasoning_delta_item_ids: set[str] = set()
|
||||
validated_options: dict[str, Any] | None = None
|
||||
|
||||
async def _stream() -> AsyncIterable[ChatResponseUpdate]:
|
||||
@@ -530,6 +531,7 @@ class RawOpenAIChatClient( # type: ignore[misc]
|
||||
chunk,
|
||||
options=validated_options,
|
||||
function_call_ids=function_call_ids,
|
||||
seen_reasoning_delta_item_ids=seen_reasoning_delta_item_ids,
|
||||
)
|
||||
except Exception as ex:
|
||||
self._handle_request_error(ex)
|
||||
@@ -1930,6 +1932,7 @@ class RawOpenAIChatClient( # type: ignore[misc]
|
||||
event: OpenAIResponseStreamEvent,
|
||||
options: dict[str, Any],
|
||||
function_call_ids: dict[int, tuple[str, str]],
|
||||
seen_reasoning_delta_item_ids: set[str] | None = None,
|
||||
) -> ChatResponseUpdate:
|
||||
"""Parse an OpenAI Responses API streaming event into a ChatResponseUpdate."""
|
||||
metadata: dict[str, Any] = {}
|
||||
@@ -2008,6 +2011,8 @@ class RawOpenAIChatClient( # type: ignore[misc]
|
||||
contents.append(Content.from_text(text=event.delta, raw_representation=event))
|
||||
metadata.update(self._get_metadata_from_response(event))
|
||||
case "response.reasoning_text.delta":
|
||||
if seen_reasoning_delta_item_ids is not None:
|
||||
seen_reasoning_delta_item_ids.add(event.item_id)
|
||||
contents.append(
|
||||
Content.from_text_reasoning(
|
||||
id=event.item_id,
|
||||
@@ -2017,15 +2022,21 @@ class RawOpenAIChatClient( # type: ignore[misc]
|
||||
)
|
||||
metadata.update(self._get_metadata_from_response(event))
|
||||
case "response.reasoning_text.done":
|
||||
contents.append(
|
||||
Content.from_text_reasoning(
|
||||
id=event.item_id,
|
||||
text=event.text,
|
||||
raw_representation=event,
|
||||
# Done event carries the full accumulated text. Emit it only as a
|
||||
# fallback when no delta was already received for this item_id, to
|
||||
# avoid duplicating content in downstream accumulators (e.g. ag-ui).
|
||||
if seen_reasoning_delta_item_ids is None or event.item_id not in seen_reasoning_delta_item_ids:
|
||||
contents.append(
|
||||
Content.from_text_reasoning(
|
||||
id=event.item_id,
|
||||
text=event.text,
|
||||
raw_representation=event,
|
||||
)
|
||||
)
|
||||
)
|
||||
metadata.update(self._get_metadata_from_response(event))
|
||||
case "response.reasoning_summary_text.delta":
|
||||
if seen_reasoning_delta_item_ids is not None:
|
||||
seen_reasoning_delta_item_ids.add(event.item_id)
|
||||
contents.append(
|
||||
Content.from_text_reasoning(
|
||||
id=event.item_id,
|
||||
@@ -2035,13 +2046,17 @@ class RawOpenAIChatClient( # type: ignore[misc]
|
||||
)
|
||||
metadata.update(self._get_metadata_from_response(event))
|
||||
case "response.reasoning_summary_text.done":
|
||||
contents.append(
|
||||
Content.from_text_reasoning(
|
||||
id=event.item_id,
|
||||
text=event.text,
|
||||
raw_representation=event,
|
||||
# Done event carries the full accumulated text. Emit it only as a
|
||||
# fallback when no delta was already received for this item_id, to
|
||||
# avoid duplicating content in downstream accumulators (e.g. ag-ui).
|
||||
if seen_reasoning_delta_item_ids is None or event.item_id not in seen_reasoning_delta_item_ids:
|
||||
contents.append(
|
||||
Content.from_text_reasoning(
|
||||
id=event.item_id,
|
||||
text=event.text,
|
||||
raw_representation=event,
|
||||
)
|
||||
)
|
||||
)
|
||||
metadata.update(self._get_metadata_from_response(event))
|
||||
case "response.code_interpreter_call_code.delta":
|
||||
call_id = getattr(event, "call_id", None) or getattr(event, "id", None) or event.item_id
|
||||
@@ -2065,6 +2080,9 @@ class RawOpenAIChatClient( # type: ignore[misc]
|
||||
)
|
||||
)
|
||||
metadata.update(self._get_metadata_from_response(event))
|
||||
# NOTE: Unlike reasoning done events, code_interpreter done events always
|
||||
# emit content because downstream consumers do not accumulate
|
||||
# code_interpreter deltas the same way.
|
||||
case "response.code_interpreter_call_code.done":
|
||||
call_id = getattr(event, "call_id", None) or getattr(event, "id", None) or event.item_id
|
||||
ci_additional_properties = {
|
||||
|
||||
@@ -2808,11 +2808,12 @@ def test_streaming_reasoning_text_delta_event() -> None:
|
||||
mock_metadata.assert_called_once_with(event)
|
||||
|
||||
|
||||
def test_streaming_reasoning_text_done_event() -> None:
|
||||
"""Test reasoning text done event creates TextReasoningContent with complete text."""
|
||||
def test_streaming_reasoning_text_done_event_skipped_after_deltas() -> None:
|
||||
"""Test reasoning text done event does not emit content when deltas were already received."""
|
||||
client = OpenAIChatClient(model="test-model", api_key="test-key")
|
||||
chat_options = ChatOptions()
|
||||
function_call_ids: dict[int, tuple[str, str]] = {}
|
||||
seen_reasoning_delta_item_ids: set[str] = {"reasoning_456"}
|
||||
|
||||
event = ResponseReasoningTextDoneEvent(
|
||||
type="response.reasoning_text.done",
|
||||
@@ -2824,12 +2825,40 @@ def test_streaming_reasoning_text_done_event() -> None:
|
||||
)
|
||||
|
||||
with patch.object(client, "_get_metadata_from_response", return_value={"test": "data"}) as mock_metadata:
|
||||
response = client._parse_chunk_from_openai(event, chat_options, function_call_ids) # type: ignore
|
||||
response = client._parse_chunk_from_openai(
|
||||
event, chat_options, function_call_ids, seen_reasoning_delta_item_ids
|
||||
) # type: ignore
|
||||
|
||||
assert len(response.contents) == 0
|
||||
mock_metadata.assert_called_once_with(event)
|
||||
assert response.additional_properties == {"test": "data"}
|
||||
|
||||
|
||||
def test_streaming_reasoning_text_done_event_fallback_without_deltas() -> None:
|
||||
"""Test reasoning text done event emits content when no deltas were received for this item_id."""
|
||||
client = OpenAIChatClient(model="test-model", api_key="test-key")
|
||||
chat_options = ChatOptions()
|
||||
function_call_ids: dict[int, tuple[str, str]] = {}
|
||||
seen_reasoning_delta_item_ids: set[str] = set()
|
||||
|
||||
event = ResponseReasoningTextDoneEvent(
|
||||
type="response.reasoning_text.done",
|
||||
content_index=0,
|
||||
item_id="reasoning_456",
|
||||
output_index=0,
|
||||
sequence_number=2,
|
||||
text="complete reasoning",
|
||||
)
|
||||
|
||||
with patch.object(client, "_get_metadata_from_response", return_value={"test": "data"}) as mock_metadata:
|
||||
response = client._parse_chunk_from_openai(
|
||||
event, chat_options, function_call_ids, seen_reasoning_delta_item_ids
|
||||
) # type: ignore
|
||||
|
||||
assert len(response.contents) == 1
|
||||
assert response.contents[0].type == "text_reasoning"
|
||||
assert response.contents[0].id == "reasoning_456"
|
||||
assert response.contents[0].text == "complete reasoning"
|
||||
assert response.contents[0].raw_representation == event
|
||||
mock_metadata.assert_called_once_with(event)
|
||||
assert response.additional_properties == {"test": "data"}
|
||||
|
||||
@@ -2859,11 +2888,12 @@ def test_streaming_reasoning_summary_text_delta_event() -> None:
|
||||
mock_metadata.assert_called_once_with(event)
|
||||
|
||||
|
||||
def test_streaming_reasoning_summary_text_done_event() -> None:
|
||||
"""Test reasoning summary text done event creates TextReasoningContent with complete text."""
|
||||
def test_streaming_reasoning_summary_text_done_event_skipped_after_deltas() -> None:
|
||||
"""Test reasoning summary text done event does not emit content when deltas were already received."""
|
||||
client = OpenAIChatClient(model="test-model", api_key="test-key")
|
||||
chat_options = ChatOptions()
|
||||
function_call_ids: dict[int, tuple[str, str]] = {}
|
||||
seen_reasoning_delta_item_ids: set[str] = {"summary_012"}
|
||||
|
||||
event = ResponseReasoningSummaryTextDoneEvent(
|
||||
type="response.reasoning_summary_text.done",
|
||||
@@ -2875,16 +2905,94 @@ def test_streaming_reasoning_summary_text_done_event() -> None:
|
||||
)
|
||||
|
||||
with patch.object(client, "_get_metadata_from_response", return_value={"custom": "meta"}) as mock_metadata:
|
||||
response = client._parse_chunk_from_openai(event, chat_options, function_call_ids) # type: ignore
|
||||
response = client._parse_chunk_from_openai(
|
||||
event, chat_options, function_call_ids, seen_reasoning_delta_item_ids
|
||||
) # type: ignore
|
||||
|
||||
assert len(response.contents) == 0
|
||||
mock_metadata.assert_called_once_with(event)
|
||||
assert response.additional_properties == {"custom": "meta"}
|
||||
|
||||
|
||||
def test_streaming_reasoning_summary_text_done_event_fallback_without_deltas() -> None:
|
||||
"""Test reasoning summary text done event emits content when no deltas were received for this item_id."""
|
||||
client = OpenAIChatClient(model="test-model", api_key="test-key")
|
||||
chat_options = ChatOptions()
|
||||
function_call_ids: dict[int, tuple[str, str]] = {}
|
||||
seen_reasoning_delta_item_ids: set[str] = set()
|
||||
|
||||
event = ResponseReasoningSummaryTextDoneEvent(
|
||||
type="response.reasoning_summary_text.done",
|
||||
item_id="summary_012",
|
||||
output_index=0,
|
||||
sequence_number=4,
|
||||
summary_index=0,
|
||||
text="complete summary",
|
||||
)
|
||||
|
||||
with patch.object(client, "_get_metadata_from_response", return_value={"custom": "meta"}) as mock_metadata:
|
||||
response = client._parse_chunk_from_openai(
|
||||
event, chat_options, function_call_ids, seen_reasoning_delta_item_ids
|
||||
) # type: ignore
|
||||
|
||||
assert len(response.contents) == 1
|
||||
assert response.contents[0].type == "text_reasoning"
|
||||
assert response.contents[0].id == "summary_012"
|
||||
assert response.contents[0].text == "complete summary"
|
||||
assert response.contents[0].raw_representation == event
|
||||
mock_metadata.assert_called_once_with(event)
|
||||
assert response.additional_properties == {"custom": "meta"}
|
||||
|
||||
|
||||
def test_streaming_reasoning_deltas_then_done_no_duplication() -> None:
|
||||
"""Sending delta events followed by a done event produces content only from deltas."""
|
||||
client = OpenAIChatClient(model="test-model", api_key="test-key")
|
||||
chat_options = ChatOptions()
|
||||
function_call_ids: dict[int, tuple[str, str]] = {}
|
||||
seen_reasoning_delta_item_ids: set[str] = set()
|
||||
item_id = "reasoning_seq"
|
||||
|
||||
delta1 = ResponseReasoningTextDeltaEvent(
|
||||
type="response.reasoning_text.delta",
|
||||
content_index=0,
|
||||
item_id=item_id,
|
||||
output_index=0,
|
||||
sequence_number=1,
|
||||
delta="Hello ",
|
||||
)
|
||||
delta2 = ResponseReasoningTextDeltaEvent(
|
||||
type="response.reasoning_text.delta",
|
||||
content_index=0,
|
||||
item_id=item_id,
|
||||
output_index=0,
|
||||
sequence_number=2,
|
||||
delta="world",
|
||||
)
|
||||
done = ResponseReasoningTextDoneEvent(
|
||||
type="response.reasoning_text.done",
|
||||
content_index=0,
|
||||
item_id=item_id,
|
||||
output_index=0,
|
||||
sequence_number=3,
|
||||
text="Hello world",
|
||||
)
|
||||
|
||||
all_contents = []
|
||||
with patch.object(client, "_get_metadata_from_response", return_value={}):
|
||||
for event in [delta1, delta2, done]:
|
||||
response = client._parse_chunk_from_openai(
|
||||
event,
|
||||
chat_options,
|
||||
function_call_ids,
|
||||
seen_reasoning_delta_item_ids, # type: ignore
|
||||
)
|
||||
all_contents.extend(response.contents)
|
||||
|
||||
assert len(all_contents) == 2
|
||||
assert all_contents[0].text == "Hello "
|
||||
assert all_contents[1].text == "world"
|
||||
assert "".join(c.text for c in all_contents) == "Hello world"
|
||||
|
||||
|
||||
def test_streaming_reasoning_events_preserve_metadata() -> None:
|
||||
"""Test that reasoning events preserve metadata like regular text events."""
|
||||
client = OpenAIChatClient(model="test-model", api_key="test-key")
|
||||
|
||||
Reference in New Issue
Block a user