mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
Python: Fix AG-UI reasoning role and multimodal media parsing to follow specification (#5389)
* Fix AG-UI reasoning role and multimodal media value field parsing

  Fix two spec compliance issues in the AG-UI integration:

  1. ReasoningMessageStartEvent now uses role='reasoning' instead of role='assistant', matching the AG-UI specification for reasoning messages.
  2. _parse_multimodal_media_part now reads the 'value' field from source dicts (with fallback to 'data' for backward compatibility), matching the current AG-UI InputContentSource specification.

  Bump ag-ui-protocol dependency from ==0.1.13 to >=0.1.16,<0.2 to pick up the SDK fix that accepts role='reasoning' in ReasoningMessageStartEvent.

  Fix pre-existing pyright reportMissingImports errors for orjson in sample files, and fix import ordering in foundry-hosted-agents sample.

  Fixes #5340

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Python: Fix AG-UI reasoning role and multimodal media parsing to follow specification

  Fixes #5340

* Remove unintended .maf-runtime-ready marker file

  Address PR review feedback: the .maf-runtime-ready file is not referenced anywhere in the repo and was left over from automation.

  Fixes #5340

* Python: Fix duplicate AG-UI multimodal 'value' parsing in snapshot path

  The snapshot normalization path used a second copy of the multimodal source parsing logic that still read the deprecated 'data' field. When clients sent base64 media with source={"type": "base64", "value": ...}, the snapshot event emitted by the server dropped the payload, causing AG-UI-compatible clients to crash on ingest.

  Extract the shared source-field extraction into _extract_multimodal_source_fields so both _parse_multimodal_media_part and the snapshot _legacy_binary_part stay in sync with the AG-UI spec. Add snapshot-path regression tests covering value-only, value-preferred-over-data, and the legacy data-field fallback.

  Addresses review feedback on #5389 from @Rickyneer.
--------- Co-authored-by: Copilot <copilot@github.com> Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
Unverified
parent
0989e68d1c
commit
932ceddf95
@@ -263,27 +263,21 @@ def _deduplicate_messages(messages: list[Message]) -> list[Message]:
|
||||
return unique_messages
|
||||
|
||||
|
||||
def _parse_multimodal_media_part(part: dict[str, Any]) -> Content | None:
|
||||
"""Convert a multimodal media part into Agent Framework content."""
|
||||
part_type = str(part.get("type", "")).lower()
|
||||
source = part.get("source")
|
||||
def _extract_multimodal_source_fields(
|
||||
part: dict[str, Any],
|
||||
) -> tuple[str | None, str | None, str | None, str | None]:
|
||||
"""Extract ``(url, data, binary_id, mime_type)`` from an AG-UI multimodal part.
|
||||
|
||||
mime_type = cast(
|
||||
str | None,
|
||||
part.get("mimeType")
|
||||
or part.get("mime_type")
|
||||
or {
|
||||
"image": "image/*",
|
||||
"audio": "audio/*",
|
||||
"video": "video/*",
|
||||
"document": "application/octet-stream",
|
||||
"binary": "application/octet-stream",
|
||||
}.get(part_type, "application/octet-stream"),
|
||||
)
|
||||
Handles both the current AG-UI spec (``source.value`` for base64 payloads) and the
|
||||
legacy ``source.data`` field for backward compatibility. Returned values are the
|
||||
raw extracted strings (or ``None`` when absent); callers apply their own defaults.
|
||||
"""
|
||||
mime_type = cast(str | None, part.get("mimeType") or part.get("mime_type"))
|
||||
url = cast(str | None, part.get("url") or part.get("uri"))
|
||||
data = cast(str | None, part.get("data"))
|
||||
binary_id = cast(str | None, part.get("id"))
|
||||
|
||||
source = part.get("source")
|
||||
if isinstance(source, dict):
|
||||
source_dict = cast(dict[str, Any], source)
|
||||
source_type = str(source_dict.get("type", "")).lower()
|
||||
@@ -294,14 +288,31 @@ def _parse_multimodal_media_part(part: dict[str, Any]) -> Content | None:
|
||||
if source_type in {"url", "uri"}:
|
||||
url = cast(str | None, source_dict.get("url") or source_dict.get("uri"))
|
||||
elif source_type in {"base64", "data", "binary"}:
|
||||
data = cast(str | None, source_dict.get("data"))
|
||||
data = cast(str | None, source_dict.get("value") or source_dict.get("data"))
|
||||
elif source_type in {"id", "file"}:
|
||||
binary_id = cast(str | None, source_dict.get("id"))
|
||||
else:
|
||||
url = cast(str | None, source_dict.get("url") or source_dict.get("uri") or url)
|
||||
data = cast(str | None, source_dict.get("data") or data)
|
||||
data = cast(str | None, source_dict.get("value") or source_dict.get("data") or data)
|
||||
binary_id = cast(str | None, source_dict.get("id") or binary_id)
|
||||
|
||||
return url, data, binary_id, mime_type
|
||||
|
||||
|
||||
def _parse_multimodal_media_part(part: dict[str, Any]) -> Content | None:
|
||||
"""Convert a multimodal media part into Agent Framework content."""
|
||||
part_type = str(part.get("type", "")).lower()
|
||||
url, data, binary_id, mime_type = _extract_multimodal_source_fields(part)
|
||||
|
||||
if not mime_type:
|
||||
mime_type = {
|
||||
"image": "image/*",
|
||||
"audio": "audio/*",
|
||||
"video": "video/*",
|
||||
"document": "application/octet-stream",
|
||||
"binary": "application/octet-stream",
|
||||
}.get(part_type, "application/octet-stream")
|
||||
|
||||
if isinstance(url, str) and url:
|
||||
return Content.from_uri(uri=url, media_type=mime_type)
|
||||
|
||||
@@ -389,30 +400,7 @@ def _normalize_snapshot_content(content: Any) -> Any:
|
||||
def _legacy_binary_part(part: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Convert draft/legacy multimodal parts to AG-UI snapshot binary shape."""
|
||||
normalized: dict[str, Any] = {"type": "binary"}
|
||||
|
||||
mime_type = cast(str | None, part.get("mimeType") or part.get("mime_type"))
|
||||
url = cast(str | None, part.get("url") or part.get("uri"))
|
||||
data = cast(str | None, part.get("data"))
|
||||
binary_id = cast(str | None, part.get("id"))
|
||||
|
||||
source = part.get("source")
|
||||
if isinstance(source, dict):
|
||||
source_part = cast(dict[str, Any], source)
|
||||
source_mime = source_part.get("mimeType") or source_part.get("mime_type")
|
||||
if isinstance(source_mime, str) and source_mime:
|
||||
mime_type = source_mime
|
||||
|
||||
source_type = str(source_part.get("type", "")).lower()
|
||||
if source_type in {"url", "uri"}:
|
||||
url = cast(str | None, source_part.get("url") or source_part.get("uri"))
|
||||
elif source_type in {"base64", "data", "binary"}:
|
||||
data = cast(str | None, source_part.get("data"))
|
||||
elif source_type in {"id", "file"}:
|
||||
binary_id = cast(str | None, source_part.get("id"))
|
||||
else:
|
||||
url = cast(str | None, source_part.get("url") or source_part.get("uri") or url)
|
||||
data = cast(str | None, source_part.get("data") or data)
|
||||
binary_id = cast(str | None, source_part.get("id") or binary_id)
|
||||
url, data, binary_id, mime_type = _extract_multimodal_source_fields(part)
|
||||
|
||||
if isinstance(mime_type, str) and mime_type:
|
||||
normalized["mimeType"] = mime_type
|
||||
|
||||
@@ -596,7 +596,7 @@ def _emit_text_reasoning(content: Content, flow: FlowState | None = None) -> lis
|
||||
events.extend(_close_reasoning_block(flow))
|
||||
# Open new reasoning block.
|
||||
events.append(ReasoningStartEvent(message_id=message_id))
|
||||
events.append(ReasoningMessageStartEvent(message_id=message_id, role="assistant"))
|
||||
events.append(ReasoningMessageStartEvent(message_id=message_id, role="reasoning"))
|
||||
flow.reasoning_message_id = message_id
|
||||
|
||||
if text:
|
||||
@@ -613,7 +613,7 @@ def _emit_text_reasoning(content: Content, flow: FlowState | None = None) -> lis
|
||||
else:
|
||||
# No flow -- backward-compatible full sequence per call.
|
||||
events.append(ReasoningStartEvent(message_id=message_id))
|
||||
events.append(ReasoningMessageStartEvent(message_id=message_id, role="assistant"))
|
||||
events.append(ReasoningMessageStartEvent(message_id=message_id, role="reasoning"))
|
||||
|
||||
if text:
|
||||
events.append(ReasoningMessageContentEvent(message_id=message_id, delta=text))
|
||||
|
||||
@@ -23,7 +23,7 @@ classifiers = [
|
||||
]
|
||||
dependencies = [
|
||||
"agent-framework-core>=1.1.1,<2",
|
||||
"ag-ui-protocol==0.1.13",
|
||||
"ag-ui-protocol>=0.1.16,<0.2",
|
||||
"fastapi>=0.115.0,<0.133.1",
|
||||
"uvicorn[standard]>=0.30.0,<0.42.0"
|
||||
]
|
||||
|
||||
@@ -536,6 +536,77 @@ def test_agui_snapshot_format_preserves_multimodal_content():
|
||||
assert content_parts[1]["url"] == "https://example.com/image.png"
|
||||
|
||||
|
||||
def test_agui_snapshot_format_reads_base64_value_field():
    """A base64 source carrying only the spec ``value`` field keeps its payload in the snapshot."""
    encoded = base64.b64encode(b"abc").decode("utf-8")
    image_part = {
        "type": "image",
        "source": {"type": "base64", "value": encoded, "mimeType": "image/png"},
    }
    user_message = {"role": "user", "content": [image_part]}

    snapshot = agui_messages_to_snapshot_format([user_message])

    # The image part is expected back in the snapshot "binary" shape.
    emitted = snapshot[0]["content"][0]
    assert emitted["type"] == "binary"
    assert emitted["mimeType"] == "image/png"
    assert emitted["data"] == encoded
|
||||
|
||||
|
||||
def test_agui_snapshot_format_base64_value_preferred_over_data():
    """When a base64 source sets both ``value`` and ``data``, the snapshot carries ``value``."""
    spec_payload = base64.b64encode(b"new-spec").decode("utf-8")
    legacy_payload = base64.b64encode(b"legacy").decode("utf-8")
    source = {
        "type": "base64",
        "value": spec_payload,
        "data": legacy_payload,
        "mimeType": "image/png",
    }
    user_message = {
        "role": "user",
        "content": [{"type": "image", "source": source}],
    }

    snapshot = agui_messages_to_snapshot_format([user_message])

    emitted = snapshot[0]["content"][0]
    assert emitted["data"] == spec_payload
|
||||
|
||||
|
||||
def test_agui_snapshot_format_base64_data_field_backward_compat():
    """A base64 source with only the legacy ``data`` field still yields its payload in the snapshot."""
    legacy_payload = base64.b64encode(b"legacy").decode("utf-8")
    image_part = {
        "type": "image",
        "source": {"type": "base64", "data": legacy_payload, "mimeType": "image/png"},
    }
    user_message = {"role": "user", "content": [image_part]}

    snapshot = agui_messages_to_snapshot_format([user_message])

    emitted = snapshot[0]["content"][0]
    assert emitted["data"] == legacy_payload
|
||||
|
||||
|
||||
def test_agui_with_tool_calls_to_agent_framework():
|
||||
"""Assistant message with tool_calls is converted to FunctionCallContent."""
|
||||
agui_msg = {
|
||||
@@ -1760,3 +1831,67 @@ class TestReasoningRoundTrip:
|
||||
assert "First answer" in texts
|
||||
assert "Follow-up question" in texts
|
||||
assert "Prior reasoning" not in texts
|
||||
|
||||
|
||||
def test_parse_multimodal_media_part_base64_value_field():
    """A ``base64`` source supplies its payload through the spec ``value`` field."""
    from agent_framework_ag_ui._message_adapters import _parse_multimodal_media_part

    encoded = "aGVsbG8="
    part = {
        "type": "image",
        "source": {"type": "base64", "value": encoded, "mimeType": "image/png"},
    }

    content = _parse_multimodal_media_part(part)

    assert content is not None
    # The payload must show up inside the URI of the produced content.
    assert encoded in content.uri
|
||||
|
||||
|
||||
def test_parse_multimodal_media_part_data_source_value_field():
    """A source typed ``data`` also supplies its payload through the ``value`` field."""
    from agent_framework_ag_ui._message_adapters import _parse_multimodal_media_part

    encoded = "aGVsbG8="
    part = {
        "type": "image",
        "source": {"type": "data", "value": encoded, "mimeType": "image/png"},
    }

    content = _parse_multimodal_media_part(part)

    assert content is not None
    assert encoded in content.uri
|
||||
|
||||
|
||||
def test_parse_multimodal_media_part_base64_data_field_backward_compat():
    """A ``base64`` source that only sets the deprecated ``data`` field is still parsed."""
    from agent_framework_ag_ui._message_adapters import _parse_multimodal_media_part

    encoded = "aGVsbG8="
    part = {
        "type": "image",
        "source": {"type": "base64", "data": encoded, "mimeType": "image/png"},
    }

    content = _parse_multimodal_media_part(part)

    assert content is not None
    assert encoded in content.uri
|
||||
|
||||
|
||||
def test_parse_multimodal_media_part_value_preferred_over_data():
    """With both ``value`` and ``data`` on the source, the ``value`` payload is the one used."""
    from agent_framework_ag_ui._message_adapters import _parse_multimodal_media_part

    source = {
        "type": "base64",
        "value": "dmFsdWU=",
        "data": "ZGF0YQ==",
        "mimeType": "image/png",
    }

    content = _parse_multimodal_media_part({"type": "image", "source": source})

    assert content is not None
    # "dmFsdWU=" is the 'value' field payload (base64 of "value").
    assert "dmFsdWU=" in content.uri
|
||||
|
||||
|
||||
def test_parse_multimodal_media_part_unknown_source_value_fallback():
    """A source with an unrecognised type (``custom``) still has its ``value`` field picked up."""
    from agent_framework_ag_ui._message_adapters import _parse_multimodal_media_part

    encoded = "aGVsbG8="
    part = {
        "type": "image",
        "source": {"type": "custom", "value": encoded, "mimeType": "image/png"},
    }

    content = _parse_multimodal_media_part(part)

    assert content is not None
    assert encoded in content.uri
|
||||
|
||||
@@ -1244,7 +1244,7 @@ class TestEmitTextReasoning:
|
||||
assert events[0].message_id == "reason_1"
|
||||
assert isinstance(events[1], ReasoningMessageStartEvent)
|
||||
assert events[1].message_id == "reason_1"
|
||||
assert events[1].role == "assistant"
|
||||
assert events[1].role == "reasoning"
|
||||
assert isinstance(events[2], ReasoningMessageContentEvent)
|
||||
assert events[2].message_id == "reason_1"
|
||||
assert events[2].delta == "The user is asking about weather, so I should call the weather tool."
|
||||
@@ -1642,6 +1642,37 @@ class TestReasoningInSnapshot:
|
||||
assert close[0].message_id == "block2"
|
||||
|
||||
|
||||
class TestReasoningEventRole:
    """Checks the ``role`` on the ReasoningMessageStartEvent produced by ``_emit_text_reasoning``."""

    def test_reasoning_role_without_flow(self):
        """Without a FlowState, exactly one start event is emitted and its role is 'reasoning'."""
        reasoning = Content.from_text_reasoning(
            id="reason_role_1",
            text="Thinking about the question.",
        )

        start_events = [
            event for event in _emit_text_reasoning(reasoning) if isinstance(event, ReasoningMessageStartEvent)
        ]

        assert len(start_events) == 1
        assert start_events[0].role == "reasoning"

    def test_reasoning_role_with_flow(self):
        """With a fresh FlowState, exactly one start event is emitted and its role is 'reasoning'."""
        state = FlowState()
        reasoning = Content.from_text_reasoning(
            id="reason_role_2",
            text="Reasoning in streaming mode.",
        )

        start_events = [
            event for event in _emit_text_reasoning(reasoning, state) if isinstance(event, ReasoningMessageStartEvent)
        ]

        assert len(start_events) == 1
        assert start_events[0].role == "reasoning"
|
||||
|
||||
|
||||
async def test_session_id_matches_thread_id():
|
||||
"""Session created by run_agent_stream uses the client thread_id as session_id."""
|
||||
from conftest import StubAgent
|
||||
|
||||
Generated
+4
-4
@@ -84,14 +84,14 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "ag-ui-protocol"
|
||||
version = "0.1.13"
|
||||
version = "0.1.17"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/04/b5/fc0b65b561d00d88811c8a7d98ee735833f81554be244340950e7b65820c/ag_ui_protocol-0.1.13.tar.gz", hash = "sha256:811d7d7dcce4783dec252918f40b717ebfa559399bf6b071c4ba47c0c1e21bcb", size = 5671, upload-time = "2026-02-19T18:40:38.602Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/cb/0f/5a8ce5eb5cd7adf3f733da87d7a5a8a38f24ec9e029c1300f9495ae9c7fa/ag_ui_protocol-0.1.17.tar.gz", hash = "sha256:5fae4cfced8245c8ac329b85702fd166ff226d99dc1ea8a1ae95890826aa69e5", size = 6273, upload-time = "2026-04-20T21:09:19.436Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/cd/9f/b833c1ab1999da35ebad54841ae85d2c2764c931da9a6f52d8541b6901b2/ag_ui_protocol-0.1.13-py3-none-any.whl", hash = "sha256:1393fa894c1e8416efe184168a50689e760d05b32f4646eebb8ff423dddf8e8f", size = 8053, upload-time = "2026-02-19T18:40:37.27Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a0/82/e5f1686b4c4e232818c75598d651b82088f584e59adef71802920d74f82f/ag_ui_protocol-0.1.17-py3-none-any.whl", hash = "sha256:6a9065590d21c7b9b8ae9bb1a3410ecf4d18cb8041a077d25574f792dfa504fe", size = 8648, upload-time = "2026-04-20T21:09:20.488Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -183,7 +183,7 @@ dev = [
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "ag-ui-protocol", specifier = "==0.1.13" },
|
||||
{ name = "ag-ui-protocol", specifier = ">=0.1.16,<0.2" },
|
||||
{ name = "agent-framework-core", editable = "packages/core" },
|
||||
{ name = "fastapi", specifier = ">=0.115.0,<0.133.1" },
|
||||
{ name = "httpx", marker = "extra == 'dev'", specifier = "==0.28.1" },
|
||||
|
||||
Reference in New Issue
Block a user